Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

YouPorn Python API

Name: Anonymous 2009-04-14 18:02

I programmed one. Who's interested?

Name: Anonymous 2009-04-14 19:35

>>6
I couldn't be bothered to actually write my own. I just cleaned up some of your mess (and no, I haven't tested it).

#!/usr/bin/env python

from urlparse import urljoin

# Do they actually use HTML5 or are you just an idiot?
from html5lib import HTMLParser
from html5lib.treebuilders import getTreeBuilder

import mechanize

# Site root; every other URL below is built on top of it.
URL = 'http://youporn.com/'

# Opened once at start-up with the "Enter" choice in the query string.
ENTER_URL = URL + '?user_choice=Enter'

# Listing templates: fill with (sort_by, page) -- a string and an integer.
BROWSE_URL = URL + 'browse/%s?page=%d'
TOP_RATED_URL = URL + 'top_rated/%s?page=%d'
MOST_VIEWED_URL = URL + 'most_viewed/%s?page=%d'

# Search template: fill with (sort_by, query, type, page).
SEARCH_URL = URL + '%s?query=%s&type=%s&page=%d'

class YouPorn(object):
    """Small scraping client built on mechanize and html5lib.

    The listing methods (``newest``, ``top_rated``, ``most_viewed``,
    ``search``) each fetch one page and return a list of absolute URLs for
    every link on it whose ``href`` contains ``/watch``.  ``download``
    fetches one such page and saves the file behind its ``/download/`` link.
    """

    def __init__(self):
        """Create the HTML parser and the browser, then open ``ENTER_URL``.

        Note that constructing an instance performs a network request.
        """
        self.parser = HTMLParser(tree=getTreeBuilder('beautifulsoup'))
        self.browser = mechanize.Browser()
        # Replace the browser's header list with an empty one.
        self.browser.addheaders = []
        # NOTE(review): presumably this request registers the "Enter" choice
        # for the session so that later pages are served -- confirm.
        self.browser.open(ENTER_URL)

    def filter(self, url):
        """Return the absolute URLs of all watch links on the page at *url*.

        Args:
            url: Address of the listing page to fetch and parse.

        Returns:
            A list of URLs, each joined onto the site root ``URL``.
        """
        def is_watch_link(href):
            # Guard against a missing/empty href before the substring test.
            return href and '/watch' in href

        soup = self.parser.parse(self.browser.open(url))
        return [urljoin(URL, anchor['href'])
                for anchor in soup.findAll('a', {'href': is_watch_link})]

    def download(self, url):
        """Save the file behind the download link of the page at *url*.

        The output file is created relative to the current directory and is
        named after the second-to-last ``/``-separated component of *url*
        with ``.flv`` appended.

        Args:
            url: Address of the page that carries the download link.

        Raises:
            ValueError: If the page has no link whose ``href`` contains
                ``/download/``.
        """
        def is_download_link(href):
            # Same guard as in filter(): the href value may be missing.
            return href and '/download/' in href

        soup = self.parser.parse(self.browser.open(url))
        link = soup.find('a', {'href': is_download_link})
        if link is None:
            # Fail with a clear message instead of subscripting None.
            raise ValueError('no download link found on %s' % url)

        # An absolute href passes through urljoin unchanged; a relative one
        # is resolved against the page it was found on.
        download_url = urljoin(url, link['href'])
        filename = url.split('/')[-2] + '.flv'
        self.browser.retrieve(download_url, filename)

    def newest(self, page=1, sort_by='rating'):
        """Return watch links from one page of the ``browse/`` listing.

        Args:
            page: Integer page number placed in the ``page`` query field.
            sort_by: Path segment placed after ``browse/``.
        """
        return self.filter(BROWSE_URL % (sort_by, page))

    def top_rated(self, page=1, sort_by='week'):
        """Return watch links from one page of the ``top_rated/`` listing.

        Args:
            page: Integer page number placed in the ``page`` query field.
            sort_by: Path segment placed after ``top_rated/``.
        """
        return self.filter(TOP_RATED_URL % (sort_by, page))

    def most_viewed(self, page=1, sort_by='week'):
        """Return watch links from one page of the ``most_viewed/`` listing.

        Args:
            page: Integer page number placed in the ``page`` query field.
            sort_by: Path segment placed after ``most_viewed/``.
        """
        return self.filter(MOST_VIEWED_URL % (sort_by, page))

    def search(self, page=1, sort_by='relevance', type='straight',
               query=None):
        """Return watch links from one page of search results.

        Args:
            page: Integer page number placed in the ``page`` query field.
            sort_by: Path segment placed directly after the site root.
            type: Value of the ``type`` query field.
            query: Search terms, required.  They are inserted into the URL
                as given, so the caller is responsible for any
                URL-encoding.  The parameter is last and has a default only
                so that the existing positional order stays unchanged.

        Raises:
            TypeError: If *query* is not supplied.
        """
        if query is None:
            raise TypeError("search() requires a 'query' argument")
        return self.filter(SEARCH_URL % (sort_by, query, type, page))

def main():
    """Download every video but the first from the all-time most-viewed page."""
    client = YouPorn()
    listing = client.most_viewed(sort_by='all')
    # The first entry of the listing is deliberately left out.
    for link in listing[1:]:
        print('Downloading %s...' % link)
        client.download(link)


# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List