Name: Anonymous 2009-04-14 18:02
I programmed one, who's interested?
#!/usr/bin/env python
from urlparse import urljoin
# NOTE(review): confirm html5lib is really needed here -- does the site serve HTML5, or is it only used for lenient parsing?
from html5lib import HTMLParser
from html5lib.treebuilders import getTreeBuilder
import mechanize
# Site root; every other URL below is derived from it.
URL = 'http://youporn.com/'
# Entry URL carrying the user_choice=Enter query parameter.
ENTER_URL = URL + '?user_choice=Enter'
# Listing templates: fill in with ``template % (sort_by, page)``.
BROWSE_URL = URL + 'browse/%s?page=%d'
TOP_RATED_URL = URL + 'top_rated/%s?page=%d'
MOST_VIEWED_URL = URL + 'most_viewed/%s?page=%d'
# Search template: fill in with ``template % (sort_by, query, type, page)``.
SEARCH_URL = URL + '%s?query=%s&type=%s&page=%d'
class YouPorn(object):
    """Scraping client built on a mechanize browser and an html5lib parser.

    The listing methods (``newest``, ``top_rated``, ``most_viewed`` and
    ``search``) each return a list of absolute watch-page URLs, and
    ``download`` saves the video linked from one such page.
    """

    def __init__(self):
        """Set up the parser and browser, then request ENTER_URL once."""
        self.parser = HTMLParser(tree=getTreeBuilder('beautifulsoup'))
        self.browser = mechanize.Browser()
        # Start from an empty list of extra request headers.
        self.browser.addheaders = []
        # NOTE(review): presumably this gets past the site's entry page for
        # the rest of the browser session -- confirm.
        self.browser.open(ENTER_URL)

    @staticmethod
    def _href_contains(fragment):
        """Return a predicate that is true for hrefs containing *fragment*."""
        def matches(href):
            # The href is treated as possibly None or empty (an anchor
            # without the attribute), so check it before the ``in`` test.
            return bool(href) and fragment in href
        return matches

    def _soup(self, url):
        """Open *url* with the shared browser and return the parsed tree."""
        return self.parser.parse(self.browser.open(url))

    def filter(self, url):
        """Return the absolute URL of every ``/watch`` link found at *url*.

        Hrefs are resolved against the site root ``URL``. The result is not
        de-duplicated.
        """
        is_watch = self._href_contains('/watch')
        anchors = self._soup(url).findAll('a', {'href': is_watch})
        return [urljoin(URL, anchor['href']) for anchor in anchors]

    def download(self, url):
        """Save the video linked from the watch page *url* as a ``.flv`` file.

        The file name is the second-to-last ``/``-separated segment of *url*
        plus ``.flv`` (this assumes *url* ends with a trailing slash --
        TODO confirm).

        Raises:
            ValueError: if the page has no anchor whose href contains
                ``/download/``.
        """
        is_download = self._href_contains('/download/')
        link = self._soup(url).find('a', {'href': is_download})
        if link is None:
            # Fail with a clear message instead of subscripting None.
            raise ValueError('no download link found on %s' % url)
        # urljoin returns absolute links unchanged and resolves relative
        # ones against the page, matching how filter() treats its links.
        download_url = urljoin(url, link['href'])
        filename = url.split('/')[-2] + '.flv'
        self.browser.retrieve(download_url, filename)

    def newest(self, page=1, sort_by='rating'):
        """Return the watch URLs listed on page *page* of BROWSE_URL."""
        return self.filter(BROWSE_URL % (sort_by, page))

    def top_rated(self, page=1, sort_by='week'):
        """Return the watch URLs listed on page *page* of TOP_RATED_URL."""
        return self.filter(TOP_RATED_URL % (sort_by, page))

    def most_viewed(self, page=1, sort_by='week'):
        """Return the watch URLs listed on page *page* of MOST_VIEWED_URL."""
        return self.filter(MOST_VIEWED_URL % (sort_by, page))

    def search(self, page=1, sort_by='relevance', type='straight', query=''):
        """Return the watch URLs listed on page *page* of a search result.

        Args:
            page: integer page number.
            sort_by: value for the sort slot of SEARCH_URL.
            type: value for the ``type`` query parameter.
            query: search text; it is URL-encoded before being placed in
                the query string.
        """
        # ``query`` used to be an undefined name here, so every call raised
        # NameError. It is added as the last parameter so the positions of
        # the existing parameters do not change.
        try:
            from urllib import quote_plus  # Python 2
        except ImportError:
            from urllib.parse import quote_plus  # Python 3
        encoded_query = quote_plus(query)
        return self.filter(SEARCH_URL % (sort_by, encoded_query, type, page))
def main():
    """Download the all-time most-viewed videos, skipping the first link."""
    site = YouPorn()
    listing = site.most_viewed(sort_by='all')
    # The first entry of the listing is deliberately left out.
    for video_url in listing[1:]:
        print('Downloading %s...' % video_url)
        site.download(video_url)


# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()