Name: Anonymous 2009-04-14 18:02
I programmed one. Who's interested?
#!/usr/bin/env python
import html5lib
import mechanize
# Site root; every endpoint below is built by appending to it.
URL = "http://youporn.com/"

# Page opened once when a session starts.
ENTER_URL = URL + "?user_choice=Enter"

# Listing templates. Each still carries two placeholders for a later "%"
# formatting step: "%s" (filled with sort_by) and "%d" (the page number).
BROWSE_URL = URL + "browse/%s?page=%d"
TOP_RATED_URL = URL + "top_rated/%s?page=%d"
MOST_VIEWED_URL = URL + "most_viewed/%s?page=%d"

# Search template. Placeholders, in order: path segment, query, type, page.
SEARCH_URL = URL + "%s?query=%s&type=%s&page=%d"
def _join_url(a, *p):
path = a
for b in p:
if b.startswith('/'):
path = b
elif path == '' or path.endswith('/'):
path += b
else:
path += '/' + b
return path
class YouPorn(object):
    """Small scraping client driven by a single ``mechanize`` browser session.

    Listing methods fetch one page, parse it with html5lib into a
    BeautifulSoup tree and return the "/watch/" links found on it.
    """

    def __init__(self):
        """Create the browser session and open the entry page."""
        self._browser = mechanize.Browser()
        # Start with an empty list of extra request headers.
        self._browser.addheaders = []
        self._enter()

    def _enter(self):
        """Open ENTER_URL once (presumably to get past the entry prompt)."""
        self._browser.open(ENTER_URL)

    def _soup(self, url):
        """Fetch ``url`` with the session browser and return the parsed tree."""
        return html5lib.parse(self._browser.open(url), "beautifulsoup")

    @staticmethod
    def _is_watch_href(href):
        """Return True when ``href`` is non-empty and contains "/watch/"."""
        return bool(href) and "/watch/" in href

    @staticmethod
    def _is_download_href(href):
        """Return True when ``href`` is non-empty and contains "/download/".

        The emptiness check matters: the matcher may be handed ``None`` for
        an anchor that has no href attribute, and ``"x" in None`` raises.
        """
        return bool(href) and "/download/" in href

    @staticmethod
    def _filter_videos(soup):
        """Return the joined URL of every "/watch/" anchor in ``soup``."""
        anchors = soup.findAll("a", {"href": YouPorn._is_watch_href})
        return [_join_url(URL, a["href"]) for a in anchors]

    def get_newest_videos(self, page=1, sort_by="rating"):
        """Return the video links on page ``page`` of the browse listing."""
        return self._filter_videos(self._soup(BROWSE_URL % (sort_by, page)))

    def get_top_rated(self, page=1, sort_by="week"):
        """Return the video links on page ``page`` of the top-rated listing."""
        return self._filter_videos(self._soup(TOP_RATED_URL % (sort_by, page)))

    def get_most_viewed(self, page=1, sort_by="week"):
        """Return the video links on page ``page`` of the most-viewed listing."""
        return self._filter_videos(
            self._soup(MOST_VIEWED_URL % (sort_by, page)))

    def search(self, query, page=1, sort_by="relevance", type="straight"):
        """Return the video links on page ``page`` of a search result.

        ``sort_by`` fills the path segment of SEARCH_URL; ``query`` and
        ``type`` are inserted into the query string exactly as given (no
        URL-escaping is applied here).
        """
        return self._filter_videos(
            self._soup(SEARCH_URL % (sort_by, query, type, page)))

    def download_video(self, url):
        """Fetch the first "/download/" link found on the page at ``url``.

        The target filename is the second-to-last "/"-separated segment of
        the browser's current URL, with ".flv" appended.

        Raises:
            ValueError: If the page contains no "/download/" link.
        """
        soup = self._soup(url)
        link = soup.find("a", {"href": self._is_download_href})
        if link is None:
            raise ValueError("no download link found on %s" % url)
        filename = self._browser.geturl().split("/")[-2] + ".flv"
        self._browser.retrieve(link["href"], filename)
def main():
    """Download each video on the "all" most-viewed page, skipping the first.

    Prints a progress line before every download.
    """
    youporn = YouPorn()
    # [1:] drops the first link returned for the listing page.
    for video in youporn.get_most_viewed(sort_by="all")[1:]:
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("Downloading %s..." % video)
        youporn.download_video(video)


if __name__ == "__main__":
    main()

#!/usr/bin/env python
from urlparse import urljoin
# NOTE(review): does the site actually serve HTML5? Confirm html5lib is needed rather than a plain HTML parser.
from html5lib import HTMLParser
from html5lib.treebuilders import getTreeBuilder
import mechanize
# Root of the site; the remaining constants extend it.
URL = 'http://youporn.com/'

# Opened once at start-up by the client class.
ENTER_URL = URL + '?user_choice=Enter'

# Listing templates: '%s' is later filled with sort_by, '%d' with the page.
BROWSE_URL = URL + 'browse/%s?page=%d'
TOP_RATED_URL = URL + 'top_rated/%s?page=%d'
MOST_VIEWED_URL = URL + 'most_viewed/%s?page=%d'

# Search template; placeholders in order: path segment, query, type, page.
SEARCH_URL = URL + '%s?query=%s&type=%s&page=%d'
class YouPorn(object):
    """Small scraping client: one html5lib parser plus one mechanize browser.

    Listing methods build a URL from the module-level templates and hand it
    to :meth:`filter`, which returns the "/watch" links found on that page.
    """

    def __init__(self):
        """Build the parser and browser, then open the entry page."""
        self.parser = HTMLParser(tree=getTreeBuilder('beautifulsoup'))
        self.browser = mechanize.Browser()
        # Start with an empty list of extra request headers.
        self.browser.addheaders = []
        self.browser.open(ENTER_URL)

    @staticmethod
    def _is_watch_href(href):
        """Return True when ``href`` is non-empty and contains '/watch'."""
        return bool(href) and '/watch' in href

    @staticmethod
    def _is_download_href(href):
        """Return True when ``href`` is non-empty and contains '/download/'.

        The emptiness check matters: the matcher may be handed ``None`` for
        an anchor that has no href attribute, and ``'x' in None`` raises.
        """
        return bool(href) and '/download/' in href

    def _soup(self, url):
        """Fetch ``url`` with the browser and parse the response."""
        return self.parser.parse(self.browser.open(url))

    def filter(self, url):
        """Return absolute URLs of all '/watch' anchors on the page at ``url``."""
        soup = self._soup(url)
        return [urljoin(URL, a['href'])
                for a in soup.findAll('a', {'href': self._is_watch_href})]

    def download(self, url):
        """Fetch the first '/download/' link found on the page at ``url``.

        The target filename is the second-to-last '/'-separated segment of
        ``url`` with '.flv' appended.

        Raises:
            ValueError: If the page contains no '/download/' link.
        """
        soup = self._soup(url)
        link = soup.find('a', {'href': self._is_download_href})
        if link is None:
            raise ValueError('no download link found on %s' % url)
        filename = url.split('/')[-2] + '.flv'
        self.browser.retrieve(link['href'], filename)

    def newest(self, page=1, sort_by='rating'):
        """Return the video links on page ``page`` of the browse listing."""
        return self.filter(BROWSE_URL % (sort_by, page))

    def top_rated(self, page=1, sort_by='week'):
        """Return the video links on page ``page`` of the top-rated listing."""
        return self.filter(TOP_RATED_URL % (sort_by, page))

    def most_viewed(self, page=1, sort_by='week'):
        """Return the video links on page ``page`` of the most-viewed listing."""
        return self.filter(MOST_VIEWED_URL % (sort_by, page))

    def search(self, query, page=1, sort_by='relevance', type='straight'):
        """Return the video links on page ``page`` of a search result.

        ``query`` is a required argument: the body always used it, but the
        original signature never declared it, so every call failed with
        NameError. ``sort_by`` fills the path segment of SEARCH_URL;
        ``query`` and ``type`` are inserted exactly as given (no
        URL-escaping is applied here).
        """
        return self.filter(SEARCH_URL % (sort_by, query, type, page))
def main():
    """Download each video on the 'all' most-viewed page, skipping the first.

    A progress line is printed before every download.
    """
    client = YouPorn()
    listing = client.most_viewed(sort_by='all')
    # The first link of the listing is skipped.
    for link in listing[1:]:
        print('Downloading %s...' % link)
        client.download(link)


if __name__ == '__main__':
    main()
54c54
< def search(self, query, page=1, sort_by="relevance", type="straight"):
---
> def search(self, query, page=1, sort_by="relevance", type="gay"):