Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

YouPorn Python API

Name: Anonymous 2009-04-14 18:02

I programmed one, who's interested?

Name: Anonymous 2009-04-14 20:17

>>13
$ diff old.py new.py
3,4c3,7
< import html5lib
< import mechanize
---
from urlparse import urljoin

# Do they actually use HTML5 or are you just an idiot?
from html5lib import HTMLParser
from html5lib.treebuilders import getTreeBuilder
6,22c9
< URL = "http://youporn.com/"
< ENTER_URL = "%s?user_choice=Enter" % URL
< BROWSE_URL = "%sbrowse/%s?page=%s" % (URL, "%s", "%d")
< TOP_RATED_URL = "%stop_rated/%s?page=%s" % (URL, "%s", "%d")
< MOST_VIEWED_URL = "%smost_viewed/%s?page=%s" % (URL, "%s", "%d")
< SEARCH_URL = "%s%s?query=%s&type=%s&page=%s" % (URL, "%s", "%s", "%s", "%d")
<
< def _join_url(a, *p):
<     path = a
<     for b in p:
<         if b.startswith('/'):
<             path = b
<         elif path == '' or path.endswith('/'):
<             path +=  b
<         else:
<             path += '/' + b
<     return path
---
import mechanize
23a11,16
URL = 'http://youporn.com/'
ENTER_URL = '%s?user_choice=Enter' % URL
BROWSE_URL = '%sbrowse/%s?page=%s' % (URL, '%s', '%d')
TOP_RATED_URL = '%stop_rated/%s?page=%s' % (URL, '%s', '%d')
MOST_VIEWED_URL = '%smost_viewed/%s?page=%s' % (URL, '%s', '%d')
SEARCH_URL = '%s%s?query=%s&type=%s&page=%s' % (URL, '%s', '%s', '%s', '%d')
27,63c20,34
<         self._browser = mechanize.Browser()
<         self._browser.addheaders = []
<         self._enter()
<
<     def _enter(self):
<         self._browser.open(ENTER_URL)
<
<     @staticmethod
<     def _filter_videos(soup):
<         watch = lambda href: href and "/watch/" in href
<         videos = []
<         for a in soup.findAll("a", {"href":watch}):
<             videos.append(_join_url(URL, a["href"]))
<         return videos
<
<     def get_newest_videos(self, page=1, sort_by="rating"):
<         return self._filter_videos(html5lib.parse(self._browser.open(
<             BROWSE_URL % (sort_by, page)), "beautifulsoup"))
<
<     def get_top_rated(self, page=1, sort_by="week"):
<         return self._filter_videos(html5lib.parse(self._browser.open(
<             TOP_RATED_URL % (sort_by, page)), "beautifulsoup"))
<
<     def get_most_viewed(self, page=1, sort_by="week"):
<         return self._filter_videos(html5lib.parse(self._browser.open(
<             MOST_VIEWED_URL % (sort_by, page)),"beautifulsoup"))
<
<     def search(self, query, page=1, sort_by="relevance", type="straight"):
<         return self._filter_videos(html5lib.parse(self._browser.open(
<             SEARCH_URL % (sort_by, query, type, page)), "beautifulsoup"))
<
<     def download_video(self, url):
<         soup = html5lib.parse(self._browser.open(url), "beautifulsoup")
<         download = lambda href: "/download/" in href
<         download_url = soup.find("a", {"href":download})["href"]
<         self._browser.retrieve(download_url,
<             self._browser.geturl().split("/")[-2] + ".flv")
---
        self.parser = HTMLParser(tree=getTreeBuilder('beautifulsoup'))
        self.browser = mechanize.Browser()
        self.browser.addheaders = []
        self.browser.open(ENTER_URL)

    def filter(self, url):
        watch = lambda href: href and '/watch' in href
        soup = self.parser.parse(self.browser.open(url))

        return [urljoin(URL, a['href']) for
                a in soup.findAll('a', {'href': watch})]

    def download(self, url):
        download = lambda href: '/download/' in href
        soup = self.parser.parse(self.browser.open(url))
64a36,50
        download_url = soup.find('a', {'href': download})['href']
        filename = url.split('/')[-2] + '.flv'
        self.browser.retrieve(download_url, filename)

    def newest(self, page=1, sort_by='rating'):
        return self.filter(BROWSE_URL % (sort_by, page))

    def top_rated(self, page=1, sort_by='week'):
        return self.filter(TOP_RATED_URL % (sort_by, page))

    def most_viewed(self, page=1, sort_by='week'):
        return self.filter(MOST_VIEWED_URL % (sort_by, page))

    def search(self, page=1, sort_by='relevance', type='straight'):
        return self.filter(SEARCH_URL % (sort_by, query, type, page))
68,70c54,56
<     for video in youporn.get_most_viewed(sort_by="all")[1:]:
<         print "Downloading %s..." % video
<         youporn.download_video(video)
---
    for video in youporn.most_viewed(sort_by='all')[1:]:
        print 'Downloading %s...' % video
        youporn.download(video)
72c58
< if __name__ == "__main__":
---
if __name__ == '__main__':
74d59
<

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List