Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon.

Pages: 1-

List all threads+sort

Name: s 2010-02-25 22:32

Hi /prog/
I'm writing a python script to get a list of all the threads in a given dis.4chan board, with the option of sorting them however.
I won't have any problems doing this since I know exactly what I'm doing. I'll be using Python and some kind of X/HTML parser, maybe BeautifulSoup.
Anyways, I was just wondering if anybody has done this already to save me the effart.
Otherwise, I'll let you know when it's finished.
()( i bet this board has a secret hidden search function eh ) ()

Name: Anonymous 2010-02-25 22:37

Name: Anonymous 2010-02-25 22:42

In fact, this has already been done ad-infinitum.
http://dis.4chan.org/read/prog/1255410333

Name: Anonymous 2010-02-26 1:57

>>3
wwwwwwww go eat cheeseburgers Randy!

Name: fuck u soup 2010-02-26 4:12

#!/usr/bin/python
# Python script to reorder all dis.4chan messages, sorted by post count
# this program is top secret proprietary supercorporate property.
# if you use it, read it, or copy it, your family will be incinerated.

from datetime import datetime
import BeautifulSoup
class disl:
    """Interactive scraper for the thread-list pages of a dis.4chan.org board.

    Instantiating the class prompts for a board name, reports how many list
    pages that board has, then downloads one list page and dumps its table
    rows for inspection.  The code targets Python 2 (``raw_input``,
    ``urllib2``) and relies on the ``BeautifulSoup`` module imported at file
    level.
    """

    # Most recently parsed first list page (set by getNumPages).
    page0 = []
    # URL prefix of the thread-list pages; the board name is appended to it.
    baseurl = 'http://dis.4chan.org/list/'
    # Board names offered in the interactive prompt.
    boardlist = ['anime', 'book', 'carcom', 'comp', 'food', 'games', 'img',
                 'lang', 'lounge', 'music', 'newnew', 'newpol', 'sci', 'sjis',
                 'sports', 'tech', 'tele', 'vip']
    # Three-letter month abbreviations (lower case) -> month number, for getDate.
    _MONTHS = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
               'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12}

    def __init__(self):
        """Prompt for a board and dump the rows of one of its list pages."""
        board = raw_input("which board? (" + ','.join(self.boardlist) + "): \n")
        print("get  " + self.baseurl + board)
        print("%s pages" % self.getNumPages(board))
        num = 3  # only this one list page is fetched for now
        infoget = self.getInfo(board, num)
        if infoget is None:
            # Download failed or the page has no <tbody>: nothing to parse.
            print("no thread table found on page %s" % num)
            return
        rows = infoget.findAll('tr')
        print("%s %s" % (type(infoget), type(rows)))
        self.parseInfo(rows)

    def parseInfo(self, resultset):
        """Print debugging details for every cell of every row in *resultset*.

        resultset -- iterable of row tags; ``__init__`` passes the result of
                     ``findAll('tr')`` on the page's ``<tbody>``.

        NOTE: the original intent was to turn the rows into dictionaries, but
        nothing is collected or returned yet; the method only prints what it
        finds in each ``<td>``.
        """
        print(dir(resultset))
        for result in resultset:
            for a in result.findAll('td'):
                print(a.find('a', {'rel': 'nofollow'}))
                print(a.find('small'))
                print(a.next)
            print('--------')

    def getInfo(self, boardname, num):
        """Download list page *num* of *boardname* and return its ``<tbody>``.

        Returns the result of ``find('tbody')`` on the parsed page, or None
        when the download failed (``download`` returned -1).
        """
        aPage = self.download(self.baseurl + boardname + '/' + str(num))
        if aPage == -1:
            # download() already reported the error; there is no page to parse.
            return None
        pPage = BeautifulSoup.BeautifulSoup(''.join(aPage))
        return pPage.find('tbody')

    def getNumPages(self, boardname):
        """Return the number of list pages of *boardname* as an int.

        The first list page is downloaded, parsed and stored in
        ``self.page0``.  The count is taken from the text of the last ``<a>``
        inside ``<div class="pages">``.  Returns 0 when the download failed,
        when that div is missing, or when it contains no links.
        """
        listpage = self.download(self.baseurl + boardname)
        if listpage == -1:
            return 0
        self.page0 = BeautifulSoup.BeautifulSoup(''.join(listpage))
        pager = self.page0.find('div', {'class': 'pages'})
        if pager is None:
            return 0
        totalpages = 0
        for tagGroup in pager.findAll('a'):
            # Each link overwrites the previous one, so the last link wins.
            totalpages = tagGroup.string
        return int(totalpages)

    def download(self, someurl, suppress=True, retry=1):
        """Fetch *someurl* and return its content as a list of lines.

        someurl  -- URL to request.
        suppress -- when false, print the URL and the number of lines read.
        retry    -- total number of attempts; after a failed attempt that is
                    not the last one, wait 5 seconds and try again.

        Returns the list produced by ``readlines()`` on success, or -1 once
        every attempt has failed with ``URLError`` (the failure is printed).
        """
        from time import sleep
        from urllib2 import Request, urlopen, URLError
        req = Request(someurl)
        attempt = 0
        while attempt < retry:
            # Count the attempt up front so that `continue` cannot skip it.
            attempt += 1
            try:
                response = urlopen(req)
                try:
                    retstr = response.readlines()
                finally:
                    response.close()
            except URLError as e:
                if attempt < retry:
                    print('error downloading, retrying in 5')
                    sleep(5)
                    continue
                if hasattr(e, 'reason'):
                    print('We failed to reach a server.')
                    print('Reason:  %s' % (e.reason,))
                elif hasattr(e, 'code'):
                    print('The server couldn\'t fulfill the request.')
                    print('Error code:  %s' % (e.code,))
                return -1
            if not suppress:
                print('downloading from ' + someurl + '(' + str(len(retstr)) + ')')
            return retstr
        return -1

    def getDate(self, datestr):
        """Convert a ``'DD Mon YYYY HH:MM'`` string into a ``datetime``.

        datestr -- four whitespace-separated fields: day, month abbreviation
                   (case-insensitive, e.g. ``Feb``), year, and a time whose
                   first two colon-separated parts are hour and minute.

        Seconds are always set to 0.  Raises ValueError when the string does
        not have four fields, the month is unknown, the time has no colon, or
        a numeric field cannot be converted.
        """
        ary = datestr.split()
        if len(ary) != 4:
            raise ValueError('invalid date string')
        day_s, mon_s, year_s, time_s = ary
        clock = time_s.split(':')
        if len(clock) < 2:
            raise ValueError('invalid date string')
        try:
            mon = self._MONTHS[mon_s.lower()]
        except KeyError:
            raise ValueError('invalid date string')
        return datetime(int(year_s), mon, int(day_s),
                        int(clock[0]), int(clock[1]), 0)
   
if __name__ == "__main__":
    # Running the file as a script starts the interactive session.
    d = disl()

Name: Anonymous 2010-02-26 4:48

Can it search the board too?
Create a topical index?

Name: Anonymous 2010-02-26 10:29

>>6
Hax my annus.

Name: Anonymous 2011-02-04 14:45

Don't change these.
Name: Email:
Entire Thread Thread List