#!/usr/bin/python
# Python script to reorder all dis.4chan messages, sorted by post count.
# this program is top secret proprietary supercorporate property.
# if you use it, read it, or copy it, your family will be incinerated.
from datetime import datetime
import BeautifulSoup
class disl:
    """Interactive scraper for the dis.4chan.org thread-list pages.

    This is Python 2 code: it relies on raw_input, urllib2 and the
    BeautifulSoup 3 module imported at the top of the file.

    Instantiating the class prompts for a board name, prints how many list
    pages that board has, then downloads one list page and dumps its table
    cells to stdout.
    """

    # Parsed soup of the most recently fetched board index page; rebound on
    # the instance by getNumPages().
    page0 = []
    baseurl = 'http://dis.4chan.org/list/'
    boardlist = [
        'anime', 'book', 'carcom', 'comp', 'food', 'games', 'img', 'lang',
        'lounge', 'music', 'newnew', 'newpol', 'sci', 'sjis', 'sports',
        'tech', 'tele', 'vip',
    ]
    # Month abbreviations (lower case) accepted by getDate().
    _MONTHS = {
        'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
        'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
    }

    def __init__(self):
        """Prompt for a board, then fetch and dump one of its list pages."""
        board = raw_input("which board? (" + ','.join(self.boardlist) + "): \n")
        print("get  " + self.baseurl + board)
        print("%s pages" % self.getNumPages(board))
        num = 3  # list page to inspect (hard-coded)
        infoget = self.getInfo(board, num)
        rows = infoget.findAll('tr')
        print("%s %s" % (type(infoget), type(rows)))
        self.parseInfo(rows)

    def parseInfo(self, resultset):
        """Dump debugging information about every cell of every row.

        resultset is an iterable of table-row tags.  For each <td> in each
        row this prints the rel="nofollow" link, the <small> element and the
        element that follows the cell.  Nothing is returned.
        """
        print(dir(resultset))
        for result in resultset:
            for cell in result.findAll('td'):
                print(cell.find('a', {'rel': 'nofollow'}))
                print(cell.find('small'))
                print(cell.next)
                # NOTE(review): the original indentation was lost; printing
                # the separator once per cell is an assumption - confirm.
                print('--------')

    def getInfo(self, boardname, num):
        """Return the first <tbody> tag of list page num of boardname.

        Returns None when the page has no <tbody>.  Raises IOError when the
        page cannot be downloaded.
        """
        soup = self._fetchSoup(self.baseurl + boardname + '/' + str(num))
        return soup.find('tbody')

    def getNumPages(self, boardname):
        """Return the number of list pages of boardname as an int.

        The parsed index page is stored in self.page0.  The count is the
        text of the last link inside <div class="pages">; 0 is returned
        when that div is missing or holds no links.  Raises IOError when
        the index page cannot be downloaded.
        """
        self.page0 = self._fetchSoup(self.baseurl + boardname)
        totalpages = 0
        pager = self.page0.find('div', {'class': 'pages'})
        if pager is None:
            return totalpages
        for link in pager.findAll('a'):
            totalpages = link.string  # keep the last link's text
        return int(totalpages)

    def _fetchSoup(self, url):
        """Download url and parse it; raise IOError if the download failed."""
        lines = self.download(url)
        if lines == -1:
            # download() signals failure with -1; joining that would only
            # produce an unrelated TypeError.
            raise IOError('could not download ' + url)
        return BeautifulSoup.BeautifulSoup(''.join(lines))

    def download(self, someurl, suppress=True, retry=1):
        """Fetch someurl and return its content as a list of lines.

        suppress: when false, print a one-line summary after a successful
            download.
        retry: total number of attempts; values below 1 still make one
            attempt.  Failed attempts are separated by a 5 second pause.

        Returns -1 (after printing the failure reason) when every attempt
        raised URLError.
        """
        from time import sleep
        from urllib2 import Request, urlopen, URLError

        req = Request(someurl)
        attempts = max(retry, 1)
        error = None
        for attempt in range(1, attempts + 1):
            try:
                response = urlopen(req)
                try:
                    retstr = response.readlines()
                finally:
                    response.close()
            except URLError as e:
                error = e
                if attempt < attempts:
                    print('error downloading, retrying in 5')
                    sleep(5)
                continue
            if not suppress:
                print('downloading from ' + someurl + '(' + str(len(retstr)) + ')')
            return retstr

        # Every attempt failed: report why and hand back the sentinel.
        if hasattr(error, 'reason'):
            print('We failed to reach a server.')
            print('Reason:  %s' % (error.reason,))
        elif hasattr(error, 'code'):
            print('The server couldn\'t fulfill the request.')
            print('Error code:  %s' % (error.code,))
        return -1

    def getDate(self, datestr):
        """Convert a 'DD Mon YYYY HH:MM' string into a datetime.

        The four whitespace-separated fields are day, three-letter month
        abbreviation (any case), year and hour:minute.  Seconds are always
        set to 0.

        Raises ValueError when the string does not have four fields, the
        month is unknown, the time has no ':' or a field is not numeric.
        """
        ary = datestr.split()
        if len(ary) != 4:
            raise ValueError('invalid date string')
        clock = ary[3].split(':')
        month = self._MONTHS.get(ary[1].lower())
        if month is None or len(clock) < 2:
            raise ValueError('invalid date string')
        return datetime(int(ary[2]), month, int(ary[0]),
                        int(clock[0]), int(clock[1]), 0)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    d = disl()