Name: Anonymous 2011-12-04 3:23
How nasty is this code? Beginner here, don't be too mean
import urllib2, time, math, os, zipfile, tempfile
from lxml.html import fromstring
from string import ascii_letters
class InvalidURLError(ValueError):
    """Raised by GalleryParser.get when urllib2 rejects a URL with ValueError.

    Subclasses ValueError so that code catching ValueError keeps working.
    """


class GalleryParser(object):
    """Collect a gallery's image URLs, download them and pack them as a .cbz.

    Written for Python 2 (urllib2). ``galleryinfo['Title']`` is filled in by
    ``readGalleries``; ``galleryinfo['Images']`` is never populated here.
    """

    # Text removed from the end of the page title when naming the archive
    # (presumably the suffix the site appends to every title).
    title_suffix = " - E-Hentai Galleries"
    # Used when a title yields no usable characters for a file/dir name.
    fallback_name = "gallery"

    def __init__(self):
        self.agent = {'User-Agent':'Uzbl (Webkit 1.3) (Linux i686 [i686])'}
        self.galleryinfo = {'Title' : None, 'Images' : None}

    def get(self, url):
        """Fetch *url* with the configured User-Agent and return the body.

        Raises:
            InvalidURLError: if urllib2 raises ValueError for the URL.
                Other urllib2 errors propagate unchanged.
        """
        try:
            request = urllib2.Request(url, headers=self.agent)
            response = urllib2.urlopen(request)
        except ValueError as err:
            # Report the problem to the caller instead of re-entering main()
            # and then returning None to whoever asked for the page.
            raise InvalidURLError(str(err))
        try:
            return response.read()
        finally:
            response.close()

    @staticmethod
    def _page_count(summary):
        """Return ceil(total / per_page) from the gallery summary text.

        The second-to-last whitespace-separated token is used as the total
        and the fourth-to-last as the per-page count (presumably text like
        "Showing 1 - 40 of 123 images" -- confirm against the live page).
        """
        fields = summary.split()
        total = float(fields[-2])
        per_page = float(fields[-4])
        return int(math.ceil(total / per_page))

    @staticmethod
    def _filename_from_url(url):
        """Return the file name for *url*.

        For query-style ("php") URLs this is whatever follows the last "="
        in the final path component; otherwise the component itself.
        """
        return os.path.basename(url).split("=")[-1]

    def _archive_title(self):
        """Return the stored title cleaned up for use as an archive name."""
        title = self.galleryinfo['Title'] or ''
        # str.strip() takes a set of characters, not a suffix, and would eat
        # matching letters from both ends of the real title.
        if title.endswith(self.title_suffix):
            title = title[:-len(self.title_suffix)]
        title = title.strip().replace(os.sep, "_")
        return title or self.fallback_name

    def readGalleries(self, url):
        """Return the image URLs of the gallery at *url*.

        Stores the page title in ``galleryinfo['Title']``, walks every
        thumbnail page (``?p=0`` .. ``?p=numpages-1``) to collect the links
        to the individual image pages, then opens each image page and takes
        the ``src`` of the first ``<img>`` following an ``<iframe>``.
        Sleeps one second after each image page request.
        """
        tree = fromstring(self.get(url))
        summary = tree.xpath("//p[@class='ip']/text()")[0]
        self.galleryinfo['Title'] = tree.xpath("//title")[0].text
        numpages = self._page_count(summary)

        pagelinks = []
        for index in range(numpages):
            page = fromstring(self.get('%s?p=%d' % (url, index)))
            pagelinks.extend(
                el.attrib['href']
                for el in page.xpath("//div[@class='gdtm']//a"))

        iurls = []
        for link in pagelinks:
            page = fromstring(self.get(link))
            iurls.append(
                page.xpath("//iframe/following::img")[0].attrib['src'])
            time.sleep(1)
        return iurls

    def download(self, iurls):
        """Download every URL in *iurls* and return the directory used.

        The directory lives under the system temp dir and is named after
        the ASCII letters of the gallery title.
        """
        title = self.galleryinfo['Title'] or ''
        subdirname = ''.join(ch for ch in title if ch in ascii_letters)
        # An empty name would make tempdir the system temp directory itself,
        # and archive() would then pack everything found there.
        tempdir = os.path.join(tempfile.gettempdir(),
                               subdirname or self.fallback_name)
        if not os.path.isdir(tempdir):
            os.mkdir(tempdir)
        for url in iurls:
            # Fetch first so a failed request does not leave an empty file.
            data = self.get(url)
            target = os.path.join(tempdir, self._filename_from_url(url))
            with open(target, 'wb') as out:
                out.write(data)
        return tempdir

    def archive(self, tempdir, savedir):
        """Zip the files in *tempdir* into ``<savedir>/<title>.cbz``.

        Entries are stored under their bare file names, in sorted order.
        The process working directory is left untouched.

        Returns:
            The path of the archive that was written.
        """
        arcpath = os.path.join(savedir, self._archive_title() + ".cbz")
        zipper = zipfile.ZipFile(arcpath, 'w')
        try:
            for name in sorted(os.listdir(tempdir)):
                # The second argument keeps the temp path out of the archive.
                zipper.write(os.path.join(tempdir, name), name)
        finally:
            zipper.close()
        return arcpath


def main():
    """Prompt for a gallery URL and save directory, then build the archive.

    Both prompts are repeated whenever a URL is rejected as invalid.
    """
    while True:
        gallery = raw_input("Enter gallery URL: \n")
        savedir = raw_input("Enter save directory: \n")
        g = GalleryParser()
        try:
            iurls = g.readGalleries(gallery)
            # Single-argument print: same output as the two-item print
            # statement, which put a space between the items.
            print("%d %s" % (len(iurls), " images found. Downloading..."))
            tempdir = g.download(iurls)
            g.archive(tempdir, savedir)
        except InvalidURLError:
            print("Invalid URL \n")
        else:
            print("Done.")
            return


if __name__ == "__main__":
    main()