Name: Anonymous 2007-12-01 15:43
I'm feeling a little mischievous, so here is a Python script that spiders images from Fakku:
#!/usr/bin/env python
import urllib2
import os
import sys
import re
def main():
site_root = 'http://www.fakku.net/'
pages = site_root + 'viewmanga.php?view=%d'
pt_pg = re.compile('vonline\.php\?.+?&view=')
pt_im = re.compile('<img.+?src="(.+?)">')
opener = urllib2.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT \
5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1'),
('Accept-Encoding', 'gzip,deflate'),
('Accept-Language', 'en-us,en;q=0.5'),
('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;\
q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5')]
urllib2.install_opener(opener)
try:
start, end = int(sys.argv[1]), int(sys.argv[2])
except IndexError:
start, end = 1, 2
for n in xrange(start, end):
url = pages % n
text = urlopen(url).read()
url2 = pt_pg.findall(text)[0]
text2 = urlopen(site_root + url2 + '001').read()
try:
img_url = site_root + pt_im.findall(text2)[0].replace('01.', '%s.')
except IndexError:
print 'unavailable online'
continue
for x in xrange(1, 200):
try:
_img_url = img_url % str(x).zfill(2)
except TypeError:
img_url = img_url.replace('00.', '%s.')
try:
_img_url = img_url % str(x).zfill(2)
except TypeError:
continue
p_n = _img_url[_img_url.rfind('/')+1:]
n_ = str(n)
#~ path = os.path.join(n_, p_n)
path = '%s-%s' % (n_, p_n)
if os.path.exists(path):
continue
try:
pic = urlopen(_img_url)
except urllib2.HTTPError:
if x > 20:
print 'end'
break
else:
print 'skip'
continue
#~ if not os.path.exists(n_):
#~ os.mkdir(n_)
print path
target = open(path, 'wb')
target.write(pic.read())
target.close()
def urlopen(url):
print url
req = urllib2.Request(url)
return urllib2.urlopen(req)
# Run the spider only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
#!/usr/bin/env python
import urllib2
import os
import sys
import re
def main():
site_root = 'http://www.fakku.net/'
pages = site_root + 'viewmanga.php?view=%d'
pt_pg = re.compile('vonline\.php\?.+?&view=')
pt_im = re.compile('<img.+?src="(.+?)">')
opener = urllib2.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT \
5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1'),
('Accept-Encoding', 'gzip,deflate'),
('Accept-Language', 'en-us,en;q=0.5'),
('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;\
q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5')]
urllib2.install_opener(opener)
try:
start, end = int(sys.argv[1]), int(sys.argv[2])
except IndexError:
start, end = 1, 2
for n in xrange(start, end):
url = pages % n
text = urlopen(url).read()
url2 = pt_pg.findall(text)[0]
text2 = urlopen(site_root + url2 + '001').read()
try:
img_url = site_root + pt_im.findall(text2)[0].replace('01.', '%s.')
except IndexError:
print 'unavailable online'
continue
for x in xrange(1, 200):
try:
_img_url = img_url % str(x).zfill(2)
except TypeError:
img_url = img_url.replace('00.', '%s.')
try:
_img_url = img_url % str(x).zfill(2)
except TypeError:
continue
p_n = _img_url[_img_url.rfind('/')+1:]
n_ = str(n)
#~ path = os.path.join(n_, p_n)
path = '%s-%s' % (n_, p_n)
if os.path.exists(path):
continue
try:
pic = urlopen(_img_url)
except urllib2.HTTPError:
if x > 20:
print 'end'
break
else:
print 'skip'
continue
#~ if not os.path.exists(n_):
#~ os.mkdir(n_)
print path
target = open(path, 'wb')
target.write(pic.read())
target.close()
def urlopen(url):
print url
req = urllib2.Request(url)
return urllib2.urlopen(req)
# Run the spider only when executed as a script, not when imported.
if __name__ == '__main__':
    main()