Name: Anonymous 2012-03-20 6:11
Share some 'really quick hack' scripts you have made.
[b]Scrape gelbooru for images based on a tag search list[/b]
import os
import sys
import re
import urllib
import urllib2
tagsfile = open('tags.txt', 'r')
tags = tagsfile.read().split('\n')
for tag in tags:
pid = 0
keepgoing = 1
while keepgoing:
print 'sp', 'http://gelbooru.com/index.php?page=post&s=list&tags='+tag+'&pid='+str(pid)
searchpage = urllib2.urlopen('http://gelbooru.com/index.php?page=post&s=list&tags='+tag+'&pid='+str(pid)).read()
searchpids = re.findall('<a id="p(\\d+)"', searchpage)
if len(searchpids) == 0:
keepgoing = 0
for searchpid in searchpids:
print 'ip', 'http://gelbooru.com/index.php?page=post&s=view&id='+searchpid
imagepage = urllib2.urlopen('http://gelbooru.com/index.php?page=post&s=view&id='+searchpid).read()
imageurl = re.findall('<a href="([^"]+)"[^>]+>Original image</a>', imagepage)[0]
print 'i', imageurl
urllib.urlretrieve(imageurl, 'out/' + imageurl.split('/')[-1])
pid += 28[code]
[b]Rename all files in a directory to their MD5 hashes[/b]
[code]import os
import sys
import hashlib
failed = ''
for fn in os.listdir(sys.argv[1]):
md5 = hashlib.md5()
ffn = os.path.join(sys.argv[1], fn)
fext = os.path.splitext(fn)[1]
with open(ffn, 'rb') as f:
for chunk in iter(lambda: f.read(128 * md5.block_size), ''):
md5.update(chunk)
fhash = md5.hexdigest()
try:
fnn = fhash + fext
print fn, '=>', fnn
os.rename(ffn, os.path.join(sys.argv[1], fnn))
except:
failed += 'failed: ' + fn + '\n'
print failed