I'm not
>>9, but I've already written something like this and I'll post it because
why not. I don't give a damn if it doesn't fit your needs.
I wrote it back in late October 2008 (doesn't work in FIOC 3k). I stole the encode_multipart_formdata function from somewhere else online. I hacked it together hastily, so I've added some comments. If you're not going to bother rewriting it, attribute it to my
TRIPCODE somewhere.
import mimetypes, urllib2, os.path
from random import randint
def post_b(thread, comment, img=None, board="b",
           post_host="dat.4chan.org", image_host="img.4chan.org"):
    """Post a reply, optionally with an attached file, to a thread.

    thread     -- thread number; must be an integer because it is formatted
                  with %d into the Referer URL.
    comment    -- comment text.
    img        -- path of the file to upload, or None for no attachment.
    board      -- board directory used in the posting path and the Referer.
    post_host  -- host that receives the imgboard.php POST.
    image_host -- host named in the Referer URL.

    The defaults reproduce the original hard-coded /b/ targets. To post to
    another board you need that board's own posting host and image host.

    Returns whatever post_multipart returns for the request.
    """
    selector = "/%s/imgboard.php" % board
    referer = "http://%s/%s/res/%d.html" % (image_host, board, thread)
    return post_multipart(post_host, selector, get_postdata(thread, comment),
                          (get_filedata(img),), headers={"Referer": referer})
def get_postdata(thread, comment, name = '', email = 'noko', password='1234'):
    """Return the text form fields for a post as a tuple of (name, value) pairs.

    thread is converted with str() for the "resto" field; the remaining
    arguments are passed through unchanged. The "MAX_FILE_SIZE" and "mode"
    values are fixed constants.
    """
    field_names = ("MAX_FILE_SIZE", "resto", "name", "email",
                   "com", "pwd", "mode")
    field_values = ("2097152", str(thread), name, email,
                    comment, password, "regist")
    # Pair names with values positionally; the field order is preserved.
    return tuple(zip(field_names, field_values))
def get_filedata(filepath = None, ulname = None):
    """Build the ('upfile', filename, data) triple for the upload form part.

    filepath -- path of the file to attach; a falsy value (None or '')
                means "no attachment".
    ulname   -- filename to report in the upload; when None it defaults to
                the basename of filepath.

    Returns ('upfile', ulname, contents) for an attachment, or the
    placeholder ('upfile', '', '') when there is none.
    Raises IOError/OSError if the file cannot be opened or read.
    """
    if not filepath:
        # No attachment: still emit an 'upfile' part, with empty name and data.
        return ('upfile', '', '')
    if ulname is None:
        ulname = os.path.basename(filepath)
    # The whole file is read into memory. The context manager guarantees the
    # handle is closed even if the read fails (the original leaked it).
    with open(filepath, "rb") as upload:
        return ('upfile', ulname, upload.read())
def _decode_body(raw, content_encoding):
    """Undo a gzip or deflate Content-Encoding; return raw unchanged otherwise."""
    import zlib  # local import: only needed here, keeps the file's import block untouched

    encoding = (content_encoding or '').strip().lower()
    if encoding in ('gzip', 'x-gzip'):
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        return zlib.decompress(raw, 16 + zlib.MAX_WBITS)
    if encoding == 'deflate':
        try:
            return zlib.decompress(raw)
        except zlib.error:
            # Fall back to a raw deflate stream with no zlib header.
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def post_multipart(host, selector, fields, files, headers = None):
    """POST fields and files as multipart/form-data and return the response body.

    host     -- host name; the request goes to "http://<host><selector>".
    selector -- path part of the URL.
    fields   -- (name, value) pairs, passed to encode_multipart_formdata.
    files    -- (name, filename, data) triples, passed to
                encode_multipart_formdata.
    headers  -- optional dict of extra request headers. It is copied, so the
                caller's dict is not modified; the browser-like headers set
                below override entries with the same key.

    Returns the response body, decoded if the server applied the gzip or
    deflate encoding that this request advertises via Accept-Encoding.
    Raises whatever urllib2.urlopen raises on failure.
    """
    # Copy so that the headers added below never leak into the caller's dict.
    request_headers = dict(headers) if headers is not None else {}
    content_type, body = encode_multipart_formdata(fields, files)
    # Browser-like header set copied from a Firefox 3.0.3 request.
    request_headers['User-Agent'] = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.3) Gecko/2008092510 Ubuntu/8.04 (hardy) Firefox/3.0.3'
    request_headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    request_headers['Accept-Language'] = 'en-us,en;q=0.5'
    request_headers['Accept-Encoding'] = 'gzip,deflate'
    request_headers['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
    request_headers['Keep-Alive'] = '300'
    request_headers['Connection'] = 'keep-alive'
    request_headers['Content-Type'] = content_type
    request_headers['Content-Length'] = str(len(body))
    request = urllib2.Request("http://%s%s" % (host, selector), body, request_headers)
    response = urllib2.urlopen(request)
    try:
        raw = response.read()
        content_encoding = response.info().get('Content-Encoding', '')
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    # urllib2 does not decode Content-Encoding itself, so do it here.
    return _decode_body(raw, content_encoding)
def encode_multipart_formdata(fields, files):
    """Encode form fields and files as a multipart/form-data request body.

    fields -- iterable of (name, value) pairs for plain form fields.
    files  -- iterable of (name, filename, data) triples for file parts;
              more than one file part is supported.

    Returns (content_type, body): the Content-Type header value carrying the
    boundary, and the body with parts joined by CRLF.
    Prints one "name filename size" line to stdout per file part.
    """
    # Materialize both inputs: they are scanned once for boundary collisions
    # and then iterated again to build the body.
    fields = list(fields)
    files = list(files)

    # Pick a boundary that does not occur inside any payload. A collision
    # would make the receiver split a part in the middle of its data.
    payloads = [value for (_key, value) in fields]
    payloads += [value for (_key, _filename, value) in files]
    boundary = get_random_boundary()
    while any(boundary in payload for payload in payloads):
        boundary = get_random_boundary()

    delimiter = '--' + boundary
    data = []
    for (key, value) in fields:
        data.append(delimiter)
        data.append('Content-Disposition: form-data; name="%s"' % key)
        data.append('')  # blank line separates part headers from content
        data.append(value)
    for (key, filename, value) in files:
        # Same output as the original print statement, written so that it
        # also parses on interpreters where print is a function.
        print("%s %s %d" % (key, filename, len(value)))
        data.append(delimiter)
        data.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
        data.append('Content-Type: %s' % get_content_type(filename))
        data.append('')
        data.append(value)
    data.append(delimiter + '--')  # closing delimiter
    data.append('')  # makes the body end with a CRLF
    body = '\r\n'.join(data)
    content_type = 'multipart/form-data; boundary=%s' % boundary
    return content_type, body
def get_random_boundary():
    """Return a random 32-digit decimal string for use as a multipart boundary.

    The value is drawn from 0 .. 10**32 - 1 and zero-padded to 32 digits so
    every boundary has the same length. The original padded to only 16
    digits, so the length varied with the number drawn.
    Nothing here checks the boundary against the payload.
    """
    return '%032d' % randint(0, 10**32 - 1)
def get_content_type(filename):
    """Guess the MIME type for filename, defaulting to a generic binary type."""
    guessed_type, _encoding = mimetypes.guess_type(filename)
    if guessed_type:
        return guessed_type
    # Unknown or missing extension: fall back to opaque binary data.
    return 'application/octet-stream'
Hopefully this is a good starting point for you to write a 4chan file dumper in Haskell.