Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

Critique my code

Name: Anonymous 2011-07-26 4:31

Hello /prog/, I'm a complete beginner and I just wrote the longest script I have yet written. It's not finished, but would you please critique it and tell me if it's ugly or not?


#########################################
# g.e-hentai.org Gallery Scraper v0.1   #  
# by VitamnP                            #
# ENTERPRISE QUALITY                    #
#########################################

import urllib2
import time
from math import ceil
from BeautifulSoup import BeautifulSoup

def requestPage(url):
    """Fetch *url* over HTTP and return the page parsed as a BeautifulSoup object.

    Sends a browser-like User-Agent header so the server does not reject
    the request as coming from a script.
    """
    # Identify as a real browser so we don't get banned.
    agent = {'User-Agent': 'Uzbl (Webkit 1.3) (Linux i686 [i686])'}

    request = urllib2.Request(url, data=None, headers=agent)
    response = urllib2.urlopen(request)
    try:
        page = response.read()
    finally:
        # Always release the connection, even if read() raises.
        response.close()

    # Parse the raw HTML into a navigable tree.
    return BeautifulSoup(page)

def getInfo(url):
    """Scrape a gallery page and return its thumbnail-page URLs and info text.

    Returns a dict with:
      'pages_list'  -- list of thumbnail-page URLs ('?p=N'), highest page first
      'info_string' -- the raw "N images, M MB" text from the page
    """
    # Request page and prepare for parsing
    soup = requestPage(url)

    # The image count / total size text immediately follows the 'Images:' label.
    image_string = soup.find(text='Images:')
    info_string = image_string.findNext(text=True)

    # 20 thumbnails per page; round up to get the page count.
    # BUG FIX: math.ceil returns a float in Python 2, so the original
    # produced URLs like '?p=3.0' -- convert to int before str().
    num_images = float(info_string.split(' ')[0])
    num_pages = int(ceil(num_images / 20.0))

    # Highest-numbered page first, matching the original ordering.
    pages_list = [url + '?p=' + str(p) for p in range(num_pages - 1, -1, -1)]

    return {'pages_list': pages_list, 'info_string': info_string}
   
def getImagePageLinks(pages_list):
    """Collect the individual image-page URLs from every thumbnail page.

    pages_list -- list of thumbnail-page URLs (from getInfo).
    Returns a flat list of image-page hrefs in the order encountered.
    """
    all_page_links = []

    # One request per thumbnail page; each 'gdtm' div wraps one thumbnail
    # whose first <a> links to the full image page.
    # (Renamed the inner loop variable -- the original shadowed 'link'.)
    for page_url in pages_list:
        soup = requestPage(page_url)
        thumb_divs = soup.findAll('div', {"class": "gdtm"})
        for thumb in thumb_divs:
            all_page_links.append(thumb.findNext('a')['href'])

    return all_page_links
   
def getAllImageLinks(all_page_links, wait_time_secs):
    """Visit each image page and collect the direct image URLs.

    all_page_links  -- list of image-page URLs (from getImagePageLinks)
    wait_time_secs  -- delay between requests, to avoid hammering the server
    Returns the list of <img> src URLs, one per page.
    """
    all_image_links = []

    # BUG FIX: the original iterated its own just-created empty result list
    # (all_image_links) instead of the parameter, and appended to an
    # undefined name 'all_images' -- so it always returned [].
    for page_link in all_page_links:
        soup = requestPage(page_link)
        # The full-size image sits after the top iframe on the page.
        topframe = soup.find('iframe')
        imgurl = topframe.findNext('img')['src']
        all_image_links.append(imgurl)
        # Be polite: pause between page loads.
        time.sleep(wait_time_secs)

    return all_image_links

def download(image_links, save_dir):
    """Download every image URL in *image_links* into *save_dir*.

    Filenames are taken from the last path component of each URL.
    NOTE(review): save_dir is assumed to already end with a path
    separator -- the filename is appended with plain '+'.
    """
    for image in image_links:
        # BUG FIX: the original wrote an undefined name 'data' and never
        # actually fetched the image from the server.
        response = urllib2.urlopen(image)
        try:
            data = response.read()
        finally:
            response.close()

        filename = save_dir + image.split('/')[-1]
        with open(filename, 'wb') as f:
            f.write(data)
       
       
   
       
def main():
    print 'g.e-hentai.org Gallery Scraper v0.1'
    print "by VitamnP"
   
    gallery_url = raw_input("Enter gallery URL: \n")
    save_dir = raw_input("Enter save directory: \n")
    wait_time_secs = input("Seconds to wait between page loads: \n")
   
    info = getInfo(gallery_url)
    print "Gallery contains ", info['info_string']

    all_page_links = getImagePageLinks(info['pages_list'])
    all_image_links = getAllImageLinks(all_page_links, wait_time_secs)

    download(all_image_links, save_dir)
   
if __name__ == "__main__":
    main()

Name: Anonymous 2011-07-26 6:12

Great size. Look thick. Solid. Tight. Keep us all posted on your continued progress with any new progress pics or vid clips. Show us what you got man. Wanna see how freakin' huge, solid, thick and tight you can get. Thanks for the motivation.

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List