Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

Critique my code

Name: Anonymous 2011-07-26 4:31

Hello /prog/, I'm a complete beginner and I just wrote the longest script I have yet written. It's not finished, but would you please critique it and tell me if it's ugly or not?


#########################################
# g.e-hentai.org Gallery Scraper v0.1   #  
# by VitamnP                            #
# ENTERPRISE QUALITY                    #
#########################################

import urllib2
import time
from math import ceil
from BeautifulSoup import BeautifulSoup

def requestPage(url):
    """Fetch `url` over HTTP and return the page parsed as a BeautifulSoup object.

    The request is sent with a browser-like User-Agent header so the site
    doesn't reject or ban the scraper for looking like a bot.
    """
    agent = {'User-Agent': 'Uzbl (Webkit 1.3) (Linux i686 [i686])'}

    request = urllib2.Request(url, data=None, headers=agent)
    response = urllib2.urlopen(request)
    try:
        # Read the whole body before closing; the original leaked the
        # connection (never closed) and read an unused `info` value.
        page = response.read()
    finally:
        response.close()

    return BeautifulSoup(page)

def getInfo(url):
    """Scrape the gallery front page and return its paging info.

    Returns a dict with:
      'pages_list'  - thumbnail-page URLs ('?p=0', '?p=1', ...) in ascending order
      'info_string' - the raw text after the 'Images:' label,
                      e.g. "42 images, 12.34 MB"
    """
    soup = requestPage(url)

    # The image count/size text is the text node immediately after 'Images:'.
    image_string = soup.find(text='Images:')
    info_string = image_string.findNext(text=True)

    # The site shows 20 thumbnails per page.
    # BUG FIX: math.ceil returns a float in Python 2, so the original built
    # URLs like '?p=2.0' (and in descending order). Use ints and range().
    num_images = int(float(info_string.split(' ')[0]))
    num_pages = int(ceil(num_images / 20.0))
    pages_list = [url + '?p=' + str(p) for p in range(num_pages)]

    return {'pages_list': pages_list, 'info_string': info_string}
   
def getImagePageLinks(pages_list):
    """Visit each thumbnail page and collect the URLs of the image pages.

    pages_list -- list of thumbnail-page URLs (from getInfo()['pages_list'])
    Returns a flat list of image-page hrefs in page order.
    """
    all_page_links = []

    # BUG FIX: the original reused the name `link` for both the outer
    # (page URL) and inner (thumbnail div) loop variables, shadowing the
    # outer one. Distinct names make the intent clear.
    for page_url in pages_list:
        soup = requestPage(page_url)
        # Each thumbnail lives in a <div class="gdtm"> wrapping an <a>.
        for thumb in soup.findAll('div', {"class": "gdtm"}):
            all_page_links.append(thumb.findNext('a')['href'])

    return all_page_links
   
def getAllImageLinks(all_page_links, wait_time_secs):
    """Visit each image page and extract the direct image URL from it.

    all_page_links -- image-page URLs (from getImagePageLinks)
    wait_time_secs -- pause between page loads, to be polite to the server
    Returns the list of direct image URLs.
    """
    all_image_links = []

    # BUG FIX: the original looped over `all_image_links` (always empty, so
    # the function did nothing) and appended to an undefined `all_images`.
    for page_link in all_page_links:
        soup = requestPage(page_link)
        # The full-size image sits right after the top <iframe> on the page.
        topframe = soup.find('iframe')
        imgurl = topframe.findNext('img')['src']
        all_image_links.append(imgurl)
        time.sleep(wait_time_secs)

    return all_image_links

def download(image_links, save_dir):
    """Download every image URL in `image_links` into `save_dir`.

    The filename is taken from the last path component of each URL.
    NOTE(review): `save_dir` is concatenated directly, so it is expected to
    end with a path separator — confirm against caller input.
    """
    for image_url in image_links:
        filename = save_dir + image_url.split('/')[-1]

        # BUG FIX: the original wrote an undefined name `data` and never
        # fetched anything. Fetch the bytes, then write them out.
        response = urllib2.urlopen(image_url)
        try:
            data = response.read()
        finally:
            response.close()

        f = open(filename, 'wb')
        try:
            f.write(data)
        finally:
            f.close()
       
def main():
    print 'g.e-hentai.org Gallery Scraper v0.1'
    print "by VitamnP"
   
    gallery_url = raw_input("Enter gallery URL: \n")
    save_dir = raw_input("Enter save directory: \n")
    wait_time_secs = input("Seconds to wait between page loads: \n")
   
    info = getInfo(gallery_url)
    print "Gallery contains ", info['info_string']

    all_page_links = getImagePageLinks(info['pages_list'])
    all_image_links = getAllImageLinks(all_page_links, wait_time_secs)

    download(all_image_links, save_dir)
   
if __name__ == "__main__":
    main()

Name: Anonymous 2011-07-26 21:29

>>27

enterprise-level developers don't need to know about P vs NP? What if they encounter an NP-hard problem while developing and waste time trying to devise a polynomial-time algorithm for it?

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List