Name: Anonymous 2010-05-10 1:41
gem install hpricot
ruby fukkensaved.rb
-------------------
# fukkensaved.rb
# scrape an entire thread's worth of pictures from /b/
require 'rubygems'
require 'open-uri'
require 'hpricot'
USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1b4) Gecko/20090423 Firefox/3.5b4 GTB5 (.NET CLR 3.5.30729)'
# Downloads a /b/ thread page and hands it to Hpricot for parsing.
#
# thread_id - thread number; interpolated into the thread URL as-is.
#
# Returns whatever Hpricot() produces for the fetched page.
# URI parsing errors and open-uri fetch errors are not rescued here.
def fetch_thread(thread_id)
  thread_url = "http://boards.4chan.org/b/res/#{thread_id}"
  # Open through the parsed URI instead of Kernel#open: open-uri's URL hook on
  # Kernel#open was removed in Ruby 3.0, while OpenURI::OpenRead#open is
  # available on both old and new Rubies.
  page = URI.parse(thread_url).open('User-Agent' => USER_AGENT)
  Hpricot(page)
end
# Downloads one image into the thread's directory, skipping files that are
# already present.
#
# thread_no - name of the directory the image is saved under; it must exist.
# url       - image URL; its basename becomes the local file name.
#
# Returns nil. Download and file-system errors propagate to the caller.
def save_image(thread_no, url)
  target = "#{thread_no}/#{File.basename(url)}"
  # File.exist? rather than File.exists?, which was removed in Ruby 3.2.
  return if File.exist?(target)

  # Fetch through a parsed URI instead of Kernel#open so that a scraped value
  # such as "|command" can never be executed as a subprocess, and so the call
  # keeps working on Ruby 3.0+ where Kernel#open no longer accepts URLs.
  data = URI.parse(url.to_s).open('User-Agent' => USER_AGENT) { |remote| remote.read }

  # Write only after the download succeeded: a failed fetch must not leave an
  # empty file behind, because the existence check above would then skip it
  # forever. The block form closes the handle even if the write raises.
  File.open(target, 'wb') { |file| file.write(data) }
  nil
end
# Ask which thread to scrape. $stdin.gets is used so command-line arguments
# are not treated as input files, and to_s turns an EOF (nil) into "".
print "Thread number? "
thread_id = $stdin.gets.to_s.strip
# An empty answer cannot name a thread or a directory, so stop before doing
# any network or file-system work.
abort "No thread number given." if thread_id.empty?

puts "Loading thread #{thread_id}..."
imgs = fetch_thread(thread_id).search("img[@md5]")

# Make a directory based on the thread number
Dir.mkdir(thread_id) unless File.directory?(thread_id)

# Download each image to this new directory
imgs.each do |img|
  # The image's parent element is expected to be the link to the full-size file.
  href = img.parent.attributes['href']
  # Skip images whose parent carries no href instead of crashing on it.
  next if href.nil? || href.to_s.empty?

  puts "Downloading file #{href}..."
  save_image(thread_id, href)
end
ruby fukkensaved.rb
-------------------
# fukkensaved.rb
# scrape an entire thread's worth of pictures from /b/
require 'rubygems'
require 'open-uri'
require 'hpricot'
USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1b4) Gecko/20090423 Firefox/3.5b4 GTB5 (.NET CLR 3.5.30729)'
# Downloads a /b/ thread page and hands it to Hpricot for parsing.
#
# thread_id - thread number; interpolated into the thread URL as-is.
#
# Returns whatever Hpricot() produces for the fetched page.
# URI parsing errors and open-uri fetch errors are not rescued here.
def fetch_thread(thread_id)
  thread_url = "http://boards.4chan.org/b/res/#{thread_id}"
  # Open through the parsed URI instead of Kernel#open: open-uri's URL hook on
  # Kernel#open was removed in Ruby 3.0, while OpenURI::OpenRead#open is
  # available on both old and new Rubies.
  page = URI.parse(thread_url).open('User-Agent' => USER_AGENT)
  Hpricot(page)
end
# Downloads one image into the thread's directory, skipping files that are
# already present.
#
# thread_no - name of the directory the image is saved under; it must exist.
# url       - image URL; its basename becomes the local file name.
#
# Returns nil. Download and file-system errors propagate to the caller.
def save_image(thread_no, url)
  target = "#{thread_no}/#{File.basename(url)}"
  # File.exist? rather than File.exists?, which was removed in Ruby 3.2.
  return if File.exist?(target)

  # Fetch through a parsed URI instead of Kernel#open so that a scraped value
  # such as "|command" can never be executed as a subprocess, and so the call
  # keeps working on Ruby 3.0+ where Kernel#open no longer accepts URLs.
  data = URI.parse(url.to_s).open('User-Agent' => USER_AGENT) { |remote| remote.read }

  # Write only after the download succeeded: a failed fetch must not leave an
  # empty file behind, because the existence check above would then skip it
  # forever. The block form closes the handle even if the write raises.
  File.open(target, 'wb') { |file| file.write(data) }
  nil
end
# Ask which thread to scrape. $stdin.gets is used so command-line arguments
# are not treated as input files, and to_s turns an EOF (nil) into "".
print "Thread number? "
thread_id = $stdin.gets.to_s.strip
# An empty answer cannot name a thread or a directory, so stop before doing
# any network or file-system work.
abort "No thread number given." if thread_id.empty?

puts "Loading thread #{thread_id}..."
imgs = fetch_thread(thread_id).search("img[@md5]")

# Make a directory based on the thread number
Dir.mkdir(thread_id) unless File.directory?(thread_id)

# Download each image to this new directory
imgs.each do |img|
  # The image's parent element is expected to be the link to the full-size file.
  href = img.parent.attributes['href']
  # Skip images whose parent carries no href instead of crashing on it.
  next if href.nil? || href.to_s.empty?

  puts "Downloading file #{href}..."
  save_image(thread_id, href)
end