Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

Scraping Culver

Name: Anonymous 2013-07-01 8:27


#!/bin/bash
# Download the /prog/ thread-list pages (curl expands the [0-715] range into
# one request per number), keep only the lines mentioning "Culver"
# (case-insensitive), turn the relative read/prog links into absolute ones,
# and save the result to ohyes.html.
base='https://dis.4chan.org'

# NOTE(review): the trailing ';' is part of the requested URL; kept as-is.
curl --silent "${base}/list/prog/[0-715];" \
  | grep -i 'Culver' \
  | sed "s|read/prog|${base}/read/prog|g" > ohyes.html


maybe there's porn in these links

Name: Anonymous 2013-07-01 10:46

Over 200 pics of Culver:

(use srfi-1 srfi-13 tcp)
(define (get-page uri)
 ;; Fetch URI with a minimal hand-rolled HTTP GET over a plain TCP
 ;; connection to port 80 and return the response body as a string
 ;; (everything after the blank line that ends the response headers).
 ;;
 ;; uri: a string such as "http://host/path".  The scheme prefix and the
 ;;      path are optional; a missing path is requested as "/".
 ;; Signals an error if the response contains no header terminator.
 ;;
 ;; NOTE(review): the request is sent as HTTP/1.1 but the body is returned
 ;; verbatim; a chunked transfer encoding would not be decoded -- confirm
 ;; the servers used never reply chunked, or downgrade to HTTP/1.0.

 ;; Split URI into (host path).
 (define (parse-uri uri)
  (let* ((scheme-end (string-contains uri "://"))
     (d (if scheme-end (substring uri (+ scheme-end 3)) uri))
     (slash-index (string-contains d "/")))
   (if slash-index
    (list (substring d 0 slash-index) (substring d slash-index))
    ;; No path component at all: request the root document.
    (list d "/"))))

 ;; Drop the status line and headers.  "\n\r\n" is the tail of the
 ;; "\r\n\r\n" sequence that separates headers from the body.
 (define (cut-header s)
  (let ((header-end (string-contains s "\n\r\n")))
   (if header-end
    (substring s (+ header-end 3))
    (error "get-page: malformed HTTP response (no header terminator)" uri))))

 (let ((host+page (parse-uri uri)))
  (call-with-values
   (lambda () (tcp-connect (first host+page) 80))
   (lambda (r w)
    (display
     (format "GET ~a HTTP/1.1\r\nHost: ~a\r\nConnection: close\r\n\r\n"
      (second host+page) (first host+page)) w)
    ;; Read the whole response, one character at a time, until EOF.
    (let ((response
       (string-unfold eof-object? values
        (lambda (x) (read-char r)) (read-char r))))
     ;; Close both ports so the socket is released; without this every
     ;; call leaks a connection.
     (close-input-port r)
     (close-output-port w)
     (cut-header response))))))

(define (tokenise str delimiter)
 ;; Split STR into the list of maximal substrings that do not contain the
 ;; character DELIMITER.
 (define (token-char? ch)
  (not (char=? ch delimiter)))
 (string-tokenize str token-char?))

(define (get-image page)
 ;; Return the URI of the image referenced by PAGE, derived from the first
 ;; double-quote-delimited token that contains "_m.jpg": the token is cut
 ;; at "_m.jpg" and ".jpg" is appended, so ".../123_m.jpg" becomes
 ;; ".../123.jpg".
 ;;
 ;; page: the page source as a string.
 ;; Signals an error if PAGE contains no such token.
 (let ((thumb
    (find (lambda (x) (string-contains x "_m.jpg")) (tokenise page #\"))))
  (if (not thumb)
   (error "get-image: no \"_m.jpg\" reference found in page")
   ;; Cut at the position of the match rather than a fixed six characters
   ;; from the end, so a token with trailing text after "_m.jpg" is not
   ;; mangled.
   (string-append
    (substring thumb 0 (string-contains thumb "_m.jpg")) ".jpg"))))

(define (get-newer page)
 ;; Return the absolute URI of the next page to visit: the first
 ;; single-quote-delimited token of PAGE that contains "photosof",
 ;; prefixed with "http://www.flickr.com".
 ;;
 ;; page: the page source as a string.
 ;; Signals an error if PAGE contains no such token (previously this
 ;; surfaced as a type error from string-append receiving #f).
 (let ((link
    (find (lambda (x) (string-contains x "photosof")) (tokenise page #\'))))
  (if link
   (string-append "http://www.flickr.com" link)
   (error "get-newer: no \"photosof\" link found in page"))))

(define (write-file path data)
 ;; Write the string DATA to the file at PATH.
 (define (emit)
  (write-string data))
 (with-output-to-file path emit))

(define (fetch-the-culver newest oldest)
 ;; Walk the chain of pages starting at the URI OLDEST, following each
 ;; page's get-newer link, until the page whose URI equals NEWEST has been
 ;; processed.  For every page visited, its image is downloaded and saved
 ;; as culver1.jpg, culver2.jpg, ... in visiting order, and a
 ;; "source -> file" progress line is printed.

 ;; Download the image referenced by PAGE into the INDEX-th output file.
 (define (save-image! index page)
  (let* ((source (get-image page))
     (target (format "culver~a.jpg" (number->string index))))
   (printf "~a -> ~a\n" source target)
   (write-file target (get-page source))))

 (let walk ((index 1) (uri oldest) (page (get-page oldest)))
  (save-image! index page)
  (unless (string=? uri newest)
   (let ((newer (get-newer page)))
    (walk (+ index 1) newer (get-page newer))))))

;; Run the crawl.  fetch-the-culver takes the newest page first and the
;; oldest page second; the walk starts at the oldest one.
(let ((newest
    "http://www.flickr.com/photos/adactio/5155673220/in/photosof-leahculver/")
   (oldest
    "http://www.flickr.com/photos/thomashawk/3263299572/in/photosof-leahculver/"))
 (fetch-the-culver newest oldest))

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List