Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

Practical /Prog/ Challenge

Name: Anonymous 2007-09-08 17:17 ID:P8+/2q3A

The challenge is to make a PROGWATCH program

What it does is, scans this file:
http://dis.4chan.org/prog/subject.txt
every 1 min, 10 mins or 30 seconds or so..

When a change occurs (e.g. someone makes a new post or whatever) then it should exec some program set by the user, with the given args

so for example, you could set it up to open your webbrowser for  the page, or get growl to display "New thread on /prog/, title: Ive read SICP!" etc etc

I will post mine afterwards.. anyway good luck and GO FOR IT!

Name: sage 2007-09-09 13:05 ID:Heaven

"Practical

Name: sage 2007-09-09 13:06 ID:Heaven

"Practical

Name: sage 2007-09-09 13:09 ID:Heaven

"Static

Name: sage 2007-09-09 13:10 ID:Heaven

"Practical

Name: Anonymous 2007-09-09 13:10 ID:8qs12Acw

Here is my solution in OCaml: (I suck at OCaml, but it was fun)

open Http_client.Convenience
open Unix
open Str
open List
open Hashtbl
open Printf

(* One parsed line of subject.txt, as built by parse_posts.  The field
   numbers are 0-based positions within the "<>"-separated line. *)
type post = {
  post_subject: string;  (* field 0 *)
  poster: string;        (* field 1 *)
  thread_id: string;     (* field 3; key of the thread table *)
  post_id: string;       (* field 6; key of a thread's known_posts table *)
  post_num: int          (* field 4, converted with int_of_string *)
}

(* What the watcher remembers about one thread (see update_threads). *)
type thread = {
  id: string;  (* thread_id of the post that created this record *)
  subject: string;  (* post_subject of that same post *)
  known_posts: (string, post) t;  (* post_id -> post, for posts already reported as bumps *)
  mutable num_posts: int  (* largest post_num seen for this thread so far *)
}


(* User hook, called by update_threads the first time a thread id is seen.
   Replace the body with anything you like, e.g. a Unix.system call that
   launches some interesting program. *)
let on_new_thread thread =
  let tid = thread.id and about = thread.subject in
  Printf.printf "New thread %s about %s\n" tid about;;

(* User hook, called by update_threads when [post] raises the post count of
   the already-known [thread]. *)
let on_bump_thread thread post =
  Printf.printf "Thread %s (%s) bumped by %s with post %s, #%d in thread\n"
    thread.id thread.subject post.poster post.post_id post.post_num;;

(* Line separator of subject.txt: any run of CR and/or LF characters. *)
let re_split_lines = Str.regexp "[\r\n]+";;

(* Separator between the fields of one line. *)
let re_split_token = Str.regexp "<>";;

(* Parse the raw text of subject.txt into a list of posts, in file order.

   Each line is cut on "<>" with Str.split_delim rather than Str.split:
   Str.split ignores a delimiter at the very start of the string, so a line
   with an empty subject would lose its first field and every later index
   would be off by one.

   Lines with fewer than seven fields, or whose post count is not an
   integer, are reported on stderr and skipped, so one bad line cannot
   stop the watcher. *)
let parse_posts raw_posts =
  let parse_post raw_post =
    match Str.split_delim re_split_token raw_post with
    | subject :: name :: _ :: tid :: num :: _ :: pid :: _ ->
        (try
           Some { post_subject = subject; poster = name; thread_id = tid;
                  post_id = pid; post_num = int_of_string num }
         with Failure _ -> None)
    | _ -> None
  in
  let keep acc raw_post =
    match parse_post raw_post with
    | Some post -> post :: acc
    | None ->
        Printf.eprintf "Skipping malformed subject.txt line: %s\n" raw_post;
        acc
  in
  (* Accumulate in reverse, then restore the original line order. *)
  List.rev (List.fold_left keep [] (Str.split re_split_lines raw_posts));;

(* Feed freshly parsed [posts] into the [threads] table (thread id ->
   thread record), firing on_new_thread for ids not seen before and
   on_bump_thread for posts that raise a thread's post count. *)
let update_threads threads posts =
  (* Return the record for the post's thread, creating, storing and
     announcing it when the id is not in the table yet. *)
  let lookup_or_register post =
    if Hashtbl.mem threads post.thread_id then
      Hashtbl.find threads post.thread_id
    else begin
      let fresh =
        { id = post.thread_id;
          subject = post.post_subject;
          known_posts = Hashtbl.create 50;
          num_posts = post.post_num }
      in
      Hashtbl.add threads fresh.id fresh;
      on_new_thread fresh;
      fresh
    end
  in
  let handle post =
    let thread = lookup_or_register post in
    (* A thread registered just above already carries this post's count, so
       its first post is never reported as a bump nor remembered. *)
    let unseen = not (Hashtbl.mem thread.known_posts post.post_id) in
    if unseen && thread.num_posts < post.post_num then begin
      thread.num_posts <- post.post_num;
      Hashtbl.add thread.known_posts post.post_id post;
      on_bump_thread thread post
    end
  in
  List.iter handle posts;;

(* Poll [url] forever: download it, parse the posts, pass them to
   update_threads, flush all output channels and sleep 20 seconds before the
   next round.  The thread table lives for the whole run; the function
   never returns. *)
let thread_watch url =
  let seen_threads = Hashtbl.create 50 in
  let rec poll () =
    let body = http_get url in
    update_threads seen_threads (parse_posts body);
    flush_all ();
    Unix.sleep 20;
    poll ()
  in
  poll ();;

(* Start the watcher on /prog/'s subject.txt; this call never returns. *)
let () = thread_watch "http://dis.4chan.org/prog/subject.txt";;


----

Requires netclient, compiles with ocamlfind ocamlopt -package netclient -linkpkg -o subject str.cmxa subject.ml

Name: Anonymous 2007-09-09 13:36 ID:TWv7+hVQ

So, fetch subject.txt (≈ 165 000 bytes) every minute for one day: 165 000 B × 60 × 24 ≈ 226 MB of traffic per day, per retard who leaves this script open. That's quite the load compared to regular textboard usage.

Name: Anonymous 2007-09-09 13:37 ID:8qs12Acw

Except I'm doing it every 20 seconds because I'm cool like that.

Name: Anonymous 2007-09-09 13:39 ID:Heaven

>>86
Yeah, and considering world4ch has RSS feeds, these things are not very ``practical''. But this is still a better challenge than those copied Project Euler problems.

Name: Anonymous 2007-09-09 13:39 ID:8qs12Acw

Although, it should be trivial to make it fetch say, only the first 1KB or something like that.

Name: Anonymous 2007-09-09 13:51 ID:DWe23HTk

>>89
Well, you don't want to miss saged replies to posts on the last page, now do you?

Name: Anonymous 2007-09-09 13:51 ID:uyxePQ7x

Name: Anonymous 2007-09-09 13:54 ID:Heaven

>>91
That is not a full solution, though. Even when the list actually gets updated, we are only interested in the updated lines, all of which seem to be at the beginning of the file.

Name: Anonymous 2007-09-09 14:08 ID:Heaven

>>90
Oh crap. You're right. Disregard >>92.

Name: Anonymous 2007-09-09 14:39 ID:8qs12Acw

Here is a better version of my OCaml one.

This time it doesn't keep track of all the posts that it's seen (because it seems pointless). Another thing to do would be to drop threads that it can't see in the file anymore out of the hashtable.

No, I didn't have to be that anal about the types with the poster, but why not.

open Http_client.Convenience
open Unix
open Str
open List
open Hashtbl
open Printf

(* Name attached to a post: Poster carries a non-empty name, Unknown_poster
   stands for an empty name string (see poster_of_string). *)
type poster = Poster of string | Unknown_poster;;

(* One parsed line of subject.txt, as built by parse_posts; it is also the
   value stored per thread id in the watcher's table.  The field numbers are
   0-based positions within the "<>"-separated line. *)
type post = {
  subject: string;    (* field 0 *)
  poster: poster;     (* field 1, converted with poster_of_string *)
  thread_id: string;  (* field 3; key of the thread table *)
  post_id: string;    (* field 6 *)
  post_num: int       (* field 4, converted with int_of_string *)
};;

(* Wrap a raw name field: the empty string becomes Unknown_poster, anything
   else becomes Poster. *)
let poster_of_string str =
  match str with
  | "" -> Unknown_poster
  | _ -> Poster str;;

(* Printable form of a poster; Unknown_poster is shown as "Unknown". *)
let string_of_poster = function
  | Unknown_poster -> "Unknown"
  | Poster name -> name;;


(* User hook, called by update_threads the first time a thread id is seen.
   Replace the body with anything you like, e.g. a Unix.system call that
   launches some interesting program.  A post count of 1 means the thread was
   just started; any other count means it already had replies when first
   seen. *)
let on_new_thread post =
  let who = string_of_poster post.poster in
  match post.post_num with
  | 1 ->
      Printf.printf "New thread %s about %s started by %s\n"
        post.thread_id post.subject who
  | count ->
      Printf.printf "New thread %s about %s last posted in by %s has %d posts already\n"
        post.thread_id post.subject who count;;

(* User hook, called by update_threads when a known thread shows a higher
   post count than the one stored for it. *)
let on_bump_thread post =
  let who = string_of_poster post.poster in
  Printf.printf "Thread %s (%s) bumped by %s with post %s, #%d in thread\n"
    post.thread_id post.subject who post.post_id post.post_num;;


(* Line separator of subject.txt: any run of CR and/or LF characters. *)
let re_split_lines = Str.regexp "[\r\n]+";;

(* Separator between the fields of one line. *)
let re_split_token = Str.regexp "<>";;

(* Parse the raw text of subject.txt into a list of posts, in file order.

   Each line is cut on "<>" with Str.split_delim rather than Str.split:
   Str.split ignores a delimiter at the very start of the string, so a line
   with an empty subject would lose its first field and every later index
   would be off by one.

   Lines with fewer than seven fields, or whose post count is not an
   integer, are reported on stderr and skipped, so one bad line cannot
   stop the watcher. *)
let parse_posts raw_posts =
  let parse_post raw_post =
    match Str.split_delim re_split_token raw_post with
    | title :: name :: _ :: tid :: num :: _ :: pid :: _ ->
        (try
           Some { subject = title; poster = poster_of_string name;
                  thread_id = tid; post_id = pid;
                  post_num = int_of_string num }
         with Failure _ -> None)
    | _ -> None
  in
  let keep acc raw_post =
    match parse_post raw_post with
    | Some post -> post :: acc
    | None ->
        Printf.eprintf "Skipping malformed subject.txt line: %s\n" raw_post;
        acc
  in
  (* Accumulate in reverse, then restore the original line order. *)
  List.rev (List.fold_left keep [] (Str.split re_split_lines raw_posts));;

(* Merge [fresh_posts] into [threads], a table from thread id to the latest
   post recorded for that thread.  An unknown id is stored and reported via
   on_new_thread; a known id whose stored post count is lower is replaced
   and reported via on_bump_thread.

   Only the table lookup is guarded against Not_found.  A Not_found escaping
   from the user-supplied on_bump_thread hook therefore propagates instead of
   being mistaken for "thread not known yet". *)
let update_threads threads fresh_posts =
  let lookup thread_id =
    try Some (Hashtbl.find threads thread_id) with Not_found -> None
  in
  let update_thread post =
    match lookup post.thread_id with
    | None ->
        Hashtbl.add threads post.thread_id post;
        on_new_thread post
    | Some known ->
        if known.post_num < post.post_num then begin
          Hashtbl.replace threads post.thread_id post;
          on_bump_thread post
        end
  in
  List.iter update_thread fresh_posts;;

(* Poll [url] forever: download it, parse the posts, pass them to
   update_threads, flush all output channels and sleep 20 seconds before the
   next round.  The thread table lives for the whole run; the function
   never returns. *)
let thread_watch url =
  let seen_threads = Hashtbl.create 50 in
  let rec poll () =
    let body = http_get url in
    update_threads seen_threads (parse_posts body);
    flush_all ();
    Unix.sleep 20;
    poll ()
  in
  poll ();;

(* Start the watcher on /prog/'s subject.txt; this call never returns. *)
let () = thread_watch "http://dis.4chan.org/prog/subject.txt";;

Name: Anonymous 2007-09-09 16:16 ID:699WrGup

>>88
Since when does world4ch have RSS feeds??

Name: Anonymous 2007-09-09 16:17 ID:Heaven

>>92
No, lines are updated far down the subject.txt if it is due to sage

Name: Anonymous 2007-09-09 16:28 ID:Heaven

>>96
I already corrected myself in >>93.

>>95
Okay, it doesn't. They are atom feeds. I can't remember when they appeared, though.

Name: Anonymous 2007-09-09 16:32 ID:699WrGup

>>97
Where are they? Is what I meant.

Name: Anonymous 2007-09-09 16:33 ID:699WrGup

Anyway, I don't see the point of atom/rss feeds on a messageboard like this.

Name: Anonymous 2007-09-09 16:33 ID:Heaven

Name: Anonymous 2007-09-09 16:34 ID:Heaven

>>99
Me neither.

Name: Anonymous 2007-09-09 20:49 ID:2naHzNYd

>>100
WE HAVE AN EXPERT WINNER

Name: Anonymous 2007-09-09 20:53 ID:ESwnjzn2

>>100
AWESOME!

Name: Anonymous 2007-09-09 20:57 ID:ESwnjzn2

asdf

Name: Anonymous 2007-09-09 20:57 ID:ESwnjzn2

105

Name: Anonymous 2007-09-09 21:15 ID:Mlb21P8h

how far does this go anyways

Name: Anonymous 2007-09-09 22:25 ID:Heaven

>>106
pretty damn far

Name: Anonymous 2007-09-09 22:36 ID:ynCRakIl

Come on fuckers, where are your solutions? I still haven't seen any lisp ones.

Name: Anonymous 2007-09-09 22:41 ID:Mlb21P8h

>>108
lisp is a nerd joke; it's not for actually developing programs

Name: Anonymous 2007-09-09 22:58 ID:ynCRakIl

>>109
I was going to write one in lisp, but then I decided to write the OCaml one - thinking that someone would shortly write one in lisp.

Name: Anonymous 2007-09-09 23:24 ID:Heaven

>>110
ocaml is for fags

Name: Anonymous 2007-09-10 0:21 ID:PClu1KVF

>>108
It hardly seems worthwhile when there are so many versions already.

Name: Anonymous 2007-09-10 0:30 ID:cHAiMhj/

>>112
ooo, Mr PClu1KVF is TOO CHICKEN!!!!!!! BUK BUK BUKKAKE!!!!

Name: Anonymous 2007-09-10 0:32 ID:PClu1KVF

>>113
NOW YOU DID IT

Name: Anonymous 2007-09-10 0:42 ID:2FXDvzsf

bukkake

Name: Anonymous 2007-09-10 1:29 ID:Heaven

>>112
Just three, really, if we don't count the shellscript hacks.

Name: Anonymous 2007-09-10 6:44 ID:2vgkCCNf

>>108
Below. It took a bit of time, because I'm hired as a Rails programmer right now, so I had to fight the confusion. Also, I misinterpreted the format, and then couldn't be bothered to change the code to match the actual format, and only did it today (it actually took surprisingly little time and code, something like 5 lines).

It's designed to be run as a single file, like that:
sbcl --noinform --noprint --disable-debugger --load progwatch.lisp

Tested and known to run in SBCL, should in others but I can't say for sure. Also, SBCL is a whiny bitch and I can't get it to STFU with all compilation warnings.

;; Bootstrap section: evaluated in CL-USER before the watcher's own package
;; is defined.
(in-package :cl-user)

;; Silence compiler chatter and load the libraries, at compile time, load
;; time and when the file is simply executed.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (setf *compile-verbose* nil)
  (setf *compile-print* nil)

;; SBCL only: when the debugger would be invoked, print the condition to
;; *error-output* and call QUIT instead.
#+sbcl (setf *invoke-debugger-hook*
             (lambda (condition hook)
               (declare (ignore hook))
               ;; Uncomment to get backtraces on errors
               ;; (sb-debug:backtrace 20)
               (format *error-output* "Error: ~A~%" condition)
               (quit)))

  ;; Load ASDF, then the systems the watcher depends on, without progress
  ;; output.
  (require :asdf)
  (asdf:oos 'asdf:load-op :asdf-install :verbose nil)
  (asdf:oos 'asdf:load-op :cl-ppcre :verbose nil)
  (asdf:oos 'asdf:load-op :trivial-http :verbose nil)
  (asdf:oos 'asdf:load-op :iterate :verbose nil))

;; Package holding the watcher; symbols of cl-ppcre, iterate and trivial-http
;; are used unqualified inside it.
(defpackage :progwatch
  (:use :cl :cl-user :ppcre :iterate :trivial-http))

;; All definitions that follow live in PROGWATCH.
(in-package :progwatch)

(defvar *last-post* 0
  "Largest post id reported so far; RUN stores it after every poll.")
(defvar *known-posts* (make-hash-table :size 3000)
  "Maps a thread id to the newest post id already reported for that thread.")
(defparameter *url* "http://dis.4chan.org/prog/subject.txt"
  "Address of the subject.txt file that RUN polls.")
(defparameter *update-interval* 60
  "Number of seconds RUN sleeps between two polls.")

(defun group-threads (posts)
  "Group POSTS, a list of (post-id thread-id subject) lists, by thread.
Returns a property list whose indicators are thread ids and whose values are
lists of (post-id subject) entries, the most recently encountered entry
first.  A thread met for the first time is put at the front of the result.

Thread ids are integers, so they are compared with EQL.  GETF compares with
EQ, which is not guaranteed to recognise two equal integers."
  (let (threads)
    (flet ((thread-tail (thread-id)
             ;; Tail of THREADS starting at THREAD-ID's indicator, or NIL.
             (do ((tail threads (cddr tail)))
                 ((or (null tail) (eql (car tail) thread-id)) tail))))
      (iter (for (post-id thread-id subject) in posts)
            (let ((entry (list post-id subject))
                  (tail (thread-tail thread-id)))
              (if tail
                  (push entry (second tail))
                  (setf threads (list* thread-id (list entry) threads))))))
    threads))

(defun extract-new-posts (status known last)
  "Pick the entries of STATUS that have not been reported yet.
STATUS is a list of (post-id thread-id ...) lists, KNOWN a hash table from
thread id to the newest post id recorded for it, LAST a post id.  An entry
is new when its post id is greater than both LAST and the id recorded in
KNOWN for its thread (0 when there is none); KNOWN is updated for every new
entry.  Returns two values: the new entries in their original order, and
the largest of LAST and the new post ids."
  (let ((last-seen last)
        (fresh '()))
    (dolist (post status)
      (let ((post-id (first post)))
        (when (and (> post-id last)
                   (> post-id (gethash (second post) known 0)))
          (setf last-seen (max last-seen post-id)
                (gethash (second post) known) post-id)
          (push post fresh))))
    (values (nreverse fresh) last-seen)))

(defun new-posts (status known last out)
  "Report the not-yet-seen entries of STATUS on OUT, grouped by thread.
Nothing is written when there are no new entries.  Returns two values: the
result of the FORMAT call (or NIL when nothing was written) and the second
value of EXTRACT-NEW-POSTS."
  (multiple-value-bind (fresh last-seen) (extract-new-posts status known last)
    (let ((grouped (group-threads fresh)))
      (values
       (and grouped
            (format out "~%New posts:  ~{~&  Thread ~a: ~{~&    ~{Post ~a: ~a~}~}~}" grouped))
       last-seen))))

(defun success (response)
  "True when RESPONSE, a numeric response code, equals 200."
  (= 200 response))

(defun unescape (post)
  "Convert POST, a list of exactly seven field strings, into a list
of post id, thread id and topic.  The seventh and fourth fields are parsed
as integers.  In the first field every decimal character reference (an
ampersand, a hash sign, digits and a semicolon) is replaced by the character
with that code."
  (destructuring-bind (topic x y thread-id z d post-id) post
    (declare (ignore x y z d))
    (list
     (parse-integer post-id)
     (parse-integer thread-id)
     (cl-ppcre:regex-replace-all
      "&#[0-9]+;" topic
      (lambda (target-string start end match-start match-end reg-starts reg-ends)
        (declare (ignore start end reg-starts reg-ends))
        ;; The digits sit between the two-character prefix and the final
        ;; semicolon of the match.
        (let ((code (parse-integer target-string
                                   :start (+ 2 match-start)
                                   :end (1- match-end))))
          (format nil "~a" (code-char code))))))))

(defun get-status (url)
  "Fetch URL and return one UNESCAPE result per line of the response body,
or NIL when the response code is not accepted by SUCCESS.  The response
stream is closed before returning, whether or not the body was read and
even when reading or parsing signals an error."
  (destructuring-bind (response headers text) (http-get url)
    (declare (ignore headers))
    (unwind-protect
         (when (success response)
           (iter (for line in-stream text using #'read-line)
                 (collect (unescape (split "<>" line)))))
      ;; Without this every poll would leave one open connection behind.
      (when (streamp text)
        (close text)))))

(defun run ()
  "Poll *URL* forever.  Each round prints the new posts to standard output
via NEW-POSTS, stores its second value in *LAST-POST*, flushes output and
sleeps *UPDATE-INTERVAL* seconds."
  (loop
     (setf *last-post*
           (nth-value 1 (new-posts (get-status *url*) *known-posts* *last-post* t)))
     (finish-output)
     (sleep *update-interval*)))

;; Run the watcher until a serious condition (an error, or an interrupt on
;; implementations that signal one) unwinds the loop, and say what stopped it
;; rather than exiting without a word.  Warnings and other non-serious
;; conditions are not caught here, so they do not end the loop.
(handler-case (run)
  (serious-condition (condition)
    (format *error-output* "~&progwatch stopped: ~a~%" condition)))

Name: Anonymous 2007-09-10 7:26 ID:zTctovPs

That's more like it.

Name: Anonymous 2007-09-10 7:36 ID:Fj/3x8oh

>>117
You forgot -O3 -funroll-loops -malign-double rite?

Name: Anonymous 2007-09-10 7:40 ID:Heaven

>>118
What?

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List