Name: Anonymous 2010-07-17 5:51
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.Data.SQLite;
namespace Progscrape
{
    /// <summary>
    /// Console scraper that mirrors the /prog/ text board into a local SQLite
    /// database (<c>prog.db</c>). It downloads <c>subject.txt</c>, and for every
    /// listed thread whose last-post value is newer than the one stored locally
    /// it downloads the thread's JSON and stores the posts.
    /// </summary>
    class Progscrape
    {
        private const string UserAgent = "progscrape.NET/1.0";
        private const string SubjectTxtUrl = "http://dis.4chan.org/prog/subject.txt";
        private const string ThreadJsonUrlPrefix = "http://dis.4chan.org/json/prog/";

        // Number of "<>"-separated fields expected on each subject.txt line.
        private const int SubjectFieldCount = 7;

        /// <summary>
        /// Entry point: creates the schema if needed, fetches subject.txt and
        /// synchronises every well-formed thread line into the database.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // 'using' guarantees the connection is closed even when a download
            // or a SQL statement throws half-way through the run.
            using (var db = new SQLiteConnection("Data Source=prog.db"))
            {
                db.Open();
                ExecuteNonQuery(db, "CREATE TABLE IF NOT EXISTS posts (thread INTEGER, id INTEGER, author TEXT, email TEXT, trip TEXT, time INTEGER, body TEXT, primary key (thread, id));");
                ExecuteNonQuery(db, "CREATE TABLE IF NOT EXISTS threads (thread INTEGER PRIMARY KEY, title TEXT, last_post INTEGER);");

                Console.Write("Fetching subject.txt...");
                var subjectTxt = Download(SubjectTxtUrl).Split(new Char[] { '\n' });
                Console.WriteLine("done.");

                foreach (var line in subjectTxt)
                {
                    // Splitting on '\n' leaves an empty entry after the final
                    // newline; that is not a malformed thread, so skip it quietly.
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    var data = line.Split(new string[] { "<>" }, StringSplitOptions.None);
                    if (data.Length != SubjectFieldCount)
                    {
                        Console.WriteLine("subject.txt fail (length {0}, skipping thread): {1}", data.Length, line);
                        continue;
                    }

                    SyncThread(db, data);
                }
            }
        }

        /// <summary>
        /// Brings one thread up to date. Registers the thread if it is not in
        /// the <c>threads</c> table yet, and re-downloads its posts when the
        /// last-post value from subject.txt is greater than the stored one.
        /// </summary>
        /// <param name="db">Open database connection.</param>
        /// <param name="data">
        /// The seven fields of one subject.txt line. This method uses field 0 as
        /// the title, field 3 as the thread id, field 4 as the upper bound of the
        /// post-id loop and field 6 as the last-post value.
        /// </param>
        private static void SyncThread(SQLiteConnection db, string[] data)
        {
            var thread = int.Parse(data[3]);
            var postLimit = int.Parse(data[4]);
            var remoteLastPost = long.Parse(data[6]);

            long storedLastPost;
            if (!TryReadLastPost(db, thread, out storedLastPost))
            {
                using (var insertThread = new SQLiteCommand("INSERT INTO threads VALUES(@thread, @title, @lastPost);", db))
                {
                    insertThread.Parameters.AddWithValue("thread", thread);
                    insertThread.Parameters.AddWithValue("title", data[0]);
                    insertThread.Parameters.AddWithValue("lastPost", storedLastPost);
                    insertThread.ExecuteNonQuery();
                }
            }

            if (remoteLastPost <= storedLastPost)
            {
                return;
            }

            Console.Write("updating thread {0}...", thread);
            var threadJson = JObject.Parse(Download(ThreadJsonUrlPrefix + data[3]));

            // Posts and the new last_post value are written in one transaction,
            // and only after the download succeeded. If anything fails the
            // thread keeps its old last_post and is retried on the next run.
            using (var transaction = db.BeginTransaction())
            {
                // NOTE(review): ids 0 .. postLimit-1 are probed, as in the original.
                // If post ids are 1-based and field 4 is the post count, the newest
                // post is never requested - confirm against the JSON format.
                for (int i = 0; i < postLimit; ++i)
                {
                    var postJson = threadJson[i.ToString()];
                    if (postJson == null)
                    {
                        continue;
                    }

                    // Empty strings are spelled '' because SQLite treats
                    // double-quoted tokens as identifiers first.
                    using (var updatePost = new SQLiteCommand("INSERT OR REPLACE INTO posts VALUES(@thread, @id, @author, '', '', @time, @body);", db, transaction))
                    {
                        updatePost.Parameters.AddWithValue("thread", thread);
                        updatePost.Parameters.AddWithValue("id", i);
                        // NOTE(review): "Author" is capitalised while "now" and "com"
                        // are not - kept as in the original; verify against the feed.
                        updatePost.Parameters.AddWithValue("author", (string)postJson["Author"]);
                        updatePost.Parameters.AddWithValue("time", long.Parse((string)postJson["now"]));
                        updatePost.Parameters.AddWithValue("body", (string)postJson["com"]);
                        updatePost.ExecuteNonQuery();
                    }
                }

                // The WHERE clause is essential: without it every row in
                // 'threads' would receive this thread's last_post value.
                using (var updateThread = new SQLiteCommand("UPDATE threads SET last_post = @lastPost WHERE thread = @thread;", db, transaction))
                {
                    updateThread.Parameters.AddWithValue("lastPost", remoteLastPost);
                    updateThread.Parameters.AddWithValue("thread", thread);
                    updateThread.ExecuteNonQuery();
                }

                transaction.Commit();
            }

            Console.WriteLine("done.");
        }

        /// <summary>
        /// Reads the stored <c>last_post</c> value of a thread.
        /// </summary>
        /// <param name="db">Open database connection.</param>
        /// <param name="thread">Thread id to look up.</param>
        /// <param name="lastPost">
        /// Receives the stored value, or 0 when the thread is unknown or the
        /// column is NULL.
        /// </param>
        /// <returns><c>true</c> if a row for the thread exists.</returns>
        private static bool TryReadLastPost(SQLiteConnection db, int thread, out long lastPost)
        {
            using (var getLastPost = new SQLiteCommand("SELECT last_post FROM threads WHERE thread = @thread;", db))
            {
                getLastPost.Parameters.AddWithValue("thread", thread);
                var stored = getLastPost.ExecuteScalar();

                // ExecuteScalar yields null when no row matched.
                if (stored == null)
                {
                    lastPost = 0;
                    return false;
                }

                // Convert instead of unboxing: the provider hands INTEGER columns
                // back as a boxed Int64, which a direct (int) cast cannot unbox.
                lastPost = (stored is DBNull) ? 0L : Convert.ToInt64(stored);
                return true;
            }
        }

        /// <summary>
        /// Runs a SQL statement that returns no result and disposes the command.
        /// </summary>
        /// <param name="db">Open database connection.</param>
        /// <param name="sql">Statement text to execute.</param>
        private static void ExecuteNonQuery(SQLiteConnection db, string sql)
        {
            using (var command = new SQLiteCommand(sql, db))
            {
                command.ExecuteNonQuery();
            }
        }

        /// <summary>
        /// Downloads a URL and returns the response body decoded as UTF-8.
        /// The request identifies itself as progscrape.NET and accepts
        /// gzip/deflate-compressed responses.
        /// </summary>
        /// <param name="url">Absolute URL to fetch.</param>
        /// <returns>The complete response body as a string.</returns>
        private static string Download(string url)
        {
            var request = (HttpWebRequest)WebRequest.Create(url);
            request.UserAgent = UserAgent;
            request.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip,deflate");
            request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            // Dispose the response and reader so the underlying HTTP connection
            // is released after every request.
            using (var response = request.GetResponse())
            using (var reader = new StreamReader(response.GetResponseStream(), new UTF8Encoding()))
            {
                return reader.ReadToEnd();
            }
        }
    }
}