Not really what we were talking about, but here's a FUSE progfs. You'll need
fusepy (http://code.google.com/p/fusepy/) to run it.
It's very slow, and it doesn't set mtime/ctime/atime properly: it does very little caching, and I don't want
getattr to trigger a web request on every call. Still, it kind of works. Someone else can figure out how to export it over 9P.
#!/usr/bin/python
import argparse
import calendar
import errno
import json
import os
import re
import stat
import sys
import time
import urllib2

import fuse
def unix_time(s):
    """
    Converts an HTTP date string such as 'Thu, 01 Jan 1970 00:00:00 GMT'
    into integer seconds since the Unix epoch.

    Raises ValueError if the string does not match that format.
    """
    # HTTP dates are expressed in GMT. time.mktime() would interpret the
    # parsed struct as *local* time and skew the result by the machine's UTC
    # offset; calendar.timegm() treats the struct as UTC.
    return calendar.timegm(time.strptime(s, '%a, %d %b %Y %H:%M:%S %Z'))
def parse_path(path):
    """
    Splits a filesystem path into its components.

    Paths are at most three levels deep. This always returns a three-member
    list and fills the blanks with None. Empty components (leading, trailing
    or doubled slashes) are dropped.

    Raises fuse.FuseOSError(ENOENT) if the path is more than three levels
    deep.
    """
    # A list comprehension instead of filter(): filter() returns a lazy
    # iterator on Python 3, which would break len() and the padding below.
    parts = [part for part in path.split('/') if part]
    if len(parts) > 3:
        raise fuse.FuseOSError(errno.ENOENT)
    parts.extend([None] * (3 - len(parts)))
    return parts
def parse_name(name):
    """
    Takes the contents of the name field and returns ('name!trip', 'email').

    The field is expected to look like
        prefix<a href="mailto:EMAIL">linked text</a>suffix
    The returned name is prefix + linked text + suffix; the email is the
    mailto target. If the field contains no such link, ('', '') is returned.
    """
    # NOTE(review): a name field without a mailto link yields an empty name
    # rather than the raw field; this keeps the original fallback -- confirm
    # that is what is wanted.
    # re.match() takes the pattern first and the string second.
    m = re.match(r'^([^<]*)<a href="mailto:([^"]*)">([^<]*)</a>(.*)$',
                 name,
                 re.DOTALL)
    if m is None:
        return '', ''
    # str.join() takes a single iterable, not separate arguments.
    return ''.join((m.group(1), m.group(3), m.group(4))), m.group(2)
class HeadRequest(urllib2.Request):
    """A urllib2 request that is sent with the HEAD method instead of GET."""

    def get_method(self):
        """Returns the HTTP method name to use for this request."""
        return 'HEAD'
class ProgFS(fuse.LoggingMixIn, fuse.Operations):
    """
    Read-only FUSE view of a dis.4chan.org text board.

    Layout:
        /<thread id>/title
        /<thread id>/<post number>/poster
        /<thread id>/<post number>/email
        /<thread id>/<post number>/body

    The thread table is built from the board's subject.txt and refreshed on
    every readdir. Post contents are not cached: each read of a post file
    fetches the post from the JSON interface.
    """

    # Names of the files inside every post directory.
    _POST_FILES = ('poster', 'email', 'body')

    # One line of subject.txt. Compiled once here instead of on every
    # refresh.
    # Replacing the regex with split('<>') is faster, but Shiichan is
    # full of corner cases and that loses /prog/ threads.
    _SUBJECT_LINE = re.compile(r"""
        ^(?P<subject>.*)        # Subject
        <>
        .*?                     # Creator's name
        <>
        .*?                     # Thread icon
        <>
        (?P<id>-?\d*)           # Time posted/thread ID
        <>
        (?P<replies>\d*)        # Number of replies
        <>
        .*?                     # ???
        <>
        (?P<last_post>\d*)      # Time of last post
        \n$""", re.VERBOSE)

    def __init__(self, board='prog', tmpdir=None):
        """
        Sets up the URLs for `board` and loads the initial thread table
        (this performs a web request).

        `tmpdir` is accepted but not used by this class.
        """
        self.board = board
        self.subject_url = 'http://dis.4chan.org/%s/subject.txt' % self.board
        # The doubled %% leaves placeholders for the thread id and post
        # number, which are filled in later.
        self.thread_url = 'http://dis.4chan.org/json/%s/%%s/' % self.board
        self.post_url = 'http://dis.4chan.org/json/%s/%%s/%%s' % self.board
        self.files = {}
        # Last-Modified time of the subject.txt the thread table was built
        # from, as a Unix timestamp (0 = never fetched).
        self.last_modified = 0
        # thread id (str) -> {'title', 'last_modified', 'posts'}
        self.threads = {}
        self._get_subject_txt()

    @staticmethod
    def _as_bytes(text):
        """Returns text as a byte string, UTF-8 encoding it if necessary."""
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    def _window(self, text, size, offset):
        """
        Returns the `size` bytes of text starting at byte `offset`.

        FUSE sizes and offsets are byte counts, so the slice is taken from
        the encoded byte string rather than from unicode characters.
        """
        return self._as_bytes(text)[offset : offset + size]

    @staticmethod
    def _attrs(mode, mtime, atime=None, size=None):
        """
        Builds a stat dictionary owned by the current user.

        ctime and mtime are both set to `mtime`; atime defaults to `mtime`
        as well. 'st_size' is only included when `size` is given.
        """
        attrs = {'st_mode': mode,
                 'st_ctime': mtime,
                 'st_mtime': mtime,
                 'st_atime': mtime if atime is None else atime,
                 'st_uid': os.getuid(),
                 'st_gid': os.getgid()}
        if size is not None:
            attrs['st_size'] = size
        return attrs

    def _get_subject_txt(self):
        """
        Checks if subject.txt has changed, and if so, fetches the new file
        and updates the threads table.

        Lines that do not match the expected format, or whose reply count or
        last-post time is empty, are skipped.
        """
        # HEAD request first, so the whole file is only downloaded when the
        # server reports a newer Last-Modified time.
        r = urllib2.urlopen(HeadRequest(self.subject_url))
        try:
            modified = unix_time(r.headers.getheader('last-modified'))
        finally:
            r.close()
        if self.last_modified >= modified:
            # No change. Stop now.
            return

        r = urllib2.urlopen(self.subject_url)
        try:
            self.last_modified = unix_time(
                r.headers.getheader('last-modified'))
            lines = r.readlines()
        finally:
            r.close()

        for line in lines:
            # FIXME this loop is slow
            m = self._SUBJECT_LINE.match(line)
            if m is None:
                # Malformed line (or a final line without a newline).
                continue
            try:
                posts = int(m.group('replies'))
                last_post = float(m.group('last_post'))
            except ValueError:
                # \d* also matches an empty field; skip such lines instead
                # of leaving a half-filled thread entry behind.
                continue
            thread = self.threads.setdefault(m.group('id'), {})
            thread['title'] = m.group('subject')
            thread['last_modified'] = last_post
            thread['posts'] = posts

    def getattr(self, path, fh=None):
        """
        Returns the stat dictionary for `path`.

        Directories are mode 0555 and files 0444. Times come from the cached
        thread table only; no web request is made here.

        Raises fuse.FuseOSError(ENOENT) for paths that do not exist.
        """
        thread_id, post, name = parse_path(path)
        if not thread_id:
            # Board (root) directory
            return self._attrs(stat.S_IFDIR | 0o555,
                               self.last_modified,
                               atime=time.time())
        if thread_id not in self.threads:
            raise fuse.FuseOSError(errno.ENOENT)
        thread = self.threads[thread_id]
        if not post:
            # Thread folder
            return self._attrs(stat.S_IFDIR | 0o555, thread['last_modified'])
        if post == 'title' and not name:
            # Thread title file. The thread id doubles as the time the
            # thread was posted; the title is cached, so its real size is
            # known.
            return self._attrs(stat.S_IFREG | 0o444,
                               float(thread_id),
                               size=len(self._as_bytes(thread['title'])))
        try:
            number = int(post)
        except ValueError:
            # Not a post folder
            raise fuse.FuseOSError(errno.ENOENT)
        if number < 1 or number > thread['posts']:
            # Post index out of range
            raise fuse.FuseOSError(errno.ENOENT)
        if not name:
            # Post folder
            # TODO fetch post for accurate times
            return self._attrs(stat.S_IFDIR | 0o555, thread['last_modified'])
        if name not in self._POST_FILES:
            raise fuse.FuseOSError(errno.ENOENT)
        # Post data file. The real size is unknown without fetching the
        # post, so a generous upper bound is reported.
        # TODO fetch post for accurate times
        return self._attrs(stat.S_IFREG | 0o444,
                           thread['last_modified'],
                           size=1024 * 1024)

    def read(self, path, size, offset, fh):
        """
        Returns up to `size` bytes of the file at `path`, starting at byte
        `offset`.

        The title is served from the cached thread table. Post files trigger
        a web request for the post on every call.

        Raises fuse.FuseOSError(ENOENT) for paths that are not readable
        files.
        """
        thread_id, post, name = parse_path(path)
        if thread_id not in self.threads or not post:
            raise fuse.FuseOSError(errno.ENOENT)
        if post == 'title' and not name:
            return self._window(self.threads[thread_id]['title'],
                                size, offset)
        if name not in self._POST_FILES:
            raise fuse.FuseOSError(errno.ENOENT)
        try:
            int(post)
        except ValueError:
            raise fuse.FuseOSError(errno.ENOENT)

        r = urllib2.urlopen(self.post_url % (thread_id, post))
        try:
            # The response is indexed by the post number as a string.
            data = json.loads(r.read())[post]
        finally:
            r.close()

        if name == 'body':
            text = data['com'] + '\n'
        else:
            poster, email = parse_name(data['name'])
            # The file is called 'poster' (see _POST_FILES); comparing
            # against 'name' here would serve the email for it.
            text = poster if name == 'poster' else email
            if text:
                text = text + '\n'
        return self._window(text, size, offset)

    def readdir(self, path, fh):
        """
        Returns the directory listing for `path`, refreshing the thread table
        first.

        Raises fuse.FuseOSError with ENOENT for unknown paths and ENOTDIR
        for a thread's title file.
        """
        self._get_subject_txt()
        thread_id, post, name = parse_path(path)
        if not thread_id:
            # Contents of the board directory
            return ['.', '..'] + list(self.threads)
        if thread_id not in self.threads:
            raise fuse.FuseOSError(errno.ENOENT)
        posts = self.threads[thread_id]['posts']
        if not post:
            # Contents of a thread directory
            return ['.', '..', 'title'] + \
                   [str(number) for number in range(1, posts + 1)]
        if post == 'title':
            raise fuse.FuseOSError(errno.ENOTDIR)
        try:
            number = int(post)
        except ValueError:
            raise fuse.FuseOSError(errno.ENOENT)
        # Posts are numbered from 1, matching the check in getattr.
        if number < 1 or number > posts:
            raise fuse.FuseOSError(errno.ENOENT)
        return ['.', '..'] + list(self._POST_FILES)
# Script entry point: parse the command line and mount the filesystem.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', action='store',
                        help='board')
    parser.add_argument('mountpoint', action='store',
                        help='mount point')
    parser.add_argument('-f', '--foreground', action='store_true',
                        help='run in the foreground (useful for debugging)')
    args = parser.parse_args()
    # The result is deliberately not bound to the name `fuse`: doing so
    # would rebind the imported module that the rest of the file refers to.
    # The board falls back to 'prog' when -b is omitted or empty.
    fuse.FUSE(ProgFS(args.board or 'prog'),
              args.mountpoint,
              foreground=args.foreground)