Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

Project ChanText

Name: !GEJzSATORI 2008-02-17 23:04

'sup /prog/,

Having played with Markov-chain text generators, I came up with the idea of gathering statistics by parsing randomly selected threads from a certain board at periodic intervals, then using those accumulated word frequencies to generate text, which would then have the flavor of the board.

Your thoughts, /prog/?

Name: Anonymous 2008-02-20 14:59

FIOC version:

import re, random
# Characters that survive word clean-up: apostrophe, hyphen, ASCII digits and
# ASCII letters.  Anything else is discarded when a sentence is parsed.
_acceptable_chars = (
    "'-"
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
)

# One or more of . ? ! ; : in a row separates two sentences.
_sentence_end = re.compile('[.?!;:]+')

# Slot 0 of the vocabulary is the beginning-of-line marker; -1 is the
# end-of-line sentinel stored in the forward chain (it has no vocabulary slot).
BOL_MARKER_ID, EOL_MARKER_ID = 0, -1
BOL_MARKER = '>'

class Markov(object):
    """First-order, bidirectional Markov chain over lower-cased words.

    Every learned sentence is recorded as a path
    ``BOL -> word -> ... -> word -> EOL`` in a forward chain, and the same
    path is recorded backwards (without the EOL step) in a backward chain.
    A sentence can therefore be grown in both directions from any known word.
    """

    def __init__(self):
        # Vocabulary: position in this list is the word's id.  Slot 0 is
        # reserved for the beginning-of-line marker.
        self.words = [BOL_MARKER]
        # chain[i]: ids observed directly after word i (may hold EOL_MARKER_ID).
        self.chain = [[]]
        # bchain[i]: ids observed directly before word i (may hold BOL_MARKER_ID).
        self.bchain = [[]]
        # Reverse lookup word -> id, kept in step with self.words so that
        # learning does not rescan the whole vocabulary for every word.
        self._word_ids = {BOL_MARKER: BOL_MARKER_ID}

    def _learn(self, sentence):
        """Record the transitions of one sentence.

        ``sentence`` is a list of words; an empty list is ignored.  Words are
        lower-cased before being stored.
        """
        if not sentence:
            return
        last_id = BOL_MARKER_ID
        for word in sentence:
            word = word.lower()
            word_id = self._word_ids.get(word)
            if word_id is None:
                # First sighting: allocate a vocabulary slot and empty
                # successor/predecessor lists at the same index.
                word_id = len(self.words)
                self._word_ids[word] = word_id
                self.words.append(word)
                self.chain.append([])
                self.bchain.append([])
            self.chain[last_id].append(word_id)
            self.bchain[word_id].append(last_id)
            last_id = word_id
        self.chain[last_id].append(EOL_MARKER_ID)

    def _parse(self, sentence):
        """Split ``sentence`` on whitespace and clean each word.

        Characters outside ``_acceptable_chars`` are dropped from every word,
        and words that end up empty are skipped.  Returns a list of strings.
        """
        # Split before filtering: whitespace is not an acceptable character,
        # so filtering the whole sentence first would glue all words together.
        words = []
        for token in sentence.split():
            word = ''.join(c for c in token if c in _acceptable_chars)
            if word:
                words.append(word)
        return words

    def generate(self, base_word=None):
        """Return a random sentence, or ``None`` if nothing was learned yet.

        When ``base_word`` is a known word (compared case-insensitively) the
        sentence is grown backwards and forwards from it; otherwise, including
        when ``base_word`` is not a string, generation starts at the beginning
        of a line.  The result is capitalised and terminated with a period.
        """
        if not self.chain[0]:
            return None
        try:
            base_id = self._word_ids.get(base_word.lower(), BOL_MARKER_ID)
        except AttributeError:
            # base_word is None or some other non-string value.
            base_id = BOL_MARKER_ID

        # Walk backwards to the start of a line.  Ids are collected in
        # reverse order and flipped once instead of inserting at the front.
        left = []
        word_id = base_id
        while word_id != BOL_MARKER_ID:
            left.append(word_id)
            word_id = random.choice(self.bchain[word_id])
        left.reverse()

        # Walk forwards to the end of a line.
        right = []
        word_id = base_id
        while word_id != EOL_MARKER_ID:
            right.append(word_id)
            word_id = random.choice(self.chain[word_id])

        # right[0] is the base id: either already the last element of `left`
        # or the BOL marker, which must not be printed.
        sentence = left + right[1:]
        text = ' '.join(self.words[i] for i in sentence)
        return text.capitalize() + '.'

    def reply(self, line):
        """Answer ``line`` with a generated sentence, then learn from it.

        The reply is seeded with a randomly chosen word of ``line`` that is
        already known, if there is one.  The reply is generated before
        ``line`` is learned, so it is built only from earlier input.
        Returns the generated sentence, or ``None`` if nothing has been
        learned yet.
        """
        sentences = []
        seen = set()
        for fragment in _sentence_end.split(line):
            parsed = self._parse(fragment)
            sentences.append(parsed)
            # The vocabulary is stored lower-cased, so compare lower-cased.
            seen.update(word.lower() for word in parsed)
        known = sorted(word for word in seen if word in self._word_ids)
        s = self.generate(random.choice(known) if known else None)
        for parsed in sentences:
            self._learn(parsed)
        return s

def main():
    """Run an interactive console session with a fresh Markov bot.

    Each line typed at the ``> `` prompt is handled as follows:

    * ``?word`` prints a sentence generated around ``word`` without learning
      anything;
    * any other non-empty line is answered and then learned;
    * an empty line prints a sentence generated from scratch.

    ``...`` is printed when the bot has nothing to say yet.  End-of-file or
    Ctrl-C ends the session.
    """
    # raw_input only exists on Python 2; Python 3 calls the same thing input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    markov = Markov()
    while True:
        try:
            line = read_line('> ')
        except (EOFError, KeyboardInterrupt):
            # Finish the prompt line so the shell prompt starts on a new one.
            print('')
            break
        line = line.strip()
        if line.startswith('?'):
            # Strip so that "? word" seeds the sentence just like "?word".
            line = markov.generate(line[1:].strip())
        elif line:
            line = markov.reply(line)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(line or markov.generate() or '...')


if __name__ == '__main__':
    main()

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List