Having played with Markov-chain text generators, I came up with the idea of gathering statistics by parsing randomly selected threads from a certain board at periodic intervals, then using those accumulated word frequencies to generate text, which would then have the flavor of the board.
Your thoughts, /prog/?
Name:
Anonymous 2008-02-20 14:59
FIOC version:
import re, random
# Whitelist of characters that _parse keeps: apostrophe, hyphen, digits and
# ASCII letters.  Any character not listed here (including whitespace and
# punctuation) is filtered out of the input text.
_acceptable_chars = "'-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
def _learn(self, sentence):
    """Record one tokenized sentence in the forward and backward chains.

    Every word is lower-cased.  A word that is not yet in ``self.words`` is
    appended there and receives a new empty successor list in ``self.chain``
    and a new empty predecessor list in ``self.bchain``.  For each
    consecutive pair (starting from ``BOL_MARKER_ID``) the follower is
    appended to the predecessor's ``chain`` entry and the predecessor to the
    follower's ``bchain`` entry.  The last word's ``chain`` entry finally
    receives ``EOL_MARKER_ID``.

    sentence -- sequence of word strings; an empty sequence is ignored.
    """
    if len(sentence) == 0:
        return

    previous = BOL_MARKER_ID
    for token in sentence:
        token = token.lower()
        try:
            current = self.words.index(token)
        except ValueError:
            # First sighting: the new word's id is its position in the list.
            current = len(self.words)
            self.words.append(token)
            self.chain.append([])
            self.bchain.append([])
        self.chain[previous].append(current)
        self.bchain[current].append(previous)
        previous = current

    # Mark the final word as a possible sentence ending.
    self.chain[previous].append(EOL_MARKER_ID)
def _parse(self, sentence):
    """Split raw text into a list of cleaned word tokens.

    The text is split on whitespace first; then every character that is not
    in ``_acceptable_chars`` is removed from each token.  Tokens that end up
    empty (for example a lone punctuation mark) are discarded.

    Splitting must happen before filtering: the whitelist contains no
    whitespace, so filtering the whole string first would remove the word
    separators and glue all words into a single token.  Building each token
    with ``str.join`` also avoids relying on ``filter()`` returning a string,
    which is only true on Python 2.

    sentence -- the raw text of one sentence.
    Returns a list of non-empty strings.
    """
    tokens = []
    for raw in sentence.split():
        cleaned = ''.join(c for c in raw if c in _acceptable_chars)
        if cleaned:
            tokens.append(cleaned)
    return tokens
def generate(self, base_word=None):
    """Build a random sentence, optionally passing through ``base_word``.

    Returns ``None`` when ``self.chain[0]`` is empty.  Otherwise the
    lower-cased ``base_word`` is looked up in ``self.words``; if it is
    missing, or ``base_word`` has no ``lower`` method (e.g. ``None``), the
    walk starts at ``BOL_MARKER_ID`` instead.

    From the starting id the method first walks ``self.bchain`` by random
    choice until it reaches ``BOL_MARKER_ID``, then walks ``self.chain`` by
    random choice until it reaches ``EOL_MARKER_ID``.  The visited words are
    joined with spaces, capitalized and terminated with a full stop.
    """
    if len(self.chain[0]) == 0:
        return None

    try:
        seed = self.words.index(base_word.lower())
    except (ValueError, AttributeError):
        seed = BOL_MARKER_ID

    # Walk towards the beginning of the sentence; ids are collected in
    # visiting order (seed first) and flipped afterwards.
    backward = []
    cursor = seed
    while cursor != BOL_MARKER_ID:
        backward.append(cursor)
        cursor = random.choice(self.bchain[cursor])
    backward.reverse()

    # Walk towards the end of the sentence.
    forward = []
    cursor = seed
    while cursor != EOL_MARKER_ID:
        forward.append(cursor)
        cursor = random.choice(self.chain[cursor])

    # forward[0] is the seed itself: either already the last element of
    # ``backward`` or the begin-of-line marker, so it is skipped.
    ids = backward + forward[1:]
    text = ' '.join([self.words[i] for i in ids])
    return text.capitalize() + '.'
def reply(self, line):
    """Answer ``line`` with a generated sentence, then learn from ``line``.

    The line is split with ``_sentence_end`` and each piece is tokenized with
    ``self._parse``.  If any of the input words is already in ``self.words``,
    one of them is picked at random and handed to ``self.generate`` as the
    base word; otherwise ``generate`` is called with ``None``.  The reply is
    produced before the new sentences are passed to ``self._learn``, so it is
    built only from what was known before this call.

    line -- raw input text, possibly containing several sentences.
    Returns whatever ``self.generate`` returns (a string or ``None``).
    """
    sentences = []
    seen = set()
    for chunk in _sentence_end.split(line):
        tokens = self._parse(chunk)
        sentences.append(tokens)
        # _learn stores words lower-cased, so the input has to be lower-cased
        # too or capitalized known words would never match the vocabulary.
        seen.update(token.lower() for token in tokens)

    known = seen.intersection(self.words)
    seed = random.choice(list(known)) if known else None
    answer = self.generate(seed)

    for tokens in sentences:
        self._learn(tokens)
    return answer
def main():
    """Run an interactive prompt around a ``Markov`` instance.

    Each input line is stripped.  A line starting with ``?`` asks for a
    sentence generated from the rest of the line as base word; any other
    non-empty line is passed to ``Markov.reply``.  If that yields nothing
    (or the line was empty) an unseeded ``generate()`` is tried, and ``...``
    is printed as a last resort.  End-of-file or Ctrl-C ends the loop.

    Written so that it runs unchanged on Python 2 and Python 3: ``print`` is
    always called with exactly one argument, and the line reader is looked
    up at run time.
    """
    # raw_input only exists on Python 2; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    markov = Markov()
    while True:
        try:
            line = read_line('> ')
        except (EOFError, KeyboardInterrupt):
            # Terminate the prompt line before leaving.
            print('')
            break

        line = line.strip()
        if line.startswith('?'):
            response = markov.generate(line[1:])
        elif line:
            response = markov.reply(line)
        else:
            response = None
        print(response or markov.generate() or '...')