Name: Anonymous 2012-02-03 3:07
I need your help to separate the characters of a Japanese string. For example, if my string were "飛行機", I'd like to get 飛, 行 and 機 individually.
Grammar = split "|" \(sentence = (noun_phrase verb_phrase)
|noun_phrase = (Article Noun)
|verb_phrase = (Verb noun_phrase)
|Article = the a
|Noun = man ball woman table
|Verb = hit took saw liked)
generate [@P]=mapc r P; P:rewrites=P,pick,r; P=P
rewrites Category = Grammar,<[@_ [!Category "=" @RHS] @_]=RHS>
from random import choice
def Dict(**args):
    """Collect the given keyword arguments into a dictionary and return it."""
    collected = dict(args)
    return collected
# Rewrite rules: each category name maps to a list of alternatives.
# An alternative is either a list of sub-phrases or a single word.
grammar = {
    'S': [['NP', 'VP']],
    'NP': [['Art', 'N']],
    'VP': [['V', 'NP']],
    'Art': ['the', 'a'],
    'N': ['man', 'ball', 'woman', 'table'],
    'V': ['hit', 'took', 'saw', 'liked'],
}
def generate(phrase):
    """Generate a random sentence or phrase as a flat list of words.

    A list is treated as a sequence of sub-phrases whose expansions are
    concatenated; a key of ``grammar`` is replaced by one of its
    alternatives chosen at random; anything else is returned as a
    one-word list.
    """
    # Sequence of sub-phrases: expand each one and splice the results.
    if isinstance(phrase, list):
        return mappend(generate, phrase)
    # No rule for this symbol, so it is a terminal word.
    if phrase not in grammar:
        return [phrase]
    expansion = choice(grammar[phrase])
    return generate(expansion)
def generate_tree(phrase):
    """Generate a random sentence or phrase, with a complete parse tree.

    A list is treated as a sequence of sub-phrases and yields a list with
    one subtree per element.  A key of ``grammar`` yields a list whose
    first item is that key, followed by the tree of one randomly chosen
    alternative.  Anything else is a word and yields ``[phrase]``.
    """
    if isinstance(phrase, list):
        # Build a real list: on Python 3 map() returns a lazy iterator,
        # which cannot be concatenated by the `[phrase] + ...` below.
        return [generate_tree(part) for part in phrase]
    elif phrase in grammar:
        return [phrase] + generate_tree(choice(grammar[phrase]))
    else:
        return [phrase]
def mappend(fn, list):
    """Append the results of calling fn on each element of list.

    ``fn`` is called once per element, in order, and the values it returns
    are joined left to right with ``+``.  An empty ``list`` gives ``[]``.

    The parameter name ``list`` shadows the builtin; it is kept so that
    existing keyword callers continue to work.
    """
    # Imported here because reduce is a builtin only on Python 2;
    # functools.reduce is available on both Python 2.6+ and Python 3.
    from functools import reduce

    results = [fn(item) for item in list]
    if not results:
        # reduce() without an initial value raises TypeError on empty input.
        return []
    return reduce(lambda x, y: x + y, results)
>>> generate('S')
['the', 'man', 'saw', 'the', 'table']
>>> ' '.join(generate('S'))
'the man saw the table'