Instead of parsing the tree, you can make SyntaxNet output everything in conll format, which is easier to parse. The conll format for your sentence looks as follows:
1 Alice _ NOUN NNP _ 10 nsubj _ _
2 , _ . , _ 1 punct _ _
3 who _ PRON WP _ 6 nsubj _ _
4 had _ VERB VBD _ 6 aux _ _
5 been _ VERB VBN _ 6 aux _ _
6 reading _ VERB VBG _ 1 rcmod _ _
7 about _ ADP IN _ 6 prep _ _
8 SyntaxNet _ NOUN NNP _ 7 pobj _ _
9 , _ . , _ 10 punct _ _
10 saw _ VERB VBD _ 0 ROOT _ _
11 Bob _ NOUN NNP _ 10 dobj _ _
12 in _ ADP IN _ 10 prep _ _
13 the _ DET DT _ 14 det _ _
14 hallway _ NOUN NN _ 12 pobj _ _
15 yesterday _ NOUN NN _ 10 tmod _ _
16 . _ . . _ 10 punct _ _
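The meaning of each column is described in the CoNLL format specification. The only columns we need here are the first (the ID of the word), the second (the word itself) and the 7th (the head, in other words, the parent). The root node has a head of 0.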
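To get conll output, change the end of demo.sh (which, I assume, you used to produce your output) so that it looks like this: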
# Two chained $PARSER_EVAL runs. The first uses the brain_tagger prefix with
# $MODEL_DIR/tagger-params and writes CoNLL to stdout; the second reads that
# CoNLL from stdin, uses the brain_parser prefix with
# $MODEL_DIR/parser-params, and again writes CoNLL to stdout.
$PARSER_EVAL \
--input=$INPUT_FORMAT \
--output=stdout-conll \
--hidden_layer_sizes=64 \
--arg_prefix=brain_tagger \
--graph_builder=structured \
--task_context=$MODEL_DIR/context.pbtxt \
--model_path=$MODEL_DIR/tagger-params \
--slim_model \
--batch_size=1024 \
--alsologtostderr \
| \
$PARSER_EVAL \
--input=stdin-conll \
--output=stdout-conll \
--hidden_layer_sizes=512,512 \
--arg_prefix=brain_parser \
--graph_builder=structured \
--task_context=$MODEL_DIR/context.pbtxt \
--model_path=$MODEL_DIR/parser-params \
--slim_model \
--batch_size=1024 \
--alsologtostderr
( )
( , . )
demo.sh, , . , , ( :)).
, python, . , demo.sh python.
. python, .
First, we read all lines of the conll output from standard input. Each word is stored in a small class.
import sys
class Word:
    """A single token row from a conll file.

    Only the fields needed to rebuild the dependency tree are kept.

    Args:
        columns: The fields of one conll line, in order, as any iterable of
            strings (a list, or a lazy iterator such as the result of
            ``filter`` on Python 3).

    Raises:
        IndexError: If fewer than seven columns are supplied.
        ValueError: If the ID or head column is not an integer.
    """

    def __init__(self, columns):
        # Materialise first so that lazy iterables can be indexed below.
        columns = list(columns)
        self.id = int(columns[0])    # first column: ID of the word
        self.form = columns[1]       # second column: the word itself
        self.head = int(columns[6])  # 7th column: ID of the parent word
        self.children = []           # no children until the tree is assembled
# Read the conll rows from standard input, building one Word per token line.
words = []
for line in sys.stdin:
    # split() without an argument splits on any run of whitespace (spaces or
    # tabs), never yields empty fields, and returns a real list on both
    # Python 2 and Python 3.
    columns = line.split()
    if not columns:
        # A blank line has no columns to parse; skip it instead of crashing.
        continue
    words.append(Word(columns))
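Now we have a list of words, each of which knows the ID of its head. We only need to restructure this list into a tree.
First, we build a lookup table that holds, for every head ID, the list of its children. Because IDs run from 0 (the root's head) up to the number of words, a plain list indexed by ID is enough.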
# lookup[h] collects every word whose head is h. One slot more than there are
# words is allocated so that index len(words) is valid as well as index 0.
lookup = []
for _ in range(len(words) + 1):
    lookup.append([])
for word in words:
    lookup[word.head].append(word)
Then we build the tree recursively:
def buildTree(head):
    """Attach children to every word below ``head`` and return its children.

    Each word reachable from ``head`` gets its ``children`` attribute set to
    the list stored under its own ID in the module-level ``lookup`` table.

    Args:
        head: ID whose direct children form the top level of the result.

    Returns:
        The list ``lookup[head]``, with every descendant's ``children`` filled.
    """
    top_level = lookup[head]
    # Explicit work list instead of recursion; every reachable word is
    # visited exactly once and linked to its own entry in the lookup table.
    pending = list(top_level)
    while pending:
        node = pending.pop()
        node.children = lookup[node.id]
        pending.extend(node.children)
    return top_level
# buildTree(0) returns the words whose head is 0; the first of them is used
# as the root of the tree.
tree = buildTree(0)[0]
For simple output, we can add the following methods to Word:
def __str__(self):
    """Render this word and its subtree in bracket notation.

    A word without children is rendered as ``[form]``; otherwise the form is
    followed by a space and the renderings of all children, concatenated
    without a separator.
    """
    if not self.children:
        return "[" + self.form + "]"
    rendered_children = ""
    for child in self.children:
        rendered_children = rendered_children + str(child)
    return "[" + self.form + " " + rendered_children + "]"


def __repr__(self):
    """Use the same bracket rendering as ``__str__``."""
    return self.__str__()
With that in place, we can print the tree:
# Parenthesised so the statement is valid on both Python 2 and Python 3.
print(tree)
To run it on a saved conll file:
# Feed the saved conll file to the script on standard input.
./my_parser.py < input.conll
Or pipe the output of demo.sh straight into the script:
# The sentence goes to demo.sh on stdin; demo.sh's stdout is piped into the script.
echo "Alice, who had been reading about SyntaxNet, saw Bob in the hallway yesterday." | syntaxnet/demo.sh | ./my_parser.py