In the MAYZ toolkit, data such as parse trees and derivations are represented as feature structures. This page describes the type definitions of these feature structures.
%% lexical information attached to one input word
word <- [bot] +
  [INPUT\string(0),       %% input word itself
   SURFACE\string(1),     %% preprocessed input word
   BASE\string(2),        %% base form of the input word
   INPUT_POS\string(5),   %% input POS itself
   POS\string(6),         %% preprocessed POS
   BASE_POS\string(7),    %% POS of the base form of the input word
   POSITION\integer(10)   %% the position of the input word
  ].
A phrase structure of an input treebank (parse tree) is represented with the following feature structure.
%% head/argument/modifier marks
head_mark <- [bot].
non_head <- [head_mark].                              %% other than 'head'
argument <- [non_head].                               %% for argument
modifier <- [non_head].                               %% for modifier
empty <- [head_mark].                                 %% for empty trees
non_empty <- [head_mark].                             %% for non-empty trees
head <- [head_mark, non_empty].                       %% head
non_head_empty <- [non_head, empty].                  %% empty non-head
non_head_non_empty <- [non_head, non_empty].          %% non-empty non-head
argument_empty <- [argument, non_head_empty].         %% empty argument
argument_non_empty <- [argument, non_head_non_empty]. %% non-empty argument
modifier_empty <- [modifier, non_head_empty].         %% empty modifier
modifier_non_empty <- [modifier, non_head_non_empty]. %% non-empty modifier

%% representation of PropBank-style annotations
propbank_label <- [bot] +
  [PROP_ID\integer(0)].      %% ID number uniquely assigned in a sentence
propbank_arg <- [propbank_label] +   %% annotation for arguments
  [ARG_POS\string(1)].       %% argument label ("ARG1", ...)
propbank_rel <- [propbank_label] +   %% annotation for predicates
  [PRED_NAME\string(1),      %% name of a predicate
   PRED_SEM_ID\string(2)].   %% semantic ID of a predicate

%% list of 'propbank_label'
prop_list <- [list].
prop_cons <- [prop_list, cons] +
  [hd\propbank_label,
   tl\prop_list].
prop_nil <- [prop_list, nil].

%% a node in a parse tree
tree_node <- [bot] +
  [SYM\string(0),            %% symbol of the node
   FUNC\list(1),             %% function labels (SBJ, TPC, etc.)
   HEAD_MARK\head_mark(2),   %% head/argument/modifier marks
   PROP_LIST\prop_list(5),   %% for PropBank-style annotations
   NODE_SIGN\bot(10),        %% sign corresponding to the node
   ANNOT\bot(11)             %% other annotations
  ].
tree_node_nts <- [tree_node] +       %% nonterminal node
  [ID\list(3),               %% coindex ID assigned to the node
   SCHEMA_NAME\bot(4)        %% schema name
  ].
tree_node_term <- [tree_node] +      %% terminal node
  [WORD\word(3)].            %% word corresponding to the node
tree_node_empty <- [tree_node] +     %% empty terminal node
  [COIND\list(3),            %% coindex IDs
   EMPTY_WORD\string(4),     %% string representing an empty word
   COIND_NODE\list(5)        %% coindexed nodes
  ].

%% parse tree
tree_base <- [bot] +
  [TREE_NODE\tree_node(0)].  %% the node of the tree
tree <- [tree_base].         %% terminal/nonterminal of the tree
tree_nts <- [tree] +                 %% nonterminal
  [TREE_NODE\tree_node_nts,
   TREE_DTRS\list(20)].      %% daughter trees
tree_term <- [tree] +                %% terminal
  [TREE_NODE\tree_node_term].
tree_empty <- [tree] +               %% empty terminal
  [TREE_NODE\tree_node_empty].

%% used for pattern matching of trees
tree_any <- [tree_base] +
  [ANY_TREES\list(10)].      %% a list of matched trees

For 'tree_any', see "Pattern matching of trees".
%% derivation: common supertype of internal and terminal derivation nodes
derivation <- [bot] +                       %% node in a derivation
  [DERIV_SIGN\bot(5)].              %% sign corresponding to the node
derivation_internal <- [derivation] +       %% internal node of a derivation
  [DERIV_SCHEMA\bot(0),             %% name of the applied schema
   DERIV_DTRS\list(10)].            %% daughter derivations
derivation_terminal <- [derivation] +       %% terminal node of a derivation
  [TERM_WORD\word(0),               %% word corresponding to the node
   TERM_TEMPLATE\lex_template(1),   %% template name assigned to the word
   LEXENTRY_SIGN\bot(10),           %% sign of a lexical entry
   LEXEME_SIGN\bot(11)              %% sign of a lexeme
  ].