For Chinese, a custom word segmenter has to be plugged in alongside the Chinese POS, parser, and NER models. The configuration looks like this:
props.setProperty("customAnnotatorClass.segment","edu.stanford.nlp.pipeline.ChineseSegmenterAnnotator");
props.setProperty("pos.model","edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger");
props.setProperty("parse.model","edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz");
props.setProperty("segment.model","edu/stanford/nlp/models/segmenter/chinese/ctb.gz");
props.setProperty("segment.serDictionary","edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz");
props.setProperty("segment.sighanCorporaDict","edu/stanford/nlp/models/segmenter/chinese");
props.setProperty("segment.sighanPostProcessing","true");
props.setProperty("ssplit.boundaryTokenRegex","[.]|[!?]+|[。]|[!?]+");
props.setProperty("ner.model","edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz");
props.setProperty("ner.applyNumericClassifiers","false");
props.setProperty("ner.useSUTime","false");
Spanish is simpler: no custom segmenter is needed, since tokenization is handled by the standard PTBTokenizer (the same one used for English). The configuration looks like this:
props.setProperty("tokenize.language","ES");
props.setProperty("sentiment.model","//");
props.setProperty("pos.model","src/models/pos-tagger/spanish/spanish-distsim.tagger");
props.setProperty("ner.model","src/models/ner/spanish.ancora.distsim.s512.crf.ser.gz");
props.setProperty("ner.applyNumericClassifiers","false");
props.setProperty("ner.useSUTime","false");
props.setProperty("parse.model","src/models/lexparser/spanishPCFG.ser.gz");
Note that 'tokenize.language' can also be given in lowercase as 'es'. For Chinese, values such as "ch", "cn", "zh", and "zh-cn" are used. With these settings, the pipeline is assembled in the same way as before.
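As with Chinese, here is a minimal sketch of assembling the Spanish pipeline, continuing with the same props object and using CoreMap (edu.stanford.nlp.util) and CoreAnnotations (edu.stanford.nlp.ling) to read the result; the annotator list, the sample sentence, and the printed output are my assumptions:
// Standard tokenizer, so "tokenize" rather than a custom "segment" annotator (assumed annotator list).
props.setProperty("annotators", "tokenize, ssplit, pos, ner, parse");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
Annotation document = new Annotation("Me encanta Barcelona."); // sample Spanish sentence (assumption)
pipeline.annotate(document);
// Print each detected sentence to verify that splitting and annotation worked.
for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    System.out.println(sentence.toString());
}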