Stanford NLP - Sentiment Analysis for Chinese

I want to create a sentiment analysis program that takes a dataset in Chinese and determines whether it contains more positive, negative, or neutral statements. Following an example, I created a sentiment analysis program for English (using stanford-corenlp) that does exactly what I want, but I need it to accept Chinese input.

Questions:

    // Configure a CoreNLP pipeline that runs the tokenize, ssplit, parse and
    // sentiment annotators, in that order.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    // gender, lemma, ner, parse, pos, sentiment, ssplit, tokenize
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable

    String sentimentText = "Fun day, isn't it?";
    // Labels are looked up by the predicted class index, so the order matters.
    // Assumes the predicted class is always in the range 0..4 -- TODO confirm.
    String[] ratings = {"Very Negative","Negative", "Neutral", "Positive", "Very Positive"};
    Annotation annotation = pipeline.process(sentimentText);
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        // The sentiment annotator stores its result as an annotated tree on each sentence.
        Tree tree = sentence.get(SentimentCoreAnnotations.AnnotatedTree.class);
        int score = RNNCoreAnnotations.getPredictedClass(tree);
        // Print (score - 2) so that the "Neutral" index (2) is shown as 0.
        System.out.println("sentence:'"+ sentence + "' has a score of "+ (score-2) +" rating: " + ratings[score]);
        System.out.println(tree);
    }

, , . , . . http://nlp.stanford.edu/software/corenlp.shtml, . - , , , ?

, !

PS: java , , , .

:

, , Stanford Parser? java,

stanford

0
2

, :

: , ​​ CrowdFlower. " " CrowdFlower .

+1

. :

:

// Register ChineseSegmenterAnnotator as a custom annotator under the name "segment".
props.setProperty("customAnnotatorClass.segment","edu.stanford.nlp.pipeline.ChineseSegmenterAnnotator");


        // Chinese models for the POS tagger and the parser.
        props.setProperty("pos.model","edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger");
        props.setProperty("parse.model","edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz");

        // Settings for the custom "segment" annotator: model, dictionary and corpora directory.
        props.setProperty("segment.model","edu/stanford/nlp/models/segmenter/chinese/ctb.gz");
        props.setProperty("segment.serDictionary","edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz");
        props.setProperty("segment.sighanCorporaDict","edu/stanford/nlp/models/segmenter/chinese");
        props.setProperty("segment.sighanPostProcessing","true");

        // Sentence boundaries: ASCII '.', runs of ASCII '!'/'?', the ideographic
        // full stop, and runs of full-width '！'/'？'. The last alternative must use
        // the full-width characters; with ASCII "[!?]+" it only repeats the second
        // alternative and Chinese exclamations/questions are never split.
        props.setProperty("ssplit.boundaryTokenRegex","[.]|[!?]+|[。]|[！？]+");


        // Chinese NER model; numeric classifiers and SUTime are switched off.
        props.setProperty("ner.model","edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz");
        props.setProperty("ner.applyNumericClassifiers","false");
        props.setProperty("ner.useSUTime","false");

, - , - - PTBTokenizer ( ).

:   props.setProperty( "tokenize.language", "ES" );           props.setProperty( "sentiment.model", "//" );

        // Spanish POS tagger model, given as a relative path under src/models.
        props.setProperty("pos.model","src/models/pos-tagger/spanish/spanish-distsim.tagger");


        // Spanish NER model; numeric classifiers and SUTime are switched off.
        props.setProperty("ner.model","src/models/ner/spanish.ancora.distsim.s512.crf.ser.gz");
        props.setProperty("ner.applyNumericClassifiers","false");
        props.setProperty("ner.useSUTime","false");

        // Spanish PCFG model for the parser.
        props.setProperty("parse.model","src/models/lexparser/spanishPCFG.ser.gz");

. , 'tokenize.language' 'es'. . "ch", "cn", "zh", "zh-cn", . , .

0

All Articles