I wrote a simple program in Java using the Stanford Parser. It must generate the list of tagged words (an ArrayList of TaggedWord) for a sentence using the Stanford Parser for Java.
package postagger;

/*
 * Alphabetical list of part-of-speech tags used in the Penn Treebank Project:
 *
 * Number Tag  Description
 *  1. CC    Coordinating conjunction
 *  2. CD    Cardinal number
 *  3. DT    Determiner
 *  4. EX    Existential there
 *  5. FW    Foreign word
 *  6. IN    Preposition or subordinating conjunction
 *  7. JJ    Adjective
 *  8. JJR   Adjective, comparative
 *  9. JJS   Adjective, superlative
 * 10. LS    List item marker
 * 11. MD    Modal
 * 12. NN    Noun, singular or mass
 * 13. NNS   Noun, plural
 * 14. NNP   Proper noun, singular
 * 15. NNPS  Proper noun, plural
 * 16. PDT   Predeterminer
 * 17. POS   Possessive ending
 * 18. PRP   Personal pronoun
 * 19. PRP$  Possessive pronoun
 * 20. RB    Adverb
 * 21. RBR   Adverb, comparative
 * 22. RBS   Adverb, superlative
 * 23. RP    Particle
 * 24. SYM   Symbol
 * 25. TO    to
 * 26. UH    Interjection
 * 27. VB    Verb, base form
 * 28. VBD   Verb, past tense
 * 29. VBG   Verb, gerund or present participle
 * 30. VBN   Verb, past participle
 * 31. VBP   Verb, non-3rd person singular present
 * 32. VBZ   Verb, 3rd person singular present
 * 33. WDT   Wh-determiner
 * 34. WP    Wh-pronoun
 * 35. WP$   Possessive wh-pronoun
 * 36. WRB   Wh-adverb
 */

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.io.StringReader;

import semanticengine.Description;

import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;

/**
 * Part-of-speech tagging front end built on the Stanford lexicalized parser.
 *
 * <p>Sentences are parsed with {@link #Parse(String)} to obtain a list of
 * {@link TaggedWord}s (Penn Treebank tags, see the table at the top of this
 * file); {@link #getPattern(ArrayList)} then maps the first verb group of a
 * tagged sentence to a coarse tense/voice label.
 */
public class EnglishParser {

    /**
     * Shared parser instance. It is (re)assigned every time an
     * {@code EnglishParser} is constructed and is {@code null} until then.
     */
    public static LexicalizedParser lp = null;

    /**
     * Loads the English PCFG grammar from {@code grammar/englishPCFG.ser.gz}
     * (a path relative to the working directory) into the shared {@link #lp}.
     */
    public EnglishParser() {
        lp = new LexicalizedParser("grammar/englishPCFG.ser.gz");
    }

    /**
     * Interactive loop: reads sentences from standard input, tags each one and
     * prints the tagged words. The loop ends when the line {@code end} is
     * entered or when the input stream is exhausted.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        EnglishParser parser = new EnglishParser();
        Scanner sc = new Scanner(System.in);
        // hasNextLine() guards against NoSuchElementException at end of input.
        while (sc.hasNextLine()) {
            String line = sc.nextLine();
            String trimmed = line.trim();
            // Strings must be compared with equals(); the previous `!=` compared
            // references and therefore never recognised the "end" sentinel.
            if ("end".equals(trimmed)) {
                break;
            }
            if (trimmed.length() == 0) {
                continue; // nothing to tag on a blank line
            }
            ArrayList<TaggedWord> tagged = parser.Parse(line);
            // NOTE(review): the Description instance is not used here; the
            // construction is kept because the constructor may do work of its own.
            Description obj = new Description(tagged);
            System.out.println(tagged);
        }
    }

    /**
     * Demonstrates loading a file, splitting it into sentences and tokens with
     * {@link DocumentPreprocessor}, parsing every sentence and printing both
     * the Penn-style tree and the typed dependencies.
     *
     * @param lp       parser used to parse each sentence
     * @param filename path of the text file to process
     */
    public static void demoDP(LexicalizedParser lp, String filename) {
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        // You could also create a tokenizer here and pass it to
        // DocumentPreprocessor.
        for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
            Tree parse = lp.apply(sentence);
            parse.pennPrint();
            System.out.println();

            GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
            Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(true);
            System.out.println(tdl);
            System.out.println();
        }
    }

    /**
     * Part-of-speech tags a sentence: every whitespace-separated token of
     * {@code English} is assigned its word class (verb, adjective, ...).
     *
     * <p>Tokens are split on runs of whitespace, so repeated or leading and
     * trailing blanks do not produce empty words.
     *
     * @param English the sentence to tag; may be {@code null} or blank
     * @return the tagged words in sentence order; an empty list when the input
     *         is {@code null} or contains no tokens
     */
    public ArrayList<TaggedWord> Parse(String English) {
        if (English == null) {
            return new ArrayList<TaggedWord>();
        }
        String trimmed = English.trim();
        if (trimmed.length() == 0) {
            return new ArrayList<TaggedWord>();
        }

        String[] sent = trimmed.split("\\s+"); // e.g. { "This", "is", "an", "easy", "sentence", "." }
        List<CoreLabel> rawWords = new ArrayList<CoreLabel>(sent.length);
        for (String word : sent) {
            CoreLabel label = new CoreLabel();
            label.setWord(word);
            rawWords.add(label);
        }
        Tree parse = lp.apply(rawWords);
        return parse.taggedYield();
    }

    /**
     * Returns the tense/voice pattern of a tagged sentence, decided by the
     * first finite or base-form verb tag found and the one or two tags that
     * immediately follow it.
     *
     * <ul>
     *   <li>{@code VBZ}/{@code VBP} start a present pattern:
     *       {@code PRESENT_SIMP}, {@code PRESENT_CONT}, {@code PRESENT_PASS},
     *       {@code PRESENT_CONT_PASS}.</li>
     *   <li>{@code VBD} starts a past pattern: {@code PAST_SIMP},
     *       {@code PAST_CONT}, {@code PAST_PASS}, {@code PAST_CONT_PASS}.</li>
     *   <li>{@code VB} starts a future pattern: {@code FUT_SIMP},
     *       {@code FUT_CONT}, {@code FUT_PASS}.</li>
     * </ul>
     *
     * <p>Within a pattern, a following {@code VBG} means continuous, a
     * following {@code VBN} means passive, and {@code VBG VBN} means
     * continuous passive (present and past only).
     *
     * @param Sen the tagged words of one sentence; may be {@code null}
     * @return one of the labels above, or {@code NO_PATTERN} when no verb tag
     *         is found (or {@code Sen} is {@code null})
     */
    public String getPattern(ArrayList<TaggedWord> Sen) {
        if (Sen == null) {
            return "NO_PATTERN";
        }
        Iterator<TaggedWord> its = Sen.iterator();
        while (its.hasNext()) {
            String tag = its.next().tag();
            if ("VBZ".equals(tag) || "VBP".equals(tag)) {
                return describeVerbGroup(its, "PRESENT", true);
            }
            if ("VBD".equals(tag)) {
                return describeVerbGroup(its, "PAST", true);
            }
            if ("VB".equals(tag)) {
                // The future branch never distinguished a continuous passive,
                // so VB VBG is reported as FUT_CONT without looking further.
                return describeVerbGroup(its, "FUT", false);
            }
        }
        return "NO_PATTERN";
    }

    /**
     * Classifies the verb group whose head has just been consumed from
     * {@code rest}, returning {@code tense} suffixed with {@code _SIMP},
     * {@code _CONT}, {@code _PASS} or {@code _CONT_PASS}.
     *
     * @param rest                   iterator positioned right after the head verb
     * @param tense                  label prefix ({@code PRESENT}, {@code PAST}, {@code FUT})
     * @param checkContinuousPassive whether a {@code VBN} after a {@code VBG}
     *                               should yield {@code _CONT_PASS}
     */
    private static String describeVerbGroup(Iterator<TaggedWord> rest, String tense,
            boolean checkContinuousPassive) {
        if (!rest.hasNext()) {
            return tense + "_SIMP";
        }
        String following = rest.next().tag();
        if ("VBG".equals(following)) {
            if (checkContinuousPassive && rest.hasNext() && "VBN".equals(rest.next().tag())) {
                return tense + "_CONT_PASS";
            }
            return tense + "_CONT";
        }
        if ("VBN".equals(following)) {
            return tense + "_PASS";
        }
        return tense + "_SIMP";
    }
}
source share