Term Frequency in Lucene 4.0

I am trying to calculate term frequency using Lucene 4.0. I got the document frequency working, but I can't figure out how to get the frequency of a term within a document using the API. Here is the code I have:

/**
 * Adds one document with a single "content" field to the index.
 *
 * <p>The field is indexed with document ids and term frequencies
 * ({@code DOCS_AND_FREQS}), stored, and has term vectors (with positions) enabled.
 *
 * @param writer  the index writer that receives the document
 * @param content the text placed in the "content" field
 * @throws IOException if the writer fails to add the document
 */
private static void addDoc(IndexWriter writer, String content) throws IOException {
  FieldType fieldType = new FieldType();
  fieldType.setStoreTermVectors(true);
  fieldType.setStoreTermVectorPositions(true);
  fieldType.setIndexed(true);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  fieldType.setStored(true);

  Document doc = new Document();
  doc.add(new Field("content", content, fieldType));
  writer.addDocument(doc);
}

/**
 * Builds a two-document in-memory index and prints the document frequency of the
 * term "Lucene" in the "content" field, i.e. how many documents contain it.
 */
public static void main(String[] args) throws IOException, ParseException {
  Directory directory = new RAMDirectory();
  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);
  IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);

  IndexWriter writer = new IndexWriter(directory, config);
  try {
    addDoc(writer, "Lucene is stupid");
    addDoc(writer, "Java is great");
  } finally {
    // Close even when indexing fails so the writer is not leaked.
    writer.close();
  }

  IndexReader reader = DirectoryReader.open(directory);
  try {
    System.out.println(reader.docFreq(new Term("content", "Lucene")));
  } finally {
    // Close even when the lookup fails so the reader is not leaked.
    reader.close();
  }
}

I tried to do something like reader.getTermVector(0, "content")... but I can't find a method to just get the frequency of a specific term in this document.

Thanks!

+4
source share
1 answer

OK, figured it out. You can get a DocsEnum object from MultiFields and then iterate over it.

 /**
  * Adds one document with a single "content" field to the index.
  *
  * <p>The field is indexed with document ids and term frequencies
  * ({@code DOCS_AND_FREQS}), stored, and has term vectors (with positions) enabled.
  *
  * @param writer  the index writer that receives the document
  * @param content the text placed in the "content" field
  * @throws IOException if the writer fails to add the document
  */
 private static void addDoc(IndexWriter writer, String content) throws IOException {
   FieldType fieldType = new FieldType();
   fieldType.setStoreTermVectors(true);
   fieldType.setStoreTermVectorPositions(true);
   fieldType.setIndexed(true);
   fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
   fieldType.setStored(true);

   Document doc = new Document();
   doc.add(new Field("content", content, fieldType));
   writer.addDocument(doc);
 }

 /**
  * Builds a two-document in-memory index and prints, for every document that
  * contains the term "bla" in the "content" field, how often the term occurs in
  * that document (one number per line).
  */
 public static void main(String[] args) throws IOException, ParseException {
   Directory directory = new RAMDirectory();
   Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);
   IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);

   IndexWriter writer = new IndexWriter(directory, config);
   try {
     addDoc(writer, "bla bla bla bleu bleu");
     addDoc(writer, "bla bla bla bla");
   } finally {
     // Close even when indexing fails so the writer is not leaked.
     writer.close();
   }

   DirectoryReader reader = DirectoryReader.open(directory);
   try {
     DocsEnum de = MultiFields.getTermDocsEnum(
         reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("bla"));
     // getTermDocsEnum returns null when the field or the term does not exist
     // (per the Lucene Javadoc), so guard before iterating to avoid an NPE.
     if (de != null) {
       while (de.nextDoc() != DocsEnum.NO_MORE_DOCS) {
         // freq() is the term's occurrence count in the current document.
         System.out.println(de.freq());
       }
     }
   } finally {
     // Close even when the lookup fails so the reader is not leaked.
     reader.close();
   }
 }
+2
source

All Articles