I have a solr search that returns contextual highlight results showing a URL and emails with a space after periods - for example, "www. Google. Com" or "email @google. Com". Anyway, to disable this, as usual? Thank.
Query:
/solr/core1/select?q=email&start=0&rows=10&sort=score+desc&fq=%28categories%3A%28*8*%29%29&wt=javabin&version=2
highlight the section in the configuration:
<searchComponent class="solr.HighlightComponent" name="highlight">
<highlighting>
<fragmenter name="gap"
default="true"
class="solr.highlight.GapFragmenter">
<lst name="defaults">
<int name="hl.fragsize">250</int>
</lst>
</fragmenter>
<fragmenter name="regex"
class="solr.highlight.RegexFragmenter">
<lst name="defaults">
<int name="hl.fragsize">70</int>
<float name="hl.regex.slop">0.5</float>
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
</lst>
</fragmenter>
<formatter name="html"
default="true"
class="solr.highlight.HtmlFormatter">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<b class="highlite">]]></str>
<str name="hl.simple.post"><![CDATA[</b>]]></str>
</lst>
</formatter>
<encoder name="html"
class="solr.highlight.HtmlEncoder" />
<fragListBuilder name="simple"
default="true"
class="solr.highlight.SimpleFragListBuilder"/>
<fragListBuilder name="single"
class="solr.highlight.SingleFragListBuilder"/>
<fragmentsBuilder name="default"
default="true"
class="solr.highlight.ScoreOrderFragmentsBuilder">
</fragmentsBuilder>
</highlighting>
Updated Scheme:
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.UAX29URLEmailTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.UAX29URLEmailTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
Fields
<fields>
.....
<field name="tease" type="text" indexed="false" stored="true" />
<field name="text" type="text" indexed="false" stored="true" />
<field name="fulltext" type="text" indexed="true" stored="false" multiValued="true"/>
</fields>
<copyField source="text" dest="fulltext"/>
<copyField source="title" dest="fulltext"/>
<copyField source="tease" dest="fulltext"/>
<defaultSearchField>fulltext</defaultSearchField>
source
share