Marking text in an html document

Suppose I have the following markup:

<html> <head> <title>Page Title</title> </head> <body> <h1>Some title</h1> <p>First paragraph</p> <p>Second paragraph</p> </body> </html> 

I need to mark a part of the text that spans two paragraphs, namely "irst paragraph Secon". It would look something like this:

 <html> <head> <title>Page Title</title> </head> <body> <h1>Some title</h1> <p>F <mark> irst paragraph</p><p>Secon </mark> d paragraph</p> </body> </html> 

But the problem is that the html markup will be broken. The more complex the markup, the more problems this approach will have.

Question:

I am looking for ideas on how to take the first HTML example and apply a function that returns an HTML structure in which the text "irst paragraph Secon" is marked in some way, without breaking the markup.

I currently have:

  • the parent container of the first paragraph
  • the text to mark ("irst paragraph Secon")
  • the offset of that text within the first paragraph
+6
source share
2 answers

If you want to highlight text in a document, this plugin will be useful to you.

https://github.com/julmot/jquery.mark

Script example: https://jsfiddle.net/julmot/vpav6tL1/

Usage is as simple as:

 // jquery.mark usage: presumably highlights every occurrence of "keyword" inside the elements matched by ".context" — confirm against the plugin documentation.
 $(".context").mark("keyword"); 
+4
source

Basically, you should:

  • split the document into words
  • find the first word inside the parent element
  • skip the words that lie before the offset
  • mark the matching words

Because every change is made inside a single word, the markup can never be broken. I have added a working example below. However, I am not sure that it will work in all browsers.

Some of the functions like mergeWords are not used in the example, but I have included them because they may be useful.

 /**
  * Word-level text highlighting for an HTML document.
  *
  * Every text node is first wrapped, word by word, in <span class="word">
  * elements. Highlighting then only ever changes the inside of a single
  * word span, so a highlighted range may cross element boundaries without
  * producing invalid markup.
  */

 // True once the document has been split into word spans.
 var splittedToWords = false;

 /**
  * Tells whether a node must be left untouched when splitting into words.
  *
  * Skipped nodes: comments (nodeType 8), BLOCKQUOTE, SCRIPT and DIV
  * elements, and childless nodes whose text has no non-whitespace character.
  *
  * @param {Node} el - Node to test.
  * @returns {boolean} true when the node must be skipped.
  */
 function ignore(el) {
   return (
     el.nodeType === 8 ||
     el.tagName === "BLOCKQUOTE" ||
     el.tagName === "SCRIPT" ||
     el.tagName === "DIV" ||
     (!el.hasChildNodes() && el.textContent.match(/\S+/) === null)
   );
 }

 /**
  * Recursively replaces every non-ignored text node below `el` with one
  * <span class="word" word-index="n"> per word.
  *
  * A word keeps its trailing whitespace, and the first word of a text node
  * also keeps the leading whitespace, so the visible text is unchanged.
  *
  * @param {Node} el - Root node to split (a childless node is treated as text).
  */
 function splitToWords(el) {
   if (el.hasChildNodes()) {
     // Walk backwards: replacing a text node by several spans changes the
     // child list behind the current index only.
     for (var i = el.childNodes.length - 1; i >= 0; i--) {
       var node = el.childNodes[i];
       if (!ignore(node)) {
         splitToWords(node);
       }
     }
   } else {
     var parentNode = el.parentNode;
     if (parentNode) {
       var words = el.textContent.match(/\s*\S+\s*/g) || [];
       for (var j = 0; j < words.length; j++) {
         var wordNode = document.createElement("span");
         wordNode.className = "word";
         // textContent keeps whitespace verbatim; innerText would turn
         // newlines into <br> elements.
         wordNode.textContent = words[j];
         wordNode.setAttribute("word-index", j);
         parentNode.insertBefore(wordNode, el);
       }
       parentNode.removeChild(el);
     }
   }
   splittedToWords = true;
 }

 /**
  * Replaces `element` by its own child nodes, keeping their order.
  *
  * @param {Node} element - Node to unwrap; nothing happens if it is detached.
  */
 function unwrap(element) {
   var parent = element.parentNode;
   if (!parent) {
     return;
   }
   while (element.firstChild) {
     parent.insertBefore(element.firstChild, element);
   }
   parent.removeChild(element);
 }

 /**
  * Removes every word span created by splitToWords from the document,
  * leaving the span contents (including any <mark>) in place.
  *
  * @param {Node} [el] - Unused; kept so existing callers keep working.
  */
 function mergeWords(el) {
   // Copy first: getElementsByClassName is live and shrinks while unwrapping.
   var words = Array.prototype.slice.call(document.getElementsByClassName("word"));
   for (var i = 0; i < words.length; i++) {
     unwrap(words[i]);
   }
   // The spans are gone, so the next mark() call has to split again.
   splittedToWords = false;
 }

 /**
  * Wraps `len` characters of the word element's text, starting at `pos`,
  * in a <mark> element.
  *
  * The content is rebuilt from DOM nodes rather than an HTML string so that
  * characters such as "<" or "&" in the text are never parsed as markup.
  *
  * @param {Element} el - Word element whose text is partially marked.
  * @param {number} pos - Index of the first character to mark.
  * @param {number} len - Number of characters to mark.
  */
 function markWord(el, pos, len) {
   var text = el.textContent;
   var end = pos + len;
   var highlighted = document.createElement("mark");
   highlighted.textContent = text.substring(pos, end);

   el.textContent = "";
   if (pos > 0) {
     el.appendChild(document.createTextNode(text.substring(0, pos)));
   }
   el.appendChild(highlighted);
   if (end < text.length) {
     el.appendChild(document.createTextNode(text.substring(end)));
   }
 }

 /**
  * Marks `text` in the document, starting `offset` characters into the
  * words of `element` and continuing into the following words.
  *
  * The document body is split into word spans on first use. Words of
  * `text` that are not found in the corresponding document word are skipped.
  *
  * @param {Element} element - Parent element of the first word to mark.
  * @param {number} offset - Character offset of the text inside `element`.
  * @param {string} text - Text to mark; it is matched word by word.
  */
 function mark(element, offset, text) {
   if (!splittedToWords) {
     splitToWords(document.body);
   }

   var words = Array.prototype.slice.call(document.getElementsByClassName("word"));
   var wordsCount = words.length;

   // Locate the first word that is a direct child of `element`.
   var first = -1;
   for (var k = 0; k < wordsCount; k++) {
     if (words[k].parentElement === element) {
       first = k;
       break;
     }
   }
   if (first === -1) {
     return;
   }

   // Skip the words that end at or before the offset.
   var i = first;
   var pos = 0;
   while (i < wordsCount) {
     var wordLength = words[i].textContent.length;
     if (offset < pos + wordLength) {
       break;
     }
     pos += wordLength;
     i++;
   }

   var tWords = text.match(/(\S+\s*)/g) || [];
   for (var ti = 0; ti < tWords.length && i < wordsCount; ti++, i++) {
     var wordEl = words[i];
     var word = wordEl.textContent;
     var tWord = tWords[ti].trim();
     // In the first word, prefer the occurrence at the requested offset and
     // fall back to the first occurrence if the offset was imprecise.
     var from = ti === 0 ? Math.max(0, offset - pos) : 0;
     var at = word.indexOf(tWord, from);
     if (at === -1) {
       at = word.indexOf(tWord);
     }
     if (at === -1) {
       continue;
     }
     markWord(wordEl, at, tWord.length);
   }
 }

 // Demo: runs only where a DOM is available and the #e element exists.
 if (typeof document !== "undefined") {
   var e = document.getElementById("e");
   if (e) {
     mark(e, 1, 'irst paragraph Second');
   }
 }
 <h1>Some title</h1> <p id="e">First paragraph</p> <p>Second paragraph</p> 
0
source

All Articles