Fastest method to escape HTML tags as HTML entities?

I am writing a Chrome extension that does a lot of the following task: sanitizing strings that may contain HTML tags by converting < , > and & to &lt; , &gt; and &amp; respectively.

(In other words, just like PHP htmlspecialchars(str, ENT_NOQUOTES) - I don't think there is a real need to convert double quote characters.)

This is the fastest function I've found so far:

 /**
  * Escapes the HTML-significant characters &, < and > in a string.
  * Quote characters are left untouched.
  *
  * @param {string} str - text that may contain markup
  * @returns {string} the text with &, < and > replaced by their entities
  */
 function safe_tags(str) {
   // Ampersands are handled first so the entities produced by the later
   // steps are not escaped a second time.
   var ampersandsDone = str.replace(/&/g,'&amp;');
   var openersDone = ampersandsDone.replace(/</g,'&lt;');
   return openersDone.replace(/>/g,'&gt;');
 }

But there is still a noticeable lag when I need to run several thousand strings through it at a time.

Can anyone improve this? This is mainly for strings of 10 to 150 characters, if that matters.

(One idea I had was not to bother encoding the greater-than sign - would there be any real danger in that?)

+93
performance javascript string html regex
Mar 31 '11 at 11:28
source share
11 answers

You can try passing a callback function to perform a replacement:

 // Lookup table: HTML-significant character -> replacement entity.
 var tagsToReplace = {
   '&': '&amp;',
   '<': '&lt;',
   '>': '&gt;'
 };

 /**
  * Replacement callback: maps one matched character to its entity.
  *
  * @param {string} tag - a single matched character
  * @returns {string} the table entry for `tag`, or `tag` itself when the
  *     table has no truthy entry for it
  */
 function replaceTag(tag) {
   var entity = tagsToReplace[tag];
   if (entity) {
     return entity;
   }
   return tag;
 }

 /**
  * Escapes &, < and > in a single pass over the string.
  *
  * @param {string} str - text that may contain markup
  * @returns {string} the escaped text
  */
 function safe_tags_replace(str) {
   var escaped = str.replace(/[&<>]/g, replaceTag);
   return escaped;
 }

Here is a performance test: http://jsperf.com/encode-html-entities for comparison with calling the replace function repeatedly and using the DOM method suggested by Dmitry.

Your way seems to be faster ...

Why do you need this?

+74
Mar 31 '11 at 12:26
source share

Here you can do it like this:

 // Scratch <textarea> element shared by both helpers below; it is created
 // once, when this statement is evaluated.
 // NOTE(review): at the top level of a classic browser script, `var escape`
 // replaces the built-in global escape() function - confirm nothing relies on it.
 var escape = document.createElement('textarea');
 /**
  * Stores `html` as the scratch element's text content and returns the
  * element's innerHTML.
  * Presumably this relies on the browser serializing the text with &, < and >
  * written as entities - verify against the DOM specification.
  *
  * @param html text to escape
  * @returns the scratch element's innerHTML after the assignment
  */
 function escapeHTML(html) { escape.textContent = html; return escape.innerHTML; }
 /**
  * Inverse helper: stores `html` as the scratch element's innerHTML and
  * returns the element's text content.
  *
  * @param html markup whose entities should be decoded
  * @returns the scratch element's textContent after the assignment
  */
 function unescapeHTML(html) { escape.innerHTML = html; return escape.textContent; }

Here is a demo.

+91
Feb 12 '12 at 17:58
source share

Martijn's method as a prototype function:

 /**
  * Adds an escape() method to strings that replaces &, < and > with their
  * HTML entities in a single pass.
  *
  * @returns {string} the escaped copy of the receiver
  */
 String.prototype.escape = function() {
   var entityFor = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };

   // Maps one matched character to its entity; falls back to the character
   // itself when the table has no truthy entry.
   function toEntity(ch) {
     var entity = entityFor[ch];
     return entity ? entity : ch;
   }

   return this.replace(/[&<>]/g, toEntity);
 };

 // Usage example.
 var a = "<abc>";
 var b = a.escape(); // "&lt;abc&gt;"
+27
Nov 24 '12 at 4:24
source share

The fastest way:

 /**
  * Escapes text by wrapping it in a text node inside a fresh <div> and
  * reading the div's innerHTML back.
  *
  * @param html text to escape
  * @returns the innerHTML of the div that holds the text node
  */
 function escapeHTML(html) {
   var container = document.createElement('div');
   var textNode = document.createTextNode(html);
   // appendChild hands back the node it was given; its parent is the div.
   return container.appendChild(textNode).parentNode.innerHTML;
 }

This method is about twice as fast as the replacement methods, see http://jsperf.com/htmlencoderegex/35 .

Source: stack overflow

+9
Jun 19 '15 at 5:38
source share

All-in-one script:

 // HTML entities Encode/Decode

 /**
  * Replaces &, <, >, " and ' with HTML entities in a single pass.
  *
  * @param {string} str - raw text
  * @returns {string} the text with the five characters replaced
  */
 function htmlspecialchars(str) {
   var map = {
     "&": "&amp;",
     "<": "&lt;",
     ">": "&gt;",
     "\"": "&quot;",
     "'": "&#39;" // ' -> &apos; for XML only
   };
   return str.replace(/[&<>"']/g, function(m) { return map[m]; });
 }

 /**
  * Reverses htmlspecialchars(): turns the five entities back into characters.
  * Decoding is a single pass, so "&amp;lt;" becomes "&lt;", not "<".
  *
  * @param {string} str - text containing entities
  * @returns {string} the decoded text
  */
 function htmlspecialchars_decode(str) {
   var map = {
     "&amp;": "&",
     "&lt;": "<",
     "&gt;": ">",
     "&quot;": "\"",
     "&#39;": "'"
   };
   return str.replace(/(&amp;|&lt;|&gt;|&quot;|&#39;)/g, function(m) { return map[m]; });
 }

 /**
  * Assigns `str` to a fresh <textarea>'s innerHTML and returns the element's
  * innerHTML. Requires a browser `document`.
  * NOTE(review): because the input is assigned as markup rather than as text,
  * entities already present in `str` are presumably not encoded a second
  * time - confirm this is the intended behaviour.
  *
  * @param {string} str - text to encode
  * @returns {string} the textarea's innerHTML after the assignment
  */
 function htmlentities(str) {
   var textarea = document.createElement("textarea");
   textarea.innerHTML = str;
   return textarea.innerHTML;
 }

 /**
  * Assigns `str` to a fresh <textarea>'s innerHTML and returns the element's
  * value. Requires a browser `document`.
  *
  * @param {string} str - text containing entities
  * @returns {string} the textarea's value after the assignment
  */
 function htmlentities_decode(str) {
   var textarea = document.createElement("textarea");
   textarea.innerHTML = str;
   return textarea.value;
 }

http://pastebin.com/JGCVs0Ts

+8
Jun 29 2018-12-12T00:
source share

The AngularJS source code also has a version of this inside angular-sanitize.js.

 // Matches a UTF-16 surrogate pair (one code point outside the BMP).
 var SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
     // Match everything outside of normal chars and " (quote character)
     NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;

 /**
  * Escapes all potentially dangerous characters, so that the
  * resulting string can be safely inserted into attribute or
  * element text.
  *
  * Steps, in order: & becomes &amp;; each surrogate pair becomes one decimal
  * numeric reference for its code point; every remaining character matched
  * by NON_ALPHANUMERIC_REGEXP becomes a decimal numeric reference; finally
  * < and > become &lt; and &gt;.
  *
  * @param value
  * @returns {string} escaped text
  */
 function encodeEntities(value) {
   return value.
     replace(/&/g, '&amp;').
     replace(SURROGATE_PAIR_REGEXP, function(value) {
       var hi = value.charCodeAt(0);
       var low = value.charCodeAt(1);
       // Combine the high and low surrogate into the full code point.
       return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
     }).
     replace(NON_ALPHANUMERIC_REGEXP, function(value) {
       return '&#' + value.charCodeAt(0) + ';';
     }).
     replace(/</g, '&lt;').
     replace(/>/g, '&gt;');
 }
+8
May 31 '15 at
source share

Even faster / shorter solution:

 // Builds a throwaway Option from `html` and reads its innerHTML back into `escaped`.
 // NOTE(review): neither `escaped` nor `html` is declared in this snippet -
 // presumably both come from surrounding code; in strict mode the assignment
 // throws if `escaped` is undeclared.
 escaped = new Option(html).innerHTML 

This relies on a strange vestige of JavaScript whereby the Option element retains a constructor that performs this kind of escaping automatically.

Credit to https://github.com/jasonmoo/t.js/blob/master/t.js

+4
Mar 09 '19 at 20:50
source share

 /**
  * Replaces &, newline, <, >, " and ' with decimal numeric character
  * references (for example "<" becomes "&#60;").
  *
  * @param {string} r - raw text
  * @returns {string} the encoded text
  */
 function encode(r) {
   // Turns one matched character into "&#<code>;".
   function toNumericReference(ch) {
     var code = ch.charCodeAt(0);
     return "&#" + code + ";";
   }
   return r.replace(/[\x26\x0A\x3c\x3e\x22\x27]/g, toNumericReference);
 }

 // Demo: writes an encoded sample into the object named `test`.
 test.value=encode('How to encode\nonly html tags &<>\'" nice & fast!');
 /* Character class members: \x26 is &, \x0A is newline, \x22 is ",
    \x27 is ', \x3c is <, \x3e is > */
 <textarea id=test rows=11 cols=55>www.WHAK.com</textarea> 
+2
Jul 26 '15 at 13:33
source share

Martijn's method as a single function that also handles the " mark (for use in JavaScript):

 /**
  * Escapes &, <, > and " in a string in a single pass; the double quote is
  * written as the numeric reference &#34;.
  *
  * @param {string} html - raw text
  * @returns {string} the escaped text
  */
 function escapeHTML(html) {
   // Built once per call rather than once per matched character.
   var charsToReplace = {
     '&': '&amp;',
     '<': '&lt;',
     '>': '&gt;',
     '"': '&#34;'
   };
   // The terminating semicolon matters: without it, a following `return` on
   // the same line is a syntax error.
   var fn = function(tag) {
     return charsToReplace[tag] || tag;
   };
   return html.replace(/[&<>"]/g, fn);
 }
0
Nov 02 '14 at 7:50
source share

I'm not quite sure about the speed, but if you are looking for simplicity, I would suggest using the escape lodash / underscore function.

0
Nov 07 '18 at 20:51
source share

A bit late to the show, but what's wrong with encodeURIComponent() and decodeURIComponent()?

-2
Mar 20 '18 at 19:14
source share



All Articles