Regular expression to replace & with &amp;

I have a line that contains both & and &amp;, as shown below.

 "This R&M &amp; Exapmle &nbsp;. It is very big &amp; Complicated &146; example." 

I want to replace & with &amp;, but when I use $str =~ s/&/&amp;/ig; it gives the following result.

 "This R&amp;M &amp;amp; Company &amp;nbsp;. It is very big &amp;amp; CMM Level3 &amp;146; Organization." 

And I expect this.

 "This R&amp;M &amp; Company &nbsp;. It is very big &amp; CMM Level3 &146; Organization." 

Please help me. I do not know how to fix this.

+4
source share
4 answers

You can use a negative lookahead assertion:

 # Escape every bare ampersand in $str as "&amp;". The negative lookahead
 # skips any "&" that is already followed by word characters and a ";"
 # (e.g. "&amp;", "&nbsp;", "&146;"), so existing entities are not
 # double-escaped. Numeric references written with "#" (e.g. "&#146;") are
 # NOT recognised by \w+ and will still be escaped.
 $str =~ s/&(?!\w+;)/&amp;/g; 
+10
source
 use HTML::Entities;

 # Decode first so entities already present are turned back into characters,
 # then encode the whole string once; this avoids double-encoding "&amp;".
 # "&146;" is not a valid entity, so it is left alone by the decode step and
 # its "&" is then escaped by the encode step.
 encode_entities decode_entities "This R&M &amp; Exapmle &nbsp;. It is very big &amp; Complicated &146; example."
 # returns: "This R&amp;M &amp; Exapmle &nbsp;. It is very big &amp; Complicated &amp;146; example."

`&146;` is a misspelling of `&#146;` (’). If you have more errors like this, filter or replace them before decoding and re-encoding.

+7
source

I found a very good answer to this question some time ago, took the posted code and adapted it for my own use, but I can no longer find the original anywhere.

Anyway, here is the solution I made from it.

Right now the Encoder only supports &nbsp;, &amp;, &quot;, &lt; and &gt;, but it is very easy to add support for more HTML entities.

First of all, here is Encoder:

 /**
  * Minimal HTML-entity codec covering &nbsp;, &amp;, &quot;, &lt; and &gt;.
  *
  * - Encoder.encode(s): replaces the raw characters &, ", <, > and U+00A0
  *   (non-breaking space) with their named entities.
  * - Encoder.decode(s): replaces those five named entities with the raw
  *   characters they stand for.
  *
  * Both methods return any non-string argument unchanged.
  */
 var Encoder = {
   /**
    * Character -> entity.
    * @param {*} s Value to encode.
    * @returns {*} The encoded string, or `s` itself when it is not a string.
    */
   encode: (function() {
     // Every entity carries its terminating ';' so the output is well-formed
     // and can be read back by decode().
     var translate = {
       '&': '&amp;',
       '"': '&quot;',
       '<': '&lt;',
       '>': '&gt;'
     };
     translate[String.fromCharCode(160)] = '&nbsp;';

     // A single pass over the input: the '&' of an entity that has just been
     // emitted is never seen again, so nothing is escaped twice.
     var translate_re = /[<>"&\u00A0]/g;
     var translator = function($0) {
       return translate[$0];
     };

     return function(s) {
       if (typeof s === 'string') {
         return s.replace(translate_re, translator);
       }
       return s;
     };
   })(),

   /**
    * Entity -> character.
    * @param {*} s Value to decode.
    * @returns {*} The decoded string, or `s` itself when it is not a string.
    */
   decode: (function() {
     // Capture group 1 is the entity name; the trailing ';' is required.
     var translate_re = /&(nbsp|amp|quot|lt|gt);/g;
     var translate = {
       'nbsp': String.fromCharCode(160),
       'amp': '&',
       'quot': '"',
       'lt': '<',
       'gt': '>'
     };
     var translator = function($0, $1) {
       return translate[$1];
     };

     return function(s) {
       if (typeof s === 'string') {
         return s.replace(translate_re, translator);
       }
       return s;
     };
   })()
 };

 var Encoder = { encode: (function() { var translate_re = /&(nbsp|amp|quot|lt|gt);/g, translate = { 'nbsp': String.fromCharCode(160), 'amp' : '&', 'quot': '"', 'lt' : '<', 'gt' : '>' }, translator = function($0, $1) { return translate[$1]; }; return function(s) { if(typeof s === 'string') return s.replace(translate_re, translator); else return s; }; })(), decode: (function() { var reg_str = '(<|>|"|&|' + String.fromCharCode(160) + ')'; var translate_re = new RegExp(reg_str, 'g'); var translate = { '&' : '&amp', '"': '&quot', '<' : '&lt', '>' : '&gt' }; translate[String.fromCharCode(160)] = '&nbsp;'; var translator = function($0, $1) { return translate[$1]; }; return function(s) { if(typeof s === 'string') return s.replace(translate_re, translator); else return s; }; })() }; //Here is our string with HTML entities in it var str = 'Non-Breaking Space: "&nbsp;", Ampersand: "&amp;", Quote: "&quot", Less-Than: "&lt", Greater-Than: "&gt"'; //Lets get our div's var output_not_endcoded = document.getElementById("output_not_endcoded"); var output_endcoded = document.getElementById("output_endcoded"); //If this div exists, add the string with the HTML entities as is if(output_not_endcoded) output_not_endcoded.innerHTML = str; //If the other div exists, decode the HTML entities and set it as its contents if(output_endcoded) output_endcoded.innerHTML = Encoder.decode(str); 
 /* Demo page styling: one 13.2px "Courier New" font stack applied to every element, body text at 100% size, and full-width, auto-height, padded .row containers. */
 * { font: 13.2px "Courier New", Arial, sans-serif; } body { font-size: 100%; } .row { width:100%; height:auto; padding: 8px 6px; } 
 <!-- Demo markup: two labelled, initially empty .row containers with ids output_not_endcoded and output_endcoded. -->
 With HTML Entities: <div id="output_not_endcoded" class="row" ></div> <br/> With HTML Entities Decoded: <div id="output_endcoded" class="row" ></div> 

It is very easy to add support for other HTML entities.

Looking at the encoder, you will see our translation section. One part contains the regular expression and the other part contains our translation fields.

Regex:

 // Matches the named entities &nbsp; &amp; &quot; &lt; &gt; globally; the trailing ';' is required and capture group 1 holds the entity name.
 var translate_re = /&(nbsp|amp|quot|lt|gt);/g 

Translations:

 // Lookup table keyed by entity name (without '&' and ';'); each value is the character that entity stands for. Character code 160 is the non-breaking space.
 translate = { 'nbsp': String.fromCharCode(160), 'amp' : '&', 'quot': '"', 'lt' : '<', 'gt' : '>' } 

Say you wanted to add support for the copyright symbol "©". The entity name for this character is &copy;. To add support for this character, simply add it to the regular expression and to the translation table:

Regex:

 // Same entity pattern with 'copy' added to the alternation, so &copy; is matched too; capture group 1 still holds the entity name.
 var translate_re = /&(nbsp|amp|quot|lt|gt|copy);/g 

Translations:

 // Lookup table keyed by entity name (without '&' and ';'); each value is the
 // character that entity stands for. 'copy' is the newly added entry.
 translate = {
   'nbsp': String.fromCharCode(160), // non-breaking space
   'amp': '&',
   'quot': '"',
   'lt': '<',
   'gt': '>',
   // Written as an escape so the copyright sign survives re-encoding of this file.
   'copy': '\u00A9'
 }

You will need to make sure that you add support for both encoding and decoding functions if you want full support for encoding and decoding.

And that's it! Hope this helps!

0
source

Updated regex for replacing ampersands, using a negative lookahead to avoid changing existing HTML entities:

 &(?!(#[0-9]{2,4}|[A-Za-z]{2,6});) 
0
source

All Articles