Regex Prevention for SI Prefixes in Javascript

In Javascript, I use regular expressions to capture SI units (and some non-SI units) and format them correctly with a multiplication point.

For example: "Js" becomes "J⋅s" and "mΩm" becomes "mΩ⋅m", etc.

The problem is that there are some prefixes that are also SI units (for example, "m"), and therefore what I wrote incorrectly converts "mΩ" to "m⋅Ω".

// Multiplication dot (U+22C5) inserted between adjacent units.
var $dot = "\u22c5";
// Minus sign (U+2212) accepted as the sign of an exponent.
var $minus = "\u2212";

// Prefix symbols (\u00B5 is the micro sign).
var $prefix = "(p|n|\u00B5|m|c|d|k|M|G|T)";
// Unit symbols (\u03A9 is the capital omega, \u00B0 the degree sign, \u0025 the percent sign).
var $si_unit = "(m|g|l|L|s|A|K|mol|cd|Hz|rad|sr|N|Pa|J|W|C|V|F|\u03A9|S|Wb|T|H|\u00B0C|\u00B0F|lm|lx|Bq|Gy|Sv|kat|eV|\u0025)";
// Exponent: optional sign followed by digits. The backslash must be doubled
// because this is a string literal: "\d" would collapse to a plain "d" before
// the RegExp constructor ever sees it, so exponents would never match.
var $power = "([+" + $minus + "]?\\d+)";

// One unit: optional prefix, unit symbol, optional exponent.
// The outer parentheses make the whole unit capture group 1.
var $unit = "(" + $prefix + "?" + $si_unit + $power + "?)";
// A unit followed by any number of dot-separated units.
var $multiplied = $unit + "(" + $dot + $unit + ")*";
// A product, optionally divided by another product.
var $denominator = $multiplied + "(\/" + $multiplied + ")?";

// Each entry pairs a global pattern with the replacement passed to String.prototype.replace.
var $corrections= [
    {
        // corrects dot products: a unit directly followed by another unit gets
        // a dot appended; the lookahead leaves the following unit unconsumed so
        // it can itself be matched on the next iteration
        pattern: new RegExp( $unit + "(?=" + $unit + ")", "g" ),
        correction: "$1" + $dot
    }
    // more correction patterns later
];

/**
 * Runs every entry of $corrections over the input, in array order.
 * Each entry's pattern/correction pair is handed to String.prototype.replace,
 * and the output of one entry becomes the input of the next.
 *
 * @param {string} $string - text to correct
 * @returns {string} the text after all corrections have been applied
 */
function correct( $string ) {
    return $corrections.reduce( function( text, rule ) {
        return text.replace( rule.pattern, rule.correction );
    }, $string );
}

// Sample inputs; each trailing comment shows the output produced by the
// pattern above and whether that output is the desired one.
correct( "m" ); // m - CORRECT
correct( "mΩ" ); // m⋅Ω - INCORRECT, should be mΩ (m was taken as a unit instead of a prefix)
correct( "Ωm" ); // Ω⋅m - CORRECT
correct( "mΩm" ); // mΩ⋅m - CORRECT

Update: as pointed out by @nhahtdh, the problem is backtracking. Since the regex does not find a $unit after Ω, it backtracks and identifies m and Ω as $unit + $unit instead of $prefix + $unit. I need to prevent backtracking in this case.

+4
2

, .

, , , mmΩ? ⋅ Ω m ⋅ mΩ? , .

Edit:

, : ( , )

mmmmol

:

  • m ⋅ m ⋅ m ⋅ mol = m ^ 3 ⋅ mol
  • m ⋅ m ⋅ mmol = m ^ 2 ⋅ mmol
  • m ⋅ mm ⋅ mol
  • mm ⋅ m ⋅ mol

, , , , , .

2:

- , , , .

3:

, endOfLine. .

, . , m , mol.

. this jsfiddle

// Multiplication dot (U+22C5) inserted between adjacent units.
var $dot = "\u22c5";
// Minus sign (U+2212) accepted as the sign of an exponent.
var $minus = "\u2212";

// Prefix symbols (\u00B5 is the micro sign). Non-capturing, so it does not
// shift the numbering of the capture group used by the replacement below.
var $prefix = "(?:\u00B5|c|d|G|k|m|M|n|p|T)";
// Unit symbols, also non-capturing. Three-character symbols are listed first,
// then two-character ones, then single characters, so the alternation tries
// the longer symbols before the shorter ones.
var $si_unit = "(?:kat|mol|rad|\u00B0C|\u00B0F|Bq|cd|eV|Gy|Hz|lm|lx|Pa|sr|Sv|Wb|\u0025|\u03A9|A|C|F|g|H|J|K|l|L|m|N|s|S|T|V|W)";
// Exponent: optional sign followed by digits. The backslash is doubled so that
// the RegExp constructor receives \d rather than a plain "d".
var $power = "(?:[+" + $minus + "]?\\d+)";

// One unit: optional prefix, unit symbol, optional exponent.
// This is the only capturing group in the fragment, i.e. $1 in the replacement.
var $unit = "("+ $prefix + "?"+ $si_unit + $power + "?)";

// A unit followed by any number of dot-separated units.
// Not referenced by the correction pattern visible below.
var $multiplied = $unit + "(" + $dot + $unit + ")*";
// A product, optionally divided by another product ("\/" in a string literal is just "/").
// Not referenced by the correction pattern visible below.
var $denominator = $multiplied + "(\/" + $multiplied + ")?";

// Each entry pairs a global pattern with the replacement passed to String.prototype.replace.
var $corrections= [
    {
        // corrects dot products
        // The lookahead accepts either end of input or another unit. Because
        // end of input counts, the last unit of the string also matches and
        // receives a dot; correct() strips that trailing dot afterwards.
        pattern: new RegExp( $unit + "(?=($|" + $unit + "))", "g" ),
        correction: "$1" + $dot
    }
    // more correction patterns later
];

$(document).ready(function() {

    // Element that receives one output line per sample.
    var resultsElem = $("#results");

    // Sample inputs in display order. Expected output per the original notes:
    //   "m"      -> m
    //   "mΩ"     -> mΩ
    //   "Ωm"     -> Ω⋅m
    //   "mΩm"    -> mΩ⋅m
    //   "mmmmol" -> mm⋅mmol
    var samples = [ "m", "m\u03A9", "\u03A9m", "m\u03A9m", "mmmmol" ];

    samples.forEach(function( sample ) {
        addToResults(resultsElem, correct( sample ));
    });
});

/**
 * Applies every entry of $corrections to the input, in array order, then
 * drops a single trailing $dot if the result ends with one.
 *
 * The dot-product pattern also matches a unit at the end of the input (that
 * is what makes the prefix matching greedy), so the replacement leaves a dot
 * after the last unit; it is removed here.
 *
 * @param {string} $string - text to correct
 * @returns {string} the corrected text without a trailing dot
 */
function correct( $string ) {

    var $corrected = $corrections.reduce( function( text, corrector ) {
        return text.replace( corrector.pattern, corrector.correction );
    }, $string );

    var lastIndex = $corrected.length - 1;

    return $corrected.charAt( lastIndex ) === $dot
        ? $corrected.slice( 0, lastIndex )
        : $corrected;
}

/**
 * Appends one result followed by a line break to the given element.
 *
 * @param elem - object whose append() returns something that can be appended
 *               to again (a jQuery collection in the surrounding demo)
 * @param theResult - content to append before the "<br>"
 */
function addToResults(elem, theResult){
    var appended = elem.append(theResult);
    appended.append("<br>");
}
+2
((?:p|n|m){0,1}(?:m|g|l|L|s|A))((?:p|n|m){0,1}(?:m|g|l|L|s|A))

EDIT: , , , , + ( ) 1 2

(?:p|n|m) being the prefixes 

(?:m|g|l|L|s|A) being the units

+1

All Articles