This function removes groups that are not followed by a quantifier and are not look-arounds. It assumes a regular expression of the ECMAScript flavor and that capturing groups ( ( ... ) ) are not significant.
/**
 * Tells whether the group delimited by pieces[open] and pieces[close] makes
 * up one complete alternative of whatever surrounds it. Only then can a group
 * that contains a top-level "|" lose its parentheses without changing what
 * the pattern matches.
 *
 * @param {string[]} pieces - Tokenized pattern: text at even indexes, tokens
 *     at odd indexes; already-removed parentheses are empty strings.
 * @param {number} open - Index of the group's opening token.
 * @param {number} close - Index of the group's closing token.
 * @returns {boolean} True when the group is bounded on both sides by the
 *     pattern edge, an enclosing parenthesis, or a "|".
 */
function isWholeAlternative(pieces, open, close) {
  // Walk left past empty pieces to the nearest thing that is still there.
  for (var k = open - 1; k >= 0; k--) {
    var before = pieces[k];
    if (before === "") {
      continue;
    }
    var startsAlternative = k % 2 === 1
      ? before.charAt(0) === "(" // opener of the enclosing group
      : before.charAt(before.length - 1) === "|"; // text ending in "|"
    if (!startsAlternative) {
      return false;
    }
    break;
  }

  // To the right nothing has been blanked yet: the next piece is text, and
  // if that text is empty the one after it (if any) is a token.
  var after = pieces[close + 1];
  if (after !== "") {
    return after.charAt(0) === "|";
  }
  var nextToken = pieces[close + 2];
  return nextToken === undefined || nextToken.charAt(0) === ")";
}

/**
 * Removes parentheses that do not affect what an ECMAScript regular
 * expression matches, assuming capture numbering is not significant.
 *
 * A group is unwrapped only when all of the following hold:
 *  - it is a plain capturing group "(" or a non-capturing group "(?:"
 *    (look-aheads, look-behinds and named groups are always kept);
 *  - its closing parenthesis is not followed by a quantifier;
 *  - it has no top-level "|", or it spans a whole alternative of its
 *    surroundings (so "(a|b)" becomes "a|b" but "(a|b)c" is kept).
 *
 * Unbalanced closing parentheses are left untouched.
 *
 * @param {string} s - Source text of the regular expression (no delimiters).
 * @returns {string} The pattern with the unnecessary parentheses removed.
 */
function removeUnnecessaryParenthesis(s) {
  // Tokens: escape | character class | group opener | group closer with an
  // optional quantifier. The single capturing group makes split() return the
  // tokens too, so text lands on even indexes and tokens on odd indexes.
  var tokenPattern = /(\\.|\[(?:\\.|[^\]\\])+]|\((?:\?(?:[:!=]|<[=!]|<[^>]+>))?|\)(?:[*?+]\??|\{\d+,?\d*}\??)?)/g;
  var pieces = s.split(tokenPattern);
  var stack = []; // open groups: { open: index, hasAlternation: boolean }

  for (var i = 0; i < pieces.length; i++) {
    var piece = pieces[i];

    if (i % 2 === 0) {
      // Plain text never holds escapes or classes, so any "|" in it is a
      // real alternation belonging to the innermost open group.
      if (stack.length > 0 && piece.indexOf("|") !== -1) {
        stack[stack.length - 1].hasAlternation = true;
      }
      continue;
    }

    var first = piece.charAt(0);
    if (first === "(") {
      stack.push({ open: i, hasAlternation: false });
      continue;
    }
    if (first !== ")" || stack.length === 0) {
      // Escape, character class, or an unbalanced ")": nothing to do.
      continue;
    }

    var group = stack.pop();
    var opener = pieces[group.open];
    var isPlainGroup = opener === "(" || opener === "(?:";
    if (!isPlainGroup || piece !== ")") {
      // Look-around, named group, or a quantified group: keep it.
      continue;
    }
    if (group.hasAlternation && !isWholeAlternative(pieces, group.open, i)) {
      // Unwrapping would let the "|" swallow the neighbouring terms.
      continue;
    }

    pieces[group.open] = "";
    pieces[i] = "";
    if (group.hasAlternation && stack.length > 0) {
      // The alternation now sits directly inside the enclosing group.
      stack[stack.length - 1].hasAlternation = true;
    }
  }

  return pieces.join("");
}
Examples:
removeUnnecessaryParenthesis("((((A)B)C)D)") --> "ABCD"
removeUnnecessaryParenthesis("((((A)?B)C)D)") --> "(A)?BCD"
removeUnnecessaryParenthesis("((((A)B)?C)D)") --> "(AB)?CD"
It does not try to determine whether a quantified group contains only a single token (e.g. (A)?, which could be written as A?). Detecting that would require a longer token pattern.
source share