Regex Replace: "&#58;" with ":", etc.

I have a bunch of lines like:

"Hello, here a test colon&#58;. Here a test semi-colon&#59;"

I would like to replace this with

"Hello, here a test colon:. Here a test semi-colon;"

And so on for all printable ASCII values.

I am currently using boost::regex_search to match &#(\d+);, building up a new string as I process each match in turn (including appending the substring that contains no matches since the last match I found).

Can anyone think of a better way to do this? I am open to methods that do not use regular expressions, but a regular expression seems like a reasonable fit in this case.

Thanks,

House

+4
source share
12 answers

, , & , . : , , &#. , .

, .

? , , 3 , 1. ASCII -  -~. ?\d\d;.

, boost:: regex:: replace:

For each match // Using regex_iterator<>
    Print the prefix of the match
    Remove the first 2 and last character of the match (&#;)
    lexical_cast the result to int, then truncate to char and append.

Print the suffix of the last match.
+9

, , , ++, boost regex response, SNOBOL. ASCII. - Unicode.

        NUMS = '1234567890'
* Copies INPUT to OUTPUT one line at a time, replacing every
* "&#<digits>;" in the line with CHAR of that number.
*
* MAIN reads the next line and branches to END when INPUT fails.
MAIN    LINE = INPUT                                :F(END)
* SWAP replaces the first "&#<digits>;" found in LINE with CHAR(N) and
* repeats for as long as the match succeeds.  Each pass matches against
* the whole of LINE again, so text produced by an earlier replacement can
* itself be matched: "&#38;#38;" becomes "&#38;" and then "&".
SWAP    LINE ?  '&#' SPAN(NUMS) . N ';' = CHAR( N ) :S(SWAP)
* No reference left in LINE: print it and fetch the next line.
        OUTPUT = LINE                               :(MAIN)
END
+3
* Repaired SNOBOL4 Solution
* &#38;#38; -> &#38;
*
* Copies input to output one line at a time, replacing each
* "&#<digits>;" with char of that number.  Decoded text is moved into
* result and is never matched again, which is why "&#38;#38;" yields
* "&#38;" rather than "&".
     digit = '0123456789'
* main reads the next line and branches to end when input fails.
main line = input                        :f(end)
* result collects the already-processed part of the current line.
     result = 
* swap splits line at its first reference: l receives the text before it,
* n the digits, and line is reassigned to the unscanned remainder.  When
* no reference is found the statement fails and control goes to out.
swap line arb . l
+    '&#' span(digit) . n ';' rem . line :f(out)
* Append the leading text and the decoded character, then look again.
     result = result l char(n)           :(swap)
* out prints the processed part followed by whatever is left of line.
out  output = result line                :(main)
end
+3

regex boost, , replace(), lambdas . , .

Python:

import re


def decode_ncr(match):
    """Return the replacement text for one matched "&#NN;" reference.

    Only printable ASCII codes (32..126) are decoded to their character;
    for any other code the matched text is returned unchanged.
    """
    code = int(match.group(1))
    return chr(code) if 32 <= code <= 126 else match.group(0)


s = "Hello, here a test colon&#58;. Here a test semi-colon&#59;"
# The pattern accepts two- and three-digit codes (10..199); decode_ncr
# narrows that to the printable range so control characters are not emitted.
re.sub(r'&#(1?\d\d);', decode_ncr, s)

:

"Hello, here a test colon:. Here a test semi-colon;"

boost , regex_replace. ++ , , . , (\ d\d), $1, . , boost.

+1

SNOBOL - "&". :

        dd = "0123456789"
* ccp matches "#<digits>;" and, during the match, appends char(n) for
* that number to s.  It then optionally matches further "#<digits>;"
* groups that follow immediately (the recursive *ccp), so a single "&"
* can introduce a run of codes.
* NOTE(review): fence presumably keeps the matcher from backing up into
* a group whose character has already been appended to s - confirm.
        ccp = "#" span(dd) $ n ";" *?(s = s char(n)) fence (*ccp | null)
* rdl reads the next line and leaves through done when input fails.
   rdl  line = input                              :f(done)
* repl clears s, matches "&" followed by ccp, and replaces the matched
* text with the characters collected in s.  It repeats while a match
* succeeds, each time matching against the whole line again.
   repl line "&" *?(s = ) ccp = s                 :s(repl)
        output = line                             :(rdl)
   done
   end
+1

, , , perl "e". . .

echo ", β„– 58. β„– 59;
&#38;#65;. abc. &#126;.def."
| perl -we '# Decode "&#N;" for printable ASCII (32..126); other codes stay as written.
sub translate { my $x = $_[0]; if (($x >= 32) && ($x <= 126))
{ return sprintf("%c", $x); } else { return "&#" . $x . ";"; } }
while (<>) { s/&#(1?\d\d);/&translate($1)/ge; print; }'

, :

#!/usr/bin/perl -w

# Map a decimal character code to its replacement text: the character
# itself for printable ASCII (32..126), otherwise the original "&#N;"
# reference rebuilt from the code exactly as it was passed in.
sub translate
{
  my ($code) = @_;

  return "&#" . $code . ";" if ( $code < 32 || $code > 126 );
  return sprintf( "%c", $code );
}

# Filter: read every input line, rewrite each two- or three-digit "&#N;"
# reference through translate(), and print the resulting line.
while ( my $line = <> )
{
  $line =~ s/&#(1?\d\d);/translate($1)/ge;
  print $line;
}

perl perl, , ...


C:

. , .

+1

Perl (. @mrree answer):

  • :
$ cat ent.txt 
Hello, &#12; here a test colon&#58;. 
Here a test semi-colon&#59; '&#131;'
  • :
$ # Decode "&#N;" when 31 < N < 127; any other match is put back unchanged ($&).
$ perl -pe's~&#(1?\d\d);~
> sub{ return chr($1) if (31 < $1 && $1 < 127); $& }->()~eg' ent.txt
  • :
$ # Same decoding with the 32..126 range built into the pattern; single quotes keep the shell from expanding $1.
$ perl -pe's~&#(1(?:[01][0-9]|2[0-6])|3[2-9]|[4-9][0-9]);~chr($1)~eg' ent.txt
  • :
Hello, &#12; here a test colon:. 
Here a test semi-colon; '&#131;'
+1

boost:: spirit , NCR s.

// spirit_ncr2a.cpp
#include <cassert>
#include <cstdio>
#include <iostream>
#include <string>
#include <boost/spirit/include/classic_core.hpp>

// Reads stdin line by line and writes each line to stdout with every decimal
// numeric character reference "&#N;" (32 <= N <= 126) replaced by the
// character with that code. All other input is copied through unchanged.
int main() {
  using namespace BOOST_SPIRIT_CLASSIC_NS;

  std::string line;
  while (std::getline(std::cin, line)) {
    // Grammar: zero or more of either
    //   "&#" <decimal 32..126> ';'  -> the number is handed to putchar
    //   any single character        -> the character is handed to putchar
    //
    // The parse call is what produces the output, so it must not be written
    // inside assert(): with NDEBUG defined the whole expression would be
    // compiled out and only the newline below would be printed.
    //
    // NOTE(review): the putchar action is attached to the number, so it
    // appears to run before the trailing ';' has been checked; input such as
    // "&#58x" would then print ':' followed by the raw text - confirm.
    const bool consumed_all =
        parse(line.begin(), line.end(),
              *(("&#" >> limit_d(32u, 126u)[uint_p][&putchar] >> ';')
                | anychar_p[&putchar])).full;
    assert(consumed_all);
    static_cast<void>(consumed_all); // unused once NDEBUG removes the assert
    putchar('\n');
  }
}
  • :
    $ g++ -I/path/to/boost -o spirit_ncr2a spirit_ncr2a.cpp
  • :
    $ echo "Hello, &#12; here a test colon&#58;." | spirit_ncr2a
  • :
    "Hello, &#12; here a test colon:." 
+1

, , , , , !

python oneliner:

# re.split() with one capturing group returns plain text at even indexes and
# the captured digits at odd indexes, so decode by position: a plain-text piece
# that happens to consist only of digits must be left as it is.
''.join([chr(int(x)) if i % 2 else x for i, x in enumerate(re.split(r'&#(\d+);', THESTRING))])

?

0

NCR, Flex:

/** ncr2a.y: Replace all NCRs by corresponding printable ASCII characters.
 *
 * Flex scanner with a single rule: a decimal numeric character reference
 * "&#N;" whose N lies in 32..126 is replaced by the character with that code.
 */
%%
&#(1([01][0-9]|2[0-6])|3[2-9]|[4-9][0-9]); { /* accept 32..126: 100..126, 32..39, 40..99 */
  /* yytext + 2 skips the leading "&#"; atoi() stops at the trailing ';'.
     Recursive alternative: unput(atoi(yytext + 2)); puts the decoded
     character back on the input so that it is scanned again. */
  fputc(atoi(yytext + 2), yyout); /* non-recursive version: write the character directly */
}

:

$ flex ncr2a.y
$ gcc -o ncr2a lex.yy.c -lfl

:

$ echo "Hello, &#12; here a test colon&#58;. 
> Here a test semi-colon&#59; '&#131;'
> &#38;#59; <-- may be recursive" \
> | ncr2a

:

Hello, &#12; here a test colon:.
Here a test semi-colon; '&#131;'
&#59; <-- may be recursive

:

Hello, &#12; here a test colon:.
Here a test semi-colon; '&#131;'
; <-- may be recursive
0

, , -, , , 32 126, . , ( , , ).

      dd = "0123456789"
* ccp matches "#<digits>;" only when 32 <= n < 127 (the deferred lt/ge
* calls are evaluated during the match) and then appends char(n) to s.
* Further "#<digits>;" groups that follow immediately are matched
* through the recursive *ccp.
      ccp = "#" span(dd) $ n *lt(n,127) *ge(n,32) ";" *?(s = s char(n))
 +      fence (*ccp | null)
* rdl reads the next line and leaves through done when input fails.
 rdl  line = input                              :f(done)
* repl clears s, matches "&" followed by ccp, and replaces the matched
* text with the characters collected in s; it repeats while a match
* succeeds.
 repl line "&" *?(s = ) ccp = s                 :s(repl)
      output = line                             :(rdl)
 done
 end

(, # 131; # 58; :; # 131;: ":

      dd = "0123456789"
* ccp matches "#<digits>;", keeping the digits in n and the text
* "<digits>;" in enc.  For 32 <= n < 127 it appends char(n) to s;
* otherwise it appends char(10) followed by enc.  The output statement
* later turns every char(10) in the line into "#".
* NOTE(review): this relies on the "(a, b)" form selecting b when a
* fails - confirm that the SNOBOL dialect in use supports it.
* NOTE(review): the "&" matched by repl is part of the replaced text and
* is not put back, so an out-of-range "&#131;" appears to come out as
* "#131;" - confirm.
      ccp = "#" (span(dd) $ n ";") $ enc
 +      *?(s = s (lt(n,127) ge(n,32) char(n), char(10) enc))
 +      fence (*ccp | null)
* rdl reads the next line and leaves through done when input fails.
 rdl  line = input                              :f(done)
* repl clears s, matches "&" followed by ccp, and replaces the matched
* text with s; it repeats while a match succeeds.
 repl line "&" *?(s = ) ccp = s                 :s(repl)
      output = replace(line,char(10),"#")       :(rdl)
 done
 end
0

Here is a version based on boost::regex_token_iterator. The program reads lines from stdin, replaces each decimal NCR with the corresponding ASCII character, and prints the result to stdout.

#include <cassert>
#include <iostream>
#include <string>
#include <boost/lexical_cast.hpp>
#include <boost/regex.hpp>

int main()
{
  boost::regex re("&#(1(?:[01][0-9]|2[0-6])|3[2-9]|[4-9][0-9]);"); // 32..126
  const int subs[] = {-1, 1}; // non-match & subexpr
  boost::sregex_token_iterator end;
  std::string line;

  while (std::getline(std::cin, line)) {
    boost::sregex_token_iterator tok(line.begin(), line.end(), re, subs);

    for (bool isncr = false; tok != end; ++tok, isncr = !isncr) {
      if (isncr) { // convert NCR e.g., '&#58;' -> ':'
        const int d = boost::lexical_cast<int>(*tok);
        assert(32 <= d && d < 127);
        std::cout << static_cast<char>(d);
      }
      else
        std::cout << *tok; // output as is
    }
    std::cout << '\n';
  }
}
0
source

All Articles