Replace consecutive duplicate phrases in a string

I am trying to find (and replace) a duplicate string in a string.

My line might look like this:

Lorem ipsum dolor sit amet sit amet sit amet sit nostrud exercitation amit sit ullamco laboris nisi ut aliquip ex ea commodo consequat.

It should become:

Lorem ipsum dolor sit amet sit nostrud exercitation amit sit ullamco laboris nisi ut aliquip ex ea commodo consequat.

Please note that amit sit is not deleted as it is not repeated.

Or the line could be like this:

Lorem ipsum dolor sit amet () sit amet () sit amet () sit nostrud exercitation ullamco laboris nisi ut aliquip aliquip ex ea commodo consequat.

which should become:

Lorem ipsum dolor sit amet () sit nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

, a-z, (ascii) . , - .

( ) :

2 questions 3 questions 4 questions 5 questions

:

2 questions

2,3,4, . , .

+5
7

:

<?php

    /**
     * Collapse immediately repeated word sequences, keeping the last copy.
     *
     * The text is split on single spaces. Phrase lengths are tried from half
     * the word count down to one word. For each start position that is not
     * already flagged, the phrase beginning there is compared with the phrase
     * that follows it directly; when both are identical, the first copy is
     * flagged for removal. Words are compared strictly, so numeric-looking
     * words such as '1' and '1.0' are not considered duplicates.
     *
     * @param string $string Space-separated text.
     * @return string The unflagged words joined with single spaces, trimmed.
     */
    function split_repeating($string)
    {
        $words = explode(' ', $string);
        $words_count = count($words);
        $need_remove = array_fill(0, $words_count, false);

        // Two copies of a phrase must fit into the array, so the longest
        // candidate phrase is floor(count / 2) words (round() could overshoot).
        for ($length = (int) floor($words_count / 2); $length >= 1; $length--) {
            // Only visit start positions whose second copy lies fully inside
            // the array; this keeps every index below in bounds.
            for ($start = 0; $start + 2 * $length <= $words_count; $start++) {
                // A word that is already scheduled for removal never starts a match.
                if ($need_remove[$start]) {
                    continue;
                }

                $repeats = true;
                for ($offset = 0; $offset < $length; $offset++) {
                    // Strict comparison: loose != would equate '1' and '1.0'.
                    if ($words[$start + $offset] !== $words[$start + $offset + $length]) {
                        $repeats = false;
                        break;
                    }
                }

                if ($repeats) {
                    for ($offset = 0; $offset < $length; $offset++) {
                        $need_remove[$start + $offset] = true;
                    }
                }
            }
        }

        $kept = array();
        foreach ($words as $index => $word) {
            if (!$need_remove[$index]) {
                $kept[] = $word;
            }
        }
        return trim(implode(' ', $kept));
    }



    // Sample 1 from the question: "sit amet" appears three times back to back.
    $string = 'Lorem ipsum dolor sit amet sit amet sit amet sit nostrud exercitation amit sit ullamco laboris nisi ut aliquip ex ea commodo consequat.';

    // Print the input, the computed result and the hand-written expected
    // result on separate HTML lines, followed by one empty line.
    echo $string, '<br>',
        split_repeating($string), '<br>',
        'Lorem ipsum dolor sit amet sit nostrud exercitation amit sit ullamco laboris nisi ut aliquip ex ea commodo consequat.', '<br>', '<br>';



    // Sample 2: a three-word phrase repeated three times plus a doubled single word.
    $string = 'Lorem ipsum dolor sit amet () sit amet () sit amet () sit nostrud exercitation ullamco laboris nisi ut aliquip aliquip ex ea commodo consequat.';

    // Same layout as above, without the trailing line breaks.
    echo $string, '<br>',
        split_repeating($string), '<br>',
        'Lorem ipsum dolor sit amet () sit nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.';

?>

:

<?php

    /**
     * Truncate a "<label> <phrase> <label> <phrase> ..." tail to its first pair.
     *
     * The text is split on single spaces. For each start position that is not
     * already flagged, every second word after it (start + 1, start + 3, ...)
     * is compared with the one two places further on. When the rest of the
     * string consists of whole pairs and all of those words are identical,
     * everything from start + 2 to the end is flagged for removal. The words
     * at the even offsets (the labels) are never compared.
     *
     * Words are compared strictly, so '1' and '1.0' are different words.
     *
     * @param string $string Space-separated text.
     * @return string The unflagged words joined with single spaces, trimmed.
     */
    function split_repeating($string)
    {
        $words = explode(' ', $string);
        $words_count = count($words);
        $need_remove = array_fill(0, $words_count, false);

        // At least two pairs (four words) must remain from $start onwards,
        // otherwise there is nothing that could repeat.
        for ($start = 0; $start + 3 < $words_count; $start++) {
            if ($need_remove[$start]) {
                continue;
            }
            // A dangling word after the last full pair means the tail is not a
            // pure repetition. This used to be detected only by reading one
            // element past the end of the array.
            if (($words_count - $start) % 2 !== 0) {
                continue;
            }

            $repeats = true;
            for ($k = $start + 1; $k + 2 < $words_count; $k += 2) {
                // Strict comparison: loose != would equate '1' and '1.0'.
                if ($words[$k] !== $words[$k + 2]) {
                    $repeats = false;
                    break;
                }
            }

            if ($repeats) {
                for ($k = $start + 2; $k < $words_count; $k++) {
                    $need_remove[$k] = true;
                }
            }
        }

        $kept = array();
        foreach ($words as $index => $word) {
            if (!$need_remove[$index]) {
                $kept[] = $word;
            }
        }
        return trim(implode(' ', $kept));
    }



    // Sample with a changing number in front of a repeated word.
    $string = '2 questions 3 questions 4 questions 5 questions';

    // Print the input, the computed result and the expected result; the first
    // two are each followed by an HTML line break.
    echo $string, '<br>',
        split_repeating($string), '<br>',
        '2 questions';

?>
+1

, \2 .. . , , :

# Replace a word followed by one or more copies of itself with a single copy.
# NOTE(review): as written the pattern has a literal space after group 1 AND a
# space inside group 2, so the first repeat must be separated by two spaces -
# confirm whether the space between the groups is intended.
$string =~ s/(\w+) ( \1)+/$1/g

.

+2

. preg_replace(), , . , , ( n ) 50 :

1:

// Collapse a phrase of at most 50 characters that is immediately repeated one
// or more times (separated by whitespace) into a single copy. Both ends of the
// match are anchored on whitespace or the string boundary, so a phrase that is
// merely a prefix of the following word ("the then") is left untouched.
$result = preg_replace('/
    # Match a doubled "phrase" having length up to 50 chars.
    (            # $1: Phrase having whitespace boundaries.
      (?<=\s|^)  # Assert phrase preceded by ws or BOL.
      \S         # First char of phrase is non-whitespace.
      .{0,49}?   # Lazily match phrase (50 chars max).
    )            # End $1: Phrase
    (?:          # Group for one or more duplicate phrases.
      \s+        # Doubled phrase separated by whitespace.
      \1         # Match duplicate of phrase.
    ){1,}        # Require one or more duplicate phrases.
    (?=\s|$)     # Assert last duplicate followed by ws or EOL.
    /x', '$1', $text);

, , , . , , "" "".

: . .

, , ( ):

2:

// Collapse "<digits> <phrase> <digits> <phrase> ..." into the first
// "<digits> <phrase>" pair. The digits in front of each repeat may differ; the
// phrase itself must be identical and the last repeat must end at whitespace
// or the end of the string, so "2 question 3 questions" is left untouched.
$result = preg_replace('/
    # Match doubled "phrases" with wildcard digits first word.
    (            # $1: 1st word of phrase (digits).
    \b           # Anchor 1st phrase word to word boundary.
    \d+          # Phrase 1st word is string of digits.
    \s+          # 1st and 2nd words separated by whitespace.
    )            # End $1:  1st word of phrase (digits).
    (            # $2: Part of phrase after 1st digits word.
      \S         # First char of phrase is non-whitespace.
      .{0,49}?   # Lazily match phrase (50 chars max).
    )            # End $2: Part of phrase after 1st digits word.
    (?:          # Group for one or more duplicate phrases.
      \s+        # Doubled phrase separated by whitespace.
      \d+        # Wildcard digits word of the duplicate.
      \s+        # Digits and rest of phrase separated by whitespace.
      \2         # Match duplicate of phrase.
    ){1,}        # Require one or more duplicate phrases.
    (?=\s|$)     # Assert last duplicate followed by ws or EOL.
    /x', '$1$2', $text);
+2

((?:\b|^)[\x20-\x7E]+)(\1)+ ASCII, . hello hello, double l .

, , \x##-\x##\x##-\x## ( ## - hex) -\x##, .

, , , , . , , - ((?:\b|^)[\x20-\x7E]+\s)(\1)+ ( \s).

((?:\b|^)[\x20-\x7E]+\s)(.*(\1))+ , , , , .

: , $string ~= /((?:\b|^)[\x20-\x7E]+\s)(.*(\1))+/$1/ig Perl PHP, .

+1

...

, eval(base64_decode(...)), :

/**
 * String front end for fix(): split the text on single spaces, pass the word
 * list through fix() and join the returned words with single spaces again.
 *
 * @param string $str Space-separated text.
 * @return string The words returned by fix(), joined with single spaces.
 */
function fixi($str) {
    return implode(' ', fix(explode(" ", $str)));
}

/**
 * Remove immediately repeated runs from a list, keeping one copy of each run.
 *
 * Every unit length $i (1 .. count/2) and every start offset $j is examined:
 * the slice of $i elements at $j is compared with the slices that follow it
 * directly, counting how many consecutive copies exist. The run covering the
 * most elements in total wins (on a tie, the first one found, i.e. the
 * shortest unit and then the leftmost start). That run is replaced by a single
 * copy and the parts to its left and right are processed recursively.
 *
 * Slices are compared with ===, so elements must match in value and type;
 * numeric-looking strings such as '1' and '1.0' are not treated as equal.
 *
 * @param array $a List of words (or other scalars).
 * @return array The list with repeated runs collapsed.
 */
function fix($a) {
    $l = count($a);
    $len = 0;   // elements covered by the best run so far (copies * unit length)
    $f_j = 0;   // start offset of the best run
    $f_i = 0;   // unit length of the best run
    for ($i = 1; $i <= $l / 2; $i++) {
        for ($j = 0; $j <= $l - 2 * $i; $j++) {
            // The reference unit does not change while counting its copies.
            $a1 = array_slice($a, $j, $i);
            $n = 1;
            // A slice reaching past the end is shorter than $a1 and therefore
            // never identical, which terminates the loop.
            while ($a1 === array_slice($a, $j + $n * $i, $i)) {
                $n++;
            }
            if ($n > 1 && $n * $i > $len) {
                $len = $n * $i;
                $f_j = $j;
                $f_i = $i;
            }
        }
    }
    if ($len === 0) {
        return $a;
    }
    return array_merge(
        fix(array_slice($a, 0, $f_j)),
        array_slice($a, $f_j, $f_i),
        fix(array_slice($a, $f_j + $len))
    );
}

, .

+1

2 questions 3 questions 4 questions 5 questions

2 questions

, :

# Keep the first "<digits> <text>" pair and drop every directly following
# " <digits> <same text>" pair.
# NOTE(review): nothing anchors the end of \2, so a repeat that is only a
# prefix of a longer word still matches - confirm this is acceptable.
$string =~ s/(\d+ (.*))( \d+ (\2))+/$1/g;

, - (), , , , -, . .

+1

, . !. Ridgerunners, dtanders , ( ) , .

, :), ​​Nox. , - ( , , PHP).

, , :

/**
 * Remove repeated words/phrases from a space-separated string, skipping
 * numeric and empty words when looking for the "next" word.
 *
 * For every non-empty word, the function looks for the same word in the next
 * $max_length bytes of the string. If it is found, the word that follows
 * (ignoring numeric/empty words) is inspected: when it equals the current
 * word, the current word is marked for removal; otherwise phrase lengths from
 * $max_words down to 1 are tried. Marked indexes are unset from $words and the
 * remaining words are joined with single spaces.
 *
 * NOTE(review): the phrase branch calls each(), which was removed in PHP 8.0,
 * so that branch is a fatal error on current PHP versions - confirm the target
 * version before using this function.
 *
 * @param string $string Space-separated text.
 * @return string The remaining words joined with single spaces.
 */
function split_repeating_num($string) {
$words = explode(' ', $string);
// Copy of the word list that is navigated via PHP's internal array pointer.
$all_words = $words;
$num_words = count($words);
$max_length = 100; // number of bytes after the current word that are searched
$max_words = 4; // largest phrase length (in words) that is tried
$found = array(); // initialised here but not read anywhere in this function
$current_pos = 0; // byte offset just past the current word and its space
$unset = array(); // indexes of $words to drop, stored as keys
foreach ($words as $key=>$word) {
    // Check whether this word occurs again in the next part of the string.
    $len = strlen($word);
    // Empty words (from consecutive spaces) are skipped; note that
    // $current_pos is not advanced for them.
    if ($len === 0) continue;
    $current_pos += $len + 1; // +1 for the separating space
    // $substr is not used below; the condition recomputes the same substring.
    $substr = substr($string, $current_pos, $max_length);
    if (($pos = strpos(substr($string, $current_pos, $max_length), $word)) !== false) {
        // The word occurs again within the window.
        // Move the internal pointer of $all_words onto index $key.
        while (key($all_words) < $key ) next($all_words);
        while (key($all_words) > $key ) prev($all_words);
        $next_word = next($all_words);

        // Skip numeric and empty words; next() returns false at the end of
        // the array, which ends this loop as well.
        while (is_numeric($next_word) || $next_word === '') {
            $next_word = next($all_words);
        }
        // Does the same word follow directly (ignoring numbers)?
        if ($word === $next_word) {
            $unset [$key] = 1;
        } elseif ($key + 3 < $num_words) {
            // Try phrase lengths from $max_words down to a single word.
            for($i = $max_words; $i > 0; $i --) {
                $x = 0;
                $string_a = '';
                $string_b = '';
                while ($x < $i ) {
                    // NOTE(review): each() returns an array (or false), not the
                    // word itself, and no longer exists in PHP 8 - presumably
                    // next() was intended; confirm.
                    while (is_numeric($next_word) || $next_word === '' ) {
                        $next_word = each($all_words);
                    }
                    $x ++;
                    // NOTE(review): $next_word is only changed by the skip loop
                    // above, so the same value may be appended on every pass,
                    // and the index used for $words is not bounds-checked.
                    $string_a .= $next_word;
                    $string_b .= $words [key($all_words) + $i];
                }

                if ($string_a === $string_b) {
                    // Both concatenations agree: mark $i words starting at $key.
                    for($x = $key; $x < $i + $key; $x ++)
                        $unset [$x] = 1;
                }
            }
        }
    }

}
// Drop every marked index; the remaining keys are not renumbered.
foreach ($unset as $k=>$v) {
    unset($words [$k]);
}
return implode(' ', $words);

}

, , , , .

0

All Articles