Perl bloats in sequence using dynamic programming

I am comparing a reference sequence of bases of size 5500 and a sequence of requests of size 3600 using dynamic programming (semi-global alignment), in fact I know little about complexity and performance, and the code explodes and gives me an "out of memory" error. Knowing that it works fine on smaller sequences, my question is: is this behavior normal or may I have another problem with the code? If this is a normal hint to solve this problem? Thanks in advance.

sub semiGlobal {
    my ( $seq1, $seq2,$MATCH,$MISMATCH,$GAP ) = @_;
    # initialization: first row to 0 ;
    my @matrix;
    $matrix[0][0]{score}   = 0;
    $matrix[0][0]{pointer} = "none";
    for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
        $matrix[0][$j]{score}   = 0;
        $matrix[0][$j]{pointer} = "none";
    }

    for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
        $matrix[$i][0]{score}   = $GAP * $i;
        $matrix[$i][0]{pointer} = "up";
    }

    # fill
    my $max_i     = 0;
    my $max_j     = 0;
    my $max_score = 0;

    print "seq2: ".length($seq2);
    print "seq1: ".length($seq1);

    for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
        for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
            my ( $diagonal_score, $left_score, $up_score );
            # calculate match score
            my $letter1 = substr( $seq1, $j - 1, 1 );
            my $letter2 = substr( $seq2, $i - 1, 1 );
            if ( $letter1 eq $letter2 ) {
                $diagonal_score = $matrix[ $i - 1 ][ $j - 1 ]{score} +  $MATCH;
            }
            else {
                $diagonal_score = $matrix[ $i - 1 ][ $j - 1 ]{score} +  $MISMATCH;
            }

            # calculate gap scores
            $up_score   = $matrix[ $i - 1 ][$j]{score} +  $GAP;
            $left_score = $matrix[$i][ $j - 1 ]{score} +  $GAP;

            # choose best score
            if ( $diagonal_score >= $up_score ) {
                if ( $diagonal_score >= $left_score ) {
                    $matrix[$i][$j]{score}   = $diagonal_score;
                    $matrix[$i][$j]{pointer} = "diagonal";
                }
                else {
                    $matrix[$i][$j]{score}   = $left_score;
                    $matrix[$i][$j]{pointer} = "left";
                }
            }
            else {
                if ( $up_score >= $left_score ) {
                    $matrix[$i][$j]{score}   = $up_score;
                    $matrix[$i][$j]{pointer} = "up";
                }
                else {
                    $matrix[$i][$j]{score}   = $left_score;
                    $matrix[$i][$j]{pointer} = "left";
                }
            }

            # set maximum score
            if ( $matrix[$i][$j]{score} > $max_score ) {
                $max_i     = $i;
                $max_j     = $j;
                $max_score = $matrix[$i][$j]{score};
            }
        }
    }

    my $align1 = "";
    my $align2 = "";
    my $j = $max_j;
    my $i = $max_i;

    while (1) {
        if ( $matrix[$i][$j]{pointer} eq "none" ) {
            $stseq1 = $j;
            last;
        }

        if ( $matrix[$i][$j]{pointer} eq "diagonal" ) {
            $align1 .= substr( $seq1, $j - 1, 1 );
            $align2 .= substr( $seq2, $i - 1, 1 );
            $i--;
            $j--;
        }
        elsif ( $matrix[$i][$j]{pointer} eq "left" ) {
            $align1 .= substr( $seq1, $j - 1, 1 );
            $align2 .= "-";
            $j--;
        }
        elsif ( $matrix[$i][$j]{pointer} eq "up" ) {
            $align1 .= "-";
            $align2 .= substr( $seq2, $i - 1, 1 );
            $i--;
        }
    }

    $align1 = reverse $align1;
    $align2 = reverse $align2;
    return ( $align1, $align2, $stseq1 ,$max_j);
}
+4
source share
2 answers

- @matrix . . :

sub semiGlobal {

    use Tie::Array::CSV; 

    tie my @matrix, 'Tie::Array::CSV', 'temp.txt'; # Don't forget to add your own error handler.


    my ( $seq1, $seq2,$MATCH,$MISMATCH,$GAP ) = @_;

    # initialization: first row to 0 ;

    $matrix[0][0] = '0 n';
    for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
        $matrix[0][$j] = '0 n';
    }

    for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {

        my $score = $GAP * $i;
        $matrix[$i][0] = join ' ',$score,'u';
    }

    #print Dumper(\@matrix);

    # fill
    my $max_i     = 0;
    my $max_j     = 0;
    my $max_score = 0;

    print "seq2: ".length($seq2)."\n";
    print "seq1: ".length($seq1)."\n";

    for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
        for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
            my ( $diagonal_score, $left_score, $up_score );

            # calculate match score
            my $letter1 = substr( $seq1, $j - 1, 1 );
            my $letter2 = substr( $seq2, $i - 1, 1 );
            my $score = (split / /, $matrix[ $i - 1 ][ $j - 1 ])[0];
            if ( $letter1 eq $letter2 ) {
                $diagonal_score = $score +  $MATCH;
            }
            else {
                $diagonal_score = $score +  $MISMATCH;
            }

            # calculate gap scores
            $up_score   = (split / /,$matrix[ $i - 1 ][$j])[0] +  $GAP;
            $left_score = (split / /,$matrix[$i][ $j - 1 ])[0] +  $GAP;

            # choose best score
            if ( $diagonal_score >= $up_score ) {
                if ( $diagonal_score >= $left_score ) {
                    $matrix[$i][$j] = join ' ',$diagonal_score,'d';
                }
                else {
                    $matrix[$i][$j] = join ' ', $left_score, 'l';
                }
            }
            else {
                if ( $up_score >= $left_score ) {
                    $matrix[$i][$j] = join ' ', $up_score, 'u';
                }
                else {
                    $matrix[$i][$j] = join ' ', $left_score, 'l';
                }
            }

            # set maximum score
            if ( (split / /, $matrix[$i][$j])[0] > $max_score ) {
                $max_i     = $i;
                $max_j     = $j;
                $max_score = (split / /, $matrix[$i][$j])[0];

            }
        }
    }


    my $align1 = "";
    my $align2 = "";
    my $stseq1;

    my $j = $max_j;
    my $i = $max_i;

    while (1) {
        my $pointer = (split / /, $matrix[$i][$j])[1];
        if ( $pointer eq "n" ) {
            $stseq1 = $j;
            last;
        }

        if ( $pointer eq "d" ) {
            $align1 .= substr( $seq1, $j - 1, 1 );
            $align2 .= substr( $seq2, $i - 1, 1 );
            $i--;
            $j--;
        }
        elsif ( $pointer eq "l" ) {
            $align1 .= substr( $seq1, $j - 1, 1 );
            $align2 .= "-";
            $j--;
        }
        elsif ( $pointer eq "u" ) {
            $align1 .= "-";
            $align2 .= substr( $seq2, $i - 1, 1 );
            $i--;
        }
    }

    $align1 = reverse $align1;
    $align2 = reverse $align2;

    untie @matrix; # Don't forget to add your own error handler.

    unlink 'temp.txt'; # Don't forget to add your own error handler.

    return ( $align1, $align2, $stseq1 ,$max_j);
} 

.

+2

, @j_random_hacker @Ashalynd Perl. , .

, "", , perl. , , .

, , , @Ashalynd. , . - , :

use strict;
use warnings;

# define constants for array positions and pointer values
# so the code is still readable.
# (If you have the "Readonly" CPAN module you may want to use it for constants
# instead although none of the downsides of the "constant" pragma apply in this code.)
use constant {
  SCORE => 0,
  POINTER => 1,

  DIAGONAL => 0,
  LEFT => 1,
  UP => 2,
  NONE => 3,
};

...

sub semiGlobal2 {
  my ( $seq1, $seq2,$MATCH,$MISMATCH,$GAP ) = @_;

  # initialization: first row to 0 ;
  my @matrix;

  # score and pointer are now stored in an array
  # using the defined constants as indices
  $matrix[0][0][SCORE]   = 0;

  # pointer value is now a constant integer
  $matrix[0][0][POINTER] = NONE;

  for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
      $matrix[0][$j][SCORE]   = 0;
      $matrix[0][$j][POINTER] = NONE;
  }

  for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {

      $matrix[$i][0][SCORE]   = $GAP * $i;
      $matrix[$i][0][POINTER] = UP;
  }

... # continue to make the appropriate changes throughout the code

, , 3600 char 5500 char. , , 2 . 23 , , , 32 .

, Algorithm:: NeedlemanWunsch. , , , , . Inline Perl XS wrapper C

+1

All Articles