Protein pairwise alignment c++

150 Views Asked by At

Hi I'm trying to implement a pairwise alignment algorithm for protein sequences. I have this the exact same algorithm working for DNA sequences but for some reason when I copy it over to and implement with protein it doesn't The problem with the traceback portion which in will post below.

    Prot1_Seq = "AAGG";
    Prot2_Seq = "AAGG";

    cout << "\nSequence 1: " << Prot1_Seq << endl;
    int Prot1_len = Prot1_Seq.length();

    cout << "\nSequence 2: " << Prot2_Seq << endl;
    int Prot2_len = Prot2_Seq.length();

     int matrix[Prot1_len+1][Prot2_len+1] {0};

     // applying the gap penalty to the outer border columns
        for (int i {1}; i <= Prot1_len; i++)
        {
                matrix[i][0] = gap_pen  * i;

        }
        for (int j {1}; j <= Prot2_len; j++)
        {
                matrix[0][j] = gap_pen * j;

        }

fstream myfile;
        myfile.open("100pam.txt", ios::in);

    if (!myfile.is_open())
        {
                cerr << "Error can not open file" << endl;
                exit(1);
        }


    const int len = 20;
        int pam_matrix[len][len];

        string line;

        getline(myfile,line); // get ing rid of the first line of pam matrix file, by using getline to grab it

        cout << "\n" << endl;

        for (int i = 0; i < len; i++)
        {
                char Row_first_letter;
                myfile >> Row_first_letter; // using the >> to grab the first char of each row leaving the pam matrix with just numbers

                for(int j = 0; j < len; j++)
                {
                        myfile >> pam_matrix[i][j];
                        cout << pam_matrix[i][j] << "\t";
                }
                cout << endl;
        }

    cout << endl;

    int match_score {};
    int mismatch {};

    char row_labels [20] {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
    char col_labels [20] {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};

    for(int i {0}; i <= Prot1_len; i++)
        {
                for(int j {0}; j <= Prot2_len; j++)
                {
                        if (i > 0 && j > 0)
                        {
                                if (Prot1_Seq[i-1] == Prot2_Seq[j-1])
                                {

                    int row = distance(row_labels, find(row_labels, row_labels + 20, Prot1_Seq[i-1]));
                                        int col = distance(col_labels, find(col_labels, col_labels + 20, Prot2_Seq[j-1]));

                    match_score = pam_matrix[row][col];

                    matrix[i][j] = matrix[i-1][j-1] + match_score;
                                }
                else if (Prot1_Seq[i-1] != Prot2_Seq[j-1])
                {
                    //row_labels[i-1] = Prot1_Seq[i-1];
                    //col_labels[i-1] = Prot2_Seq[i-1];

                    int row = distance(row_labels, find(row_labels, row_labels + 20, Prot1_Seq[i-1]));
                    int col = distance(col_labels, find(col_labels, col_labels + 20, Prot2_Seq[j-1]));

                    mismatch = pam_matrix[row][col];

                    matrix[i][j] = max({matrix[i-1][j-1] + mismatch,
                                                matrix[i-1][j] + gap_pen ,
                                                matrix[i][j-1] + gap_pen});
                }
                        }
                        cout << matrix[i][j] << "\t";
                }
                cout << endl;
        }

 int terminal = matrix[Prot1_len+1-1][Prot2_len+1-1]; // rows and colums are length+1 long. and to get the edge value it's col/row length -1.
    cout << terminal << endl;

        int temp = terminal;
        int decrement_X = Prot1_len+1-1;
        int decrement_Y = Prot2_len+1-1;

        string Prot1_aligned = "";
        string Prot2_aligned = "";

        int inc_seq1 = Prot1_len-1;
        int inc_seq2 = Prot2_len-1;

        while (!(decrement_X == 0 || decrement_Y == 0))
        {//while
                if (terminal == matrix[decrement_X][decrement_Y-1] + gap_pen) {
                        temp = matrix[decrement_X][decrement_Y-1];
                        cout << temp << endl;
                        terminal = temp;
                        decrement_Y--;
                        Prot1_aligned += "_";
                        Prot2_aligned += Prot2_Seq[inc_seq2];
                        inc_seq2--;
                }

                else if (terminal == matrix[decrement_X-1][decrement_Y] + gap_pen) {
                        temp = matrix[decrement_X-1][decrement_Y];
                    cout << temp << endl;
                        terminal = temp;
                        decrement_X--;
                        Prot1_aligned += Prot1_Seq[inc_seq1];
                        Prot2_aligned += "_";
                        inc_seq1--;
                }

                else if (terminal == matrix[decrement_X-1][decrement_Y-1] + match_score) {
                        temp = matrix[decrement_X-1][decrement_Y-1];
                    cout << temp << endl;
                        terminal = temp;
                        decrement_X--;
                        decrement_Y--;
                        Prot1_aligned += Prot1_Seq[inc_seq1];
                        Prot2_aligned += Prot2_Seq[inc_seq2];
                        inc_seq1--;
                        inc_seq2--;
                }

                else if (terminal == matrix[decrement_X-1][decrement_Y-1] + mismatch) {
                        temp = matrix[decrement_X-1][decrement_Y-1];
                    cout << temp << endl;
                        terminal = temp;
                        decrement_X--;
                        decrement_Y--;
                        Prot1_aligned += Prot1_Seq[inc_seq1];
                        Prot2_aligned += Prot2_Seq[inc_seq2];
                        inc_seq1--;
                        inc_seq2--;
                }
        } //while

    reverse(Prot1_aligned.begin(), Prot1_aligned.end());
        reverse(Prot2_aligned.begin(), Prot2_aligned.end());

        cout << "Performing alignment" << " .................................................. 100%" << endl;
        cout << "Alignment complete! Your aligned sequences are:" << endl;

        cout << "Sequence 1: " << Prot1_aligned << endl;
        cout << "Sequence 2: " << Prot2_aligned << endl;
}

Let's say I have 2 protein sequences AAGG. This would generate a matrix:
0 -5 -10 -15 -20
-5 4 -1 -6 -11
-10 -1 8 3 -2
-15 -6 3 13 8
-20 -11 -2 8 18

using the traceback code I would expect the values: 18, 13, 8, 4, 0. But for some reason my program just stops at 8 and does not else. Not even seg faulting or quit the program. Any solutions?

0

There are 0 best solutions below