Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <time.h>
- #include <cmath>
- #include <iostream>
- #include <fstream>
- #include <vector>
- #include <string>
- using namespace std;
- int min(int i1, int i2, int i3){
- if(i1 <= i2 && i1 <= i3)
- return i1;
- if(i2 <= i1 && i2 <= i3)
- return i2;
- if(i3 <= i2 && i3 <= i1)
- return i3;
- }
- int whichmin(int i1, int i2, int i3){
- if(i2 <= i1 && i2 <= i3)
- return 2;
- if(i1 <= i2 && i1 <= i3)
- return 1;
- if(i3 <= i2 && i3 <= i1)
- return 3;
- }
- int main(int argc, char** argv){
- ifstream input;
- ofstream output;
- output.open("output.txt");
- input.open(argv[1]);
- string seq;
- while(input >> seq){
- vector<char> seq1;
- vector<char> seq2;
- int comma = 0; //copy values before comma into seq1
- for(int i = 0; i < seq.length(); i++){
- if(seq[i] == ',')
- break;
- seq1.push_back(seq[i]);
- comma++;
- }
- //copy values after comma into seq2
- for(int i = comma + 1; i < seq.length(); i++){
- seq2.push_back(seq[i]);
- }
- //convert A to 1, T to 2, G to 3, C to 4 in both vectors
- for(int i = 0; i < seq1.size(); i++){
- if(seq1[i] == 'A')
- seq1[i] = '1';
- else if(seq1[i] == 'T')
- seq1[i] = '2';
- else if(seq1[i] == 'G')
- seq1[i] = '3';
- else if(seq1[i] == 'C')
- seq1[i] = '4';
- }
- for(int i = 0; i < seq2.size(); i++){
- if(seq2[i] == 'A')
- seq2[i] = '1';
- else if(seq2[i] == 'T')
- seq2[i] = '2';
- else if(seq2[i] == 'G')
- seq2[i] = '3';
- else if(seq2[i] == 'C')
- seq2[i] = '4';
- }
- ifstream matrixf;
- matrixf.open(argv[2]);
- string lines[6];
- int matrix[5][5];
- for(int i = 0; i < 6; i++){ //grab the table
- matrixf >> lines[i];
- }
- for(int i = 0; i < 5; i++){ //copy important values
- matrix[i][0] = int (lines[i+1][2]) - 48;
- matrix[i][1] = int (lines[i+1][4]) - 48;
- matrix[i][2] = int (lines[i+1][6]) - 48;
- matrix[i][3] = int (lines[i+1][8]) - 48;
- matrix[i][4] = int (lines[i+1][10]) - 48;
- }
- int cost[seq1.size()+1][seq2.size()+1];
- int path[seq1.size()+1][seq2.size()+1];
- cost[0][0] = matrix[0][0];
- int sumx = 0, sumy = 0; //base cases. We need base case to be cost
- //of aligning string will all inserts
- //for path base case is coming from [i-1] for [i][o]
- //and coming from [j-1] for [0][j]
- for(int i = 1; i < seq1.size()+1; i++){
- int n = int (seq1[i-1]) - 48;
- sumx += matrix[0][n];
- cost[i][0] = sumx;
- path[i][0] = 1;
- }
- for(int i = 1; i < seq2.size()+1; i++){
- int n = int (seq2[i-1]) - 48;
- sumy += matrix[0][n];
- cost[0][i] = sumy;
- path[0][i] = 2;
- }
- //dynamic programming
- for(int i = 1; i < seq1.size() + 1; i++){
- for(int j = 1; j < seq2.size() + 1; j++){
- int s1, s2;
- s1 = int (seq1[i-1]) - 48;
- s2 = int (seq2[j-1]) - 48;
- cost[i][j] = min(cost[i-1][j] + matrix[s1][0],
- cost[i][j-1] + matrix[0][s2],
- cost[i-1][j-1] + matrix[s1][s2]);
- path[i][j] = whichmin(cost[i-1][j] + matrix[s1][0],
- cost[i][j-1] + matrix[0][s2],
- cost[i-1][j-1] + matrix[s1][s2]);
- }
- }
- int iteri = seq1.size(), iterj = seq2.size();
- vector<char> alseq1; //trace back the path
- vector<char> alseq2;
- while(iteri != 0 || iterj != 0){
- if(path[iteri][iterj] == 3){
- alseq1.insert(alseq1.begin(),seq1[iteri-1]);
- alseq2.insert(alseq2.begin(),seq2[iterj-1]);
- iteri--;
- iterj--;
- }else if(path[iteri][iterj] == 2){
- alseq1.insert(alseq1.begin(),'-');
- alseq2.insert(alseq2.begin(),seq2[iterj-1]);
- iterj--;
- }else{
- alseq1.insert(alseq1.begin(),seq1[iteri-1]);
- alseq2.insert(alseq2.begin(),'-');
- iteri--;
- }
- }
- for(int i = 0; i < alseq1.size(); i++){ //convert numbers back to letters
- if(alseq1[i] == '1'){
- alseq1[i] = 'A';
- }
- if(alseq1[i] == '2'){
- alseq1[i] = 'T';
- }
- if(alseq1[i] == '3'){
- alseq1[i] = 'G';
- }
- if(alseq1[i] == '4'){
- alseq1[i] = 'C';
- }
- }
- for(int i = 0; i < alseq2.size(); i++){
- if(alseq2[i] == '1'){
- alseq2[i] = 'A';
- }
- if(alseq2[i] == '2'){
- alseq2[i] = 'T';
- }
- if(alseq2[i] == '3'){
- alseq2[i] = 'G';
- }
- if(alseq2[i] == '4'){
- alseq2[i] = 'C';
- }
- }
- for(int i = 0; i < alseq1.size(); i++) //output to file
- output << alseq1[i];
- output << ',';
- for(int i = 0; i < alseq2.size(); i++)
- output << alseq2[i];
- output << ":" << cost[seq1.size()][seq2.size()] << endl;
- }
- output.close();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement