Advertisement
Guest User

MSAEvaluator

a guest
May 22nd, 2019
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.55 KB | None | 0 0
  1. package gbi;
  2.  
  3. import java.io.*;
  4.  
  5. /**
  6. * Reads in an MSA in FastA format and a BLOSUM matrix and then computes the alignment score
  7. * Exercise 4, Assignment 5
  8. *
  9. * @author Famke Bäuerle and Viktor Schiff, Code by Kay Nieselt and Daniel Huson, 5.2017
  10. */
  11. public class MSAEvaluator {
  12. private FastA msa; // the input file
  13. private BlosumMatrix blosumMatrix; // the BLOSUM matrix
  14.  
  15. /**
  16. * Constructor
  17. *
  18. * @param fastAFile file containing the MSA
  19. * @param blosumFile file containing the BLOSUM scoring blosumMatrix
  20. * @throws IOException
  21. */
  22. public MSAEvaluator (String fastAFile,String blosumFile) throws IOException {
  23. // initialize the members msa and blosumMatrix members and read in the data
  24. msa = new FastA();
  25. BufferedReader r1 = new BufferedReader(new FileReader(fastAFile));
  26. msa.read(r1);
  27. blosumMatrix = new BlosumMatrix();
  28. BufferedReader r2 = new BufferedReader(new FileReader(blosumFile));
  29. blosumMatrix.read(r2);
  30.  
  31. }
  32.  
  33. /**
  34. * check whether the msa member contains a valid MSA (i.e., are all the sequences of the same length)
  35. *
  36. * @return true, if variable msa contains a valid MSA
  37. */
  38. public boolean isValidMSA() {
  39. int numSeq = msa.size();
  40. String[] allSeq = new String[numSeq];
  41. for (int i = 0; i < numSeq; i++) { //put all Sequences into array
  42. allSeq[i] = msa.getSequence(i);
  43. }
  44. int[] seqLen = new int[numSeq];
  45. for (int i = 0; i < numSeq; i++) { //get length of every sequence
  46. seqLen[i] = allSeq[i].length();
  47. }
  48. for(int i = 1; i < seqLen.length; i++) { //compare all lengths with first length
  49. if(seqLen[0] != seqLen[i]) return false; //if length is different, return false
  50. }
  51. return true;
  52. }
  53.  
  54. /**
  55. * computes the sum of pairs score for the alignment. Use the last row and column of the blosum matrix - denoted by * - as gap penality
  56. *
  57. * @return sum of pairs score
  58. */
  59. public int computeSumOfPairsScore () {
  60. int rowScore = 0;
  61. int score = 0;
  62. char[] rowChars = new char[msa.size()];
  63. for (int j = 0; j < msa.getSequence(0).length(); j++) { //loop through the whole sequence
  64. for (int i = 0; i < msa.size(); i++) { //loop through all sequences
  65. rowChars[i] = msa.getSequence(i).charAt(j); //fill char array with char from every sequence at the same index
  66. //loop through all chars and compare them
  67. for (int p = 0; p < rowChars.length - 1; p++) { //0 to n-1
  68. for (int k = p + 1; k < rowChars.length; k++) { //1 to n
  69. char char1 = rowChars[p];
  70. char char2 = rowChars[k];
  71. if (char1 == char2 && char1 == '-') { //both char1 and char2 are gaps
  72. rowScore = rowScore + 0;
  73. }
  74. else if (char1 == '-' || char2 == '-') { //either char1 or char 2 is gap
  75. rowScore = rowScore - 4;
  76. }
  77. else { //char1 and char2 are amino acids
  78. rowScore = rowScore + blosumMatrix.getScore(char1, char2);
  79. }
  80. }
  81. }
  82. score = score + rowScore;
  83. rowScore = 0;
  84. }
  85.  
  86. }
  87. //compare every Sequence with the other sequences
  88. /* for (int i = 0; i < msa.size() - 1; i++) { //0 to n-1
  89. String seq1 = msa.getSequence(i);
  90. for (int j = i + 1; j < msa.size(); j++) { //1 to n
  91. String seq2 = msa.getSequence(j);
  92. //loop through all letters of both Sequences
  93. for (int p = 0; p < seq1.length(); p++) {
  94. char char1 = seq1.charAt(p);
  95. char char2 = seq2.charAt(p);
  96. if (char1 == char2 && char1 == '-') { //both char1 and char2 are gaps
  97. score = score + 0;
  98. }
  99. else if (char1 == '-' || char2 == '-') { //either char1 or char 2 is gap
  100. score = score - 4;
  101. }
  102. else { //char1 and char2 are amino acids
  103. score = score + blosumMatrix.getScore(char1, char2);
  104. }
  105. }
  106. }
  107. } */
  108.  
  109. return score;
  110. }
  111.  
  112. /**
  113. * gets the alignment
  114. *
  115. * @return MSA
  116. */
  117. public FastA getMsa() {
  118. return msa;
  119. }
  120.  
  121. /**
  122. * gets the BLOSUM matrix
  123. *
  124. * @return BLOSUM matrix
  125. */
  126. public BlosumMatrix getBlosumMatrix() {
  127. return blosumMatrix;
  128. }
  129.  
  130. /**
  131. * run the program
  132. *
  133. * @param args
  134. * @throws IOException
  135. */
  136. public static void main (String[] args) throws IOException {
  137. System.out.println("Famke Bäuerle and Viktor Schiff");
  138.  
  139. MSAEvaluator msaEvaluator=new MSAEvaluator("msa.fasta","blosum62.txt");
  140.  
  141. //msaEvaluator.getMsa().write(new OutputStreamWriter(System.out));
  142. msaEvaluator.getBlosumMatrix().write(new OutputStreamWriter(System.out));
  143.  
  144. if(msaEvaluator.isValidMSA()) {
  145. System.out.println("Sum of pairs score: " + msaEvaluator.computeSumOfPairsScore());
  146. }
  147. else
  148. System.err.println("Alignment invalid");
  149. }
  150. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement