Advertisement
Guest User

Untitled

a guest
Jan 26th, 2020
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.62 KB | None | 0 0
  1. #pragma once
  2.  
  3. #include <experimental/optional>
  4. #include <fstream>
  5. #include <cassert>
  6. #include <set>
  7. #include <vector>
  8.  
  9. #include "path.hpp"
  10. #include "sequences_handler.hpp"
  11.  
  12. /**
  13. * Joins paths to generate assembled scaffoleds from paths between contigs.
  14. */
  15. class PathJoiner {
  16. const int OVERHANG_ADDITIONS_PERCENTAGE = 20;
  17. public:
  18. PathJoiner(SequencesHandler &handler) : handler_(handler) {}
  19.  
  20. /**
  21. * Generates a solution consiting of potentially multiple scaffoleds.
  22. * @param generated_paths - each pair represents a path between two contigs
  23. */
  24. void constructSolution(std::map<utility::pii, Path> generated_paths) {
  25. generated_paths_ = generated_paths;
  26.  
  27. contigs_ = handler_.getAllByType(SequencesHandler::TYPE::CONTIG);
  28. while (1) {
  29. Path best_scaffold;
  30. int best_scaffoled_included = -1;
  31. for (auto contig: contigs_) {
  32. if (solved_for_[contig]) continue;
  33.  
  34. solution_included_contigs_ = 0;
  35. Path path;
  36. path.length = handler_.getLength(contig);
  37. path.vertices.push_back(contig);
  38. path.extension_lengths.push_back(path.length);
  39.  
  40. dfsConnectContigs(path, contig, 1);
  41.  
  42. printf("Constructed potential scaffold with length: %d, containing %d vertices:\n", (int)solution_.length, (int)solution_.vertices.size());
  43. for (auto v: solution_.vertices) {
  44. if (handler_.getType(v) == SequencesHandler::TYPE::CONTIG) printf("%d ", v);
  45. }
  46. printf("\n");
  47.  
  48. if (solution_included_contigs_ > best_scaffoled_included) {
  49. best_scaffoled_included = solution_included_contigs_;
  50. best_scaffold = solution_;
  51. }
  52. }
  53.  
  54. if (best_scaffoled_included == -1) break;
  55.  
  56. for (auto visited_v: best_scaffold.vertices) {
  57. if (handler_.getType(visited_v) == SequencesHandler::TYPE::CONTIG) {
  58. solved_for_[visited_v] = true;
  59. solved_for_[visited_v^1] = true;
  60. }
  61. }
  62.  
  63. printf("Constructed best_scaffold with length: %d, containing:\n", best_scaffold.length);
  64. for (auto v: best_scaffold.vertices) {
  65. if (handler_.getType(v) == SequencesHandler::TYPE::CONTIG) printf("%d ", v);
  66. }
  67. printf("\n");
  68. scaffolds_.push_back(best_scaffold);
  69. }
  70. }
  71.  
  72. /**
  73. * Outputs generated scaffoleds to the file.
  74. * @param path - filename to output to
  75. */
  76. void outputSolutions(std::string path) {
  77. std::ofstream out_file;
  78. out_file.open(path);
  79. for (int i = 0; i < (int)scaffolds_.size(); ++i) {
  80. out_file << ">" << i << std::endl;
  81. out_file << handler_.reconstructPath(scaffolds_[i]) << std::endl;
  82. }
  83. out_file.close();
  84. }
  85.  
  86. private:
  87. /**
  88. * Finss a path passing through as much as possible unused contigs with
  89. * minimal length
  90. * @params Path - currently assembled path
  91. * @params current - current contig
  92. * @params included_contigs - number of contigs included in th epath
  93. */
  94. void dfsConnectContigs(Path path, int current, int included_contigs) {
  95.  
  96. int added_length = 0;
  97. for (int i = 0; i < (int)path.vertices.size(); ++i) {
  98. if (handler_.getType(path.vertices[i]) == SequencesHandler::TYPE::READ) {
  99. added_length += path.extension_lengths[i];
  100. }
  101. }
  102. if (added_length > OVERHANG_ADDITIONS_PERCENTAGE/100.0*path.length) return;
  103.  
  104. visited_.insert(current);
  105.  
  106. for (auto next_contig: contigs_) {
  107. if (solved_for_[next_contig]) continue;
  108. if (visited_.count(next_contig)) continue;
  109.  
  110. // if this contig has been reverse visited
  111. if (visited_.count(utility::reverseId(next_contig)))
  112. continue;
  113.  
  114. // no connecting path
  115. if (!generated_paths_.count({current, next_contig})) continue;
  116. Path new_path = path;
  117. utility::mergePaths(&new_path, generated_paths_[{current, next_contig}]);
  118.  
  119. dfsConnectContigs(new_path, next_contig, included_contigs+1);
  120. }
  121.  
  122. if (solution_included_contigs_ < included_contigs ||
  123. (solution_included_contigs_ == included_contigs && solution_.length > path.length)) {
  124. solution_ = path;
  125. solution_included_contigs_ = included_contigs;
  126. }
  127.  
  128. visited_.erase(current);
  129. }
  130.  
  131. SequencesHandler handler_;
  132. std::map<utility::pii, Path> generated_paths_;
  133. std::vector<int> contigs_;
  134.  
  135. std::set<int> visited_;
  136. std::map<int, bool> solved_for_;
  137. std::vector<Path> scaffolds_;
  138. int solution_included_contigs_;
  139. Path solution_;
  140. };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement