Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #pragma once
- #include <experimental/optional>
- #include <fstream>
- #include <cassert>
- #include <set>
- #include <vector>
- #include "path.hpp"
- #include "sequences_handler.hpp"
- /**
- * Joins paths to generate assembled scaffoleds from paths between contigs.
- */
- class PathJoiner {
- const int OVERHANG_ADDITIONS_PERCENTAGE = 20;
- public:
- PathJoiner(SequencesHandler &handler) : handler_(handler) {}
- /**
- * Generates a solution consiting of potentially multiple scaffoleds.
- * @param generated_paths - each pair represents a path between two contigs
- */
- void constructSolution(std::map<utility::pii, Path> generated_paths) {
- generated_paths_ = generated_paths;
- contigs_ = handler_.getAllByType(SequencesHandler::TYPE::CONTIG);
- while (1) {
- Path best_scaffold;
- int best_scaffoled_included = -1;
- for (auto contig: contigs_) {
- if (solved_for_[contig]) continue;
- solution_included_contigs_ = 0;
- Path path;
- path.length = handler_.getLength(contig);
- path.vertices.push_back(contig);
- path.extension_lengths.push_back(path.length);
- dfsConnectContigs(path, contig, 1);
- printf("Constructed potential scaffold with length: %d, containing %d vertices:\n", (int)solution_.length, (int)solution_.vertices.size());
- for (auto v: solution_.vertices) {
- if (handler_.getType(v) == SequencesHandler::TYPE::CONTIG) printf("%d ", v);
- }
- printf("\n");
- if (solution_included_contigs_ > best_scaffoled_included) {
- best_scaffoled_included = solution_included_contigs_;
- best_scaffold = solution_;
- }
- }
- if (best_scaffoled_included == -1) break;
- for (auto visited_v: best_scaffold.vertices) {
- if (handler_.getType(visited_v) == SequencesHandler::TYPE::CONTIG) {
- solved_for_[visited_v] = true;
- solved_for_[visited_v^1] = true;
- }
- }
- printf("Constructed best_scaffold with length: %d, containing:\n", best_scaffold.length);
- for (auto v: best_scaffold.vertices) {
- if (handler_.getType(v) == SequencesHandler::TYPE::CONTIG) printf("%d ", v);
- }
- printf("\n");
- scaffolds_.push_back(best_scaffold);
- }
- }
- /**
- * Outputs generated scaffoleds to the file.
- * @param path - filename to output to
- */
- void outputSolutions(std::string path) {
- std::ofstream out_file;
- out_file.open(path);
- for (int i = 0; i < (int)scaffolds_.size(); ++i) {
- out_file << ">" << i << std::endl;
- out_file << handler_.reconstructPath(scaffolds_[i]) << std::endl;
- }
- out_file.close();
- }
- private:
- /**
- * Finss a path passing through as much as possible unused contigs with
- * minimal length
- * @params Path - currently assembled path
- * @params current - current contig
- * @params included_contigs - number of contigs included in th epath
- */
- void dfsConnectContigs(Path path, int current, int included_contigs) {
- int added_length = 0;
- for (int i = 0; i < (int)path.vertices.size(); ++i) {
- if (handler_.getType(path.vertices[i]) == SequencesHandler::TYPE::READ) {
- added_length += path.extension_lengths[i];
- }
- }
- if (added_length > OVERHANG_ADDITIONS_PERCENTAGE/100.0*path.length) return;
- visited_.insert(current);
- for (auto next_contig: contigs_) {
- if (solved_for_[next_contig]) continue;
- if (visited_.count(next_contig)) continue;
- // if this contig has been reverse visited
- if (visited_.count(utility::reverseId(next_contig)))
- continue;
- // no connecting path
- if (!generated_paths_.count({current, next_contig})) continue;
- Path new_path = path;
- utility::mergePaths(&new_path, generated_paths_[{current, next_contig}]);
- dfsConnectContigs(new_path, next_contig, included_contigs+1);
- }
- if (solution_included_contigs_ < included_contigs ||
- (solution_included_contigs_ == included_contigs && solution_.length > path.length)) {
- solution_ = path;
- solution_included_contigs_ = included_contigs;
- }
- visited_.erase(current);
- }
- SequencesHandler handler_;
- std::map<utility::pii, Path> generated_paths_;
- std::vector<int> contigs_;
- std::set<int> visited_;
- std::map<int, bool> solved_for_;
- std::vector<Path> scaffolds_;
- int solution_included_contigs_;
- Path solution_;
- };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement