Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "genome.h"
- #include <unordered_map>
- namespace genome {
- std::string assembly(const std::size_t k, const std::vector<std::string> & reads)
- {
- if (k == 0 || reads.empty()) {
- return "";
- }
- unsigned count = 0;
- std::unordered_map<std::string_view, unsigned> get_id;
- constexpr unsigned factor = 2;
- get_id.reserve(factor * (reads.size() * (reads[0].size() - k) + 1));
- for (const auto & word : reads) {
- for (unsigned i = 0; i <= word.size() - k; ++i) {
- auto [it, inserted] = get_id.emplace(std::string_view{&word[i], k}, count);
- if (inserted) {
- ++count;
- }
- }
- }
- std::vector<std::vector<unsigned>> graph(count);
- std::vector<std::string_view> id_to_string(count);
- std::vector<unsigned> in_deg(count);
- for (const auto & word : reads) {
- std::string_view from{&word[0], k};
- unsigned from_id = get_id[from];
- id_to_string[from_id] = from;
- for (unsigned i = 1; i <= word.size() - k; ++i) {
- std::string_view to{&word[i], k};
- const unsigned to_id = get_id[to];
- id_to_string[to_id] = to;
- graph[from_id].emplace_back(to_id);
- ++in_deg[to_id];
- from_id = to_id;
- }
- }
- unsigned start = 0;
- for (unsigned i = 0; i < graph.size(); ++i) {
- if ((graph[i].size() - in_deg[i]) == 1) {
- start = i;
- break;
- }
- }
- std::string genome;
- genome.resize(reads[0].size() + ((reads.size() - 1) * (reads[0].size() - k)));
- unsigned index_in_genome = genome.size() - 1;
- std::vector<unsigned> stack;
- stack.reserve((reads[0].size() - k) * reads.size());
- stack.emplace_back(start);
- while (!stack.empty()) {
- const unsigned vert = stack.back();
- if (!graph[vert].empty()) {
- stack.emplace_back(graph[vert].back());
- graph[vert].pop_back();
- }
- else {
- stack.pop_back();
- genome[index_in_genome] = id_to_string[vert].back();
- --index_in_genome;
- }
- }
- id_to_string[start].copy(genome.data(), k - 1);
- return genome;
- }
- } // namespace genome
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement