Advertisement
supremeXD

Untitled

Jun 12th, 2022
717
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.24 KB | None | 0 0
  1. #include "genome.h"
  2.  
  3. #include <unordered_map>
  4.  
  5. namespace genome {
  6.  
  7. std::string assembly(const std::size_t k, const std::vector<std::string> & reads)
  8. {
  9.     if (k == 0 || reads.empty()) {
  10.         return "";
  11.     }
  12.  
  13.     unsigned count = 0;
  14.     std::unordered_map<std::string_view, unsigned> get_id;
  15.     constexpr unsigned factor = 2;
  16.     get_id.reserve(factor * (reads.size() * (reads[0].size() - k) + 1));
  17.  
  18.     for (const auto & word : reads) {
  19.         for (unsigned i = 0; i <= word.size() - k; ++i) {
  20.             auto [it, inserted] = get_id.emplace(std::string_view{&word[i], k}, count);
  21.             if (inserted) {
  22.                 ++count;
  23.             }
  24.         }
  25.     }
  26.  
  27.     std::vector<std::vector<unsigned>> graph(count);
  28.     std::vector<std::string_view> id_to_string(count);
  29.     std::vector<unsigned> in_deg(count);
  30.     for (const auto & word : reads) {
  31.         std::string_view from{&word[0], k};
  32.         unsigned from_id = get_id[from];
  33.         id_to_string[from_id] = from;
  34.         for (unsigned i = 1; i <= word.size() - k; ++i) {
  35.             std::string_view to{&word[i], k};
  36.             const unsigned to_id = get_id[to];
  37.             id_to_string[to_id] = to;
  38.             graph[from_id].emplace_back(to_id);
  39.             ++in_deg[to_id];
  40.             from_id = to_id;
  41.         }
  42.     }
  43.  
  44.     unsigned start = 0;
  45.     for (unsigned i = 0; i < graph.size(); ++i) {
  46.         if ((graph[i].size() - in_deg[i]) == 1) {
  47.             start = i;
  48.             break;
  49.         }
  50.     }
  51.  
  52.     std::string genome;
  53.     genome.resize(reads[0].size() + ((reads.size() - 1) * (reads[0].size() - k)));
  54.     unsigned index_in_genome = genome.size() - 1;
  55.  
  56.     std::vector<unsigned> stack;
  57.     stack.reserve((reads[0].size() - k) * reads.size());
  58.     stack.emplace_back(start);
  59.  
  60.     while (!stack.empty()) {
  61.         const unsigned vert = stack.back();
  62.         if (!graph[vert].empty()) {
  63.             stack.emplace_back(graph[vert].back());
  64.             graph[vert].pop_back();
  65.         }
  66.         else {
  67.             stack.pop_back();
  68.             genome[index_in_genome] = id_to_string[vert].back();
  69.             --index_in_genome;
  70.         }
  71.     }
  72.  
  73.     id_to_string[start].copy(genome.data(), k - 1);
  74.     return genome;
  75. }
  76.  
  77. } // namespace genome
  78.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement