Ghytro

file_dublicates.cpp

Sep 14th, 2021
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.72 KB | None | 0 0
  1. //This program must be compiled with at least -std=c++17,
  2. //because it uses <filesystem>, which provides cross-platform directory access
  3.  
  #include <algorithm>
  #include <cstddef>
  #include <filesystem>
  #include <fstream>
  #include <functional>
  #include <iostream>
  #include <iterator>
  #include <string>
  #include <system_error>
  #include <unordered_map>
  #include <vector>
  9.  
  namespace
  {
      // Reads the whole file at `path` into `bytes` (binary mode, no translation).
      // Returns false when the file cannot be opened; `bytes` is left untouched then.
      bool read_file_bytes(const std::filesystem::path& path, std::string& bytes)
      {
          std::ifstream fin(path, std::ios::binary);
          if (!fin)
              return false;

          bytes.assign(std::istreambuf_iterator<char>{fin}, std::istreambuf_iterator<char>{});
          return true;
      }
  }

  // Scans the directory given as the first command-line argument (recursively)
  // and reports the file content that occurs most often, together with the
  // number of files that share it.
  //
  // Returns 0 on success (including "nothing to report") and 1 when the
  // directory argument is missing or the directory cannot be opened.
  int main(int argc, const char* argv[])
  {
      // One group of byte-identical files.
      struct file_amount
      {
          std::string filename;   // first file encountered with this content
          unsigned amount;        // number of files sharing this content
      };

      // argv[0] is the program name; the directory is the first real argument.
      if (argc < 2)
      {
          std::cerr << "Specify the directory to search files" << '\n';
          return 1;
      }

      const std::filesystem::path root = argv[1];

      // Use the error_code overloads so a missing/unreadable directory is
      // reported instead of terminating through an uncaught filesystem_error.
      std::error_code ec;
      std::filesystem::recursive_directory_iterator it(
          root, std::filesystem::directory_options::skip_permission_denied, ec);
      if (ec)
      {
          std::cerr << "Cannot open directory '" << root.string() << "': " << ec.message() << '\n';
          return 1;
      }

      const std::hash<std::string> hash_func{};

      // Key is the hash of the file contents. Each bucket holds one entry per
      // distinct content, so files whose hashes merely collide are not merged.
      std::unordered_map<std::size_t, std::vector<file_amount>> occasions;

      std::string bytes;        // contents of the file currently being examined
      std::string known_bytes;  // contents of a previously seen candidate

      const std::filesystem::recursive_directory_iterator end;
      while (it != end)
      {
          const auto& entry = *it;

          // Directories, sockets, etc. have no content to compare; files that
          // cannot be opened are skipped rather than treated as empty.
          std::error_code type_ec;
          if (entry.is_regular_file(type_ec) && read_file_bytes(entry.path(), bytes))
          {
              auto& groups = occasions[hash_func(bytes)];

              // Equal hashes do not prove equal contents: confirm byte-for-byte
              // against the representative file of each group in this bucket.
              const auto match = std::find_if(groups.begin(), groups.end(),
                  [&](const file_amount& group) {
                      return read_file_bytes(group.filename, known_bytes) && known_bytes == bytes;
                  });

              if (match != groups.end())
                  ++match->amount;
              else
                  groups.push_back({entry.path().string(), 1u});
          }

          it.increment(ec);
          if (ec)
          {
              // The iterator state is unreliable after a failed increment,
              // so stop scanning and report what was collected so far.
              std::cerr << "Directory scan stopped early: " << ec.message() << '\n';
              break;
          }
      }

      // Pick the group with the most members; stays null when nothing was read,
      // which avoids dereferencing an end iterator on an empty result set.
      const file_amount* max_copies = nullptr;
      for (const auto& bucket : occasions)
      {
          for (const auto& group : bucket.second)
          {
              if (max_copies == nullptr || group.amount > max_copies->amount)
                  max_copies = &group;
          }
      }

      if (max_copies == nullptr)
      {
          std::cout << "No readable files found in '" << root.string() << "'\n";
          return 0;
      }

      std::cout << "File with maximum copies found: " << max_copies->filename << '\n'
                << "Found " << max_copies->amount << " copies" << '\n';
      return 0;
  }
  54.  
Advertisement
Add Comment
Please, Sign In to add comment