Advertisement
Guest User

Untitled

a guest
Dec 5th, 2016
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.25 KB | None | 0 0
  1. // КДЗ-1 по дисциплине Алгоритмы и структуры данных
  2. // Ирхин Федор Денисович, БПИ154(1), 25.11.2016
  3. // Visual Studio 2015, состав проекта (файлы *.cpp и *.h)
  4. // Что сделано, а что нет
  5.  
  6. #include <iostream>
  7. #include <fstream>
  8. #include <vector>
  9. #include <string>
  10. #include <algorithm>
  11. #include <queue>
  12. #include <iterator>
  13. #include <stdexcept>
  14. #include <assert.h>
  15. #include <map>
  16. #include <windows.h>
  17. #include <clocale>
  18. #include <codecvt>
  19. #include <locale>
  20.  
  21. using namespace std;
  22. struct Node
  23. {
  24. bool isLeaf;
  25. wchar_t simbolChar;
  26. int freq;
  27. Node* left = nullptr;
  28. Node* right = nullptr;
  29.  
  30. //Comparing operators for sort
  31. bool operator < (const Node& str) const
  32. {
  33. return (freq < str.freq);
  34. }
  35. bool operator > (const Node& str) const
  36. {
  37. return (freq > str.freq);
  38. }
  39. };
  40.  
  41.  
  42.  
  43.  
  44. //Header
  45. void tableFilling(Node *root, std::wofstream& fout);
  46. void treeDelete(Node *);
  47. void writeCodedText(string pathFrom, std::wofstream& fout);
  48. std::vector<Node*> nodesFromString(string path);
  49.  
  50. Node* buildTreeHaff(vector<Node> nodes);
  51. Node* buildTreeSh(vector<Node> nodes);
  52. Node* split(pair<int, Node*> left, pair<int, Node*> right);
  53.  
  54.  
  55. std::wstring tableFreq[1024];
  56.  
  57. std::vector<Node*> nodesFromString(string path)
  58. {
  59. std::locale locale(std::locale(), new std::codecvt_utf8<wchar_t>);
  60. std::wifstream fin(path, std::ios::binary);
  61. fin.imbue(locale);
  62.  
  63. //Chechking is file exist
  64. if (!fin.is_open())
  65. {
  66. std::cout << "Input file not found" << std::endl;
  67. system("pause");
  68. throw std::logic_error("Input file not found");
  69. }
  70.  
  71. //Vector of different Chars
  72. std::vector<wchar_t> charSet;
  73. //Vector of Nodes, which containt char and it's frequency
  74. std::vector<Node*> nodes;
  75. wchar_t currentChar;
  76. int j = 0;
  77.  
  78. //Filling nodes vector, counting frequency of every node
  79. while (!fin.eof())
  80. {
  81. fin.get(currentChar);
  82. if (!fin.eof())
  83. {
  84. int i;
  85. for (i = 0; i < charSet.size() && charSet[i] != currentChar; i++);
  86.  
  87. //Add new char
  88. if (i == charSet.size())
  89. {
  90. Node* tmp = new Node();
  91.  
  92. charSet.push_back(currentChar);
  93.  
  94. tmp->simbolChar = currentChar;
  95. tmp->freq = 0;
  96. tmp->isLeaf = true;
  97.  
  98. nodes.push_back(tmp);
  99.  
  100. nodes[j]->freq++;
  101. j++;
  102. }
  103. //Plus frequency if char is already exist
  104. else
  105. {
  106. Node tmp;
  107.  
  108. tmp.simbolChar = currentChar;
  109. tmp.freq = 0;
  110. tmp.isLeaf = true;
  111.  
  112. std::vector<Node*>::iterator it = nodes.begin();
  113.  
  114. int i = 0;
  115. while (it < nodes.end())
  116. {
  117. if ((*it)->simbolChar == currentChar)
  118. {
  119. break;
  120. }
  121. i++;
  122. it++;
  123. }
  124. nodes[i]->freq++;
  125. }
  126. }
  127. }
  128. return nodes;
  129. }
  130. void decodeString(Node *root, string encoded_str, string outPath);
  131. void decodeString(Node *root, string encoded_str, string outPath)
  132. {
  133. wstring res = L"";
  134. Node* node = root;
  135. for (int i = 0; i != encoded_str.size(); ++i)
  136. {
  137. if (encoded_str[i] == '0') { // left child
  138.  
  139. node = node->left;
  140.  
  141. }
  142. else { // rigth child
  143. node = node->right;
  144. }
  145. if (node->isLeaf)
  146. {
  147. res += node->simbolChar;
  148. node = root;
  149. }
  150. }
  151.  
  152. std::wofstream foutStream(outPath, std::ios_base::binary);
  153. foutStream << res;
  154. }
  155.  
  156. void write_tree(wofstream& foutTree, Node* root);
  157. void write_tree(wofstream& foutTree, Node* root)
  158. {
  159. unsigned char buf = 0;
  160. unsigned char count = 0;
  161. bool bit;
  162. // std::wofstream foutTree(path);
  163. if (root->isLeaf)
  164. {
  165. bit = 1;
  166. buf = buf | bit << (7 - count);
  167. count++;
  168. if (count > 7)
  169. count = 0;
  170. foutTree << '1';
  171. foutTree << root->simbolChar;
  172. }
  173. else
  174. {
  175. bit = 0;
  176. buf = buf | bit << (7 - count);
  177. count++;
  178. if (count > 7)
  179. count = 0;
  180. foutTree << '0';
  181. write_tree(foutTree, root->left);
  182. write_tree(foutTree, root->right);
  183. }
  184. //foutTree.close();
  185. }
  186.  
  187. static string read_string(basic_ifstream<wchar_t>& fin)
  188. {
  189. string str = "";
  190. wchar_t currentChar;
  191.  
  192.  
  193. while (!fin.eof())
  194. {
  195. fin.get(currentChar);
  196. if (!fin.eof())
  197. str += currentChar;
  198. }
  199. return str;
  200. }
  201. static Node* read_tree(basic_ifstream<wchar_t>& fin)
  202. {
  203. wchar_t currentChar;
  204. //Filling nodes vector, counting frequency of every node
  205. if (fin.is_open());
  206. {
  207. fin.get(currentChar);
  208. if (currentChar == '0')
  209. {
  210. Node* left = read_tree(fin);
  211. Node* right = read_tree(fin);
  212. Node* newNode = new Node;
  213. newNode->left = left;
  214. newNode->right = right;
  215. newNode->isLeaf = false;
  216. return newNode;
  217. }
  218. else
  219. {
  220. if (currentChar == '1')
  221. {
  222. Node* newNode = new Node;
  223.  
  224. if (true)
  225. {
  226. fin.get(currentChar);
  227. newNode->simbolChar = currentChar;
  228. newNode->freq = 0;
  229. newNode->isLeaf = true;
  230. return newNode;
  231. }
  232. else
  233. {
  234. throw std::logic_error("Tree reading went wrong");
  235. }
  236. }
  237. }
  238. }
  239.  
  240. }
  241.  
  242.  
  243. Node* split(Node** left, Node** right)
  244. {
  245. if (left == right)
  246. {
  247. (*left)->isLeaf = true;
  248. return *left;
  249. }
  250. int allSum = 0;
  251. for (Node** i = left; i <= right; i++)
  252. allSum += (*i)->freq;
  253.  
  254. int halfSum = (*left)->freq;
  255. Node** nextRight = left + 1;
  256. while (abs(halfSum - allSum / 2 + (*nextRight)->freq) < abs(halfSum - allSum / 2))
  257. {
  258. halfSum += (*nextRight)->freq;
  259. nextRight++;
  260. }
  261.  
  262. Node* rootNode = new Node;
  263. rootNode->left = split(left, nextRight - 1);
  264. rootNode->right = split(nextRight, right);
  265. rootNode->isLeaf = false;
  266. // rootNode->simbolChar = (*left)->simbolChar;
  267.  
  268. return rootNode;
  269.  
  270. }
  271.  
  272. bool Compare(const Node* a, const Node* b) { return a->freq < b->freq; }
  273. Node* buildTreeSh(vector<Node*> nodes)
  274. {
  275. std::sort(nodes.rbegin(), nodes.rend(), Compare);
  276.  
  277. return split(&nodes[0], &nodes[nodes.size() - 1]);
  278. }
  279. Node* buildTreeHaff(vector<Node*> nodes)
  280. {
  281.  
  282. std::multimap<int, Node*> tree;
  283. for (int i = 0; i < nodes.size(); i++)
  284. {
  285. tree.insert(std::make_pair(nodes[i]->freq, nodes[i]));
  286. }
  287. //Writing Info
  288. //for (int i = 0; i < tree.size() - 1; i++)
  289. // fout << "'" << tree[i]->simbolChar << "'" << tree[i]->freq << ", ";
  290. //fout << "'" << tree[tree.size() - 1]->simbolChar << "'" << tree[tree.size() - 1]->freq << std::endl << std::endl;
  291.  
  292. //Building a tree
  293. while (tree.size() > 1)
  294. {
  295. std::map<int, Node*>::iterator it = tree.begin();
  296. Node* firstNode = it->second;
  297. tree.erase(it);
  298.  
  299. it = tree.begin();
  300. Node* secondNode = it->second;
  301. tree.erase(it);
  302.  
  303. Node* newNode = new Node();
  304. newNode->left = firstNode;
  305. newNode->right = secondNode;
  306. newNode->freq = firstNode->freq + secondNode->freq;
  307.  
  308. tree.insert(std::make_pair(newNode->freq, newNode));
  309. }
  310.  
  311. return tree.begin()->second;
  312. }
  313.  
  314. //std::ofstream foutHaff("output.haff");
  315. void codeHaff(string inPath);
  316. void codeHaff(string inPath)
  317. {
  318. string outputPath = inPath;
  319. outputPath = outputPath.erase(inPath.size() - 4, 4) + ".haff";
  320.  
  321. std::vector<Node*> nodes = nodesFromString(inPath);
  322. Node* treeHaff = buildTreeHaff(nodes);
  323. std::wofstream foutStream(outputPath, std::ios_base::binary);
  324.  
  325. write_tree(foutStream, treeHaff);
  326. tableFilling(treeHaff, foutStream);
  327.  
  328. writeCodedText(inPath, foutStream);
  329.  
  330. treeDelete(treeHaff);
  331. foutStream.close();
  332. }
  333.  
  334. void codeShan(string inPath);
  335. void codeShan(string inPath)
  336. {
  337. string outputPath = inPath;
  338. outputPath = outputPath.erase(inPath.size() - 4, 4) + ".shan";
  339.  
  340. std::vector<Node*> nodes = nodesFromString(inPath);
  341. Node* treeShan = buildTreeSh(nodes);
  342. std::wofstream foutStream(outputPath, std::ios_base::binary);
  343.  
  344. write_tree(foutStream, treeShan);
  345.  
  346. foutStream << std::endl;
  347.  
  348. tableFilling(treeShan, foutStream);
  349.  
  350. writeCodedText(inPath, foutStream);
  351.  
  352. treeDelete(treeShan);
  353. foutStream.close();
  354. }
  355.  
  356. void decode(string inPath);
  357. void decode(string inPath)
  358. {
  359. string outputPath = inPath;
  360. if (inPath.size() < 4)
  361. throw std::logic_error("Error in file name");
  362. char type = inPath[inPath.size() - 4];
  363.  
  364.  
  365. if (type == 'h')
  366. outputPath = outputPath.erase(inPath.size() - 5, 5) + "-unz-h.txt";
  367. else
  368. {
  369. if (type == 's')
  370. outputPath = outputPath.erase(inPath.size() - 5, 5) + "-unz-s.txt";
  371. else
  372. {
  373. throw std::logic_error("Error in file name");
  374. }
  375. }
  376.  
  377. //Reading tree Root
  378. basic_ifstream<wchar_t> fin(inPath, std::ios_base::in | std::ios::binary);
  379. std::locale locale(std::locale(), new std::codecvt_utf8<wchar_t>);
  380. fin.imbue(locale);
  381. //Chechking is file exist
  382. if (!fin.is_open())
  383. {
  384. std::cout << "Input file not found" << std::endl;
  385. system("pause");
  386. throw std::logic_error("Input file not found");
  387. }
  388. Node* treeRoot = read_tree(fin);
  389.  
  390. //
  391. std::wofstream foutStream("test.txt", std::ios_base::binary);
  392.  
  393. write_tree(foutStream, treeRoot);
  394. foutStream.close();
  395.  
  396. //Reading coded string
  397. string stringToDecode = read_string(fin);
  398.  
  399. //Decode
  400. decodeString(treeRoot, stringToDecode, outputPath);
  401.  
  402. }
  403. void tableFilling(Node *root, std::wofstream& foutHaff)
  404. {
  405. static std::wstring s = L"";
  406. if (!root->isLeaf)
  407. {
  408. s += L"1";
  409.  
  410. tableFilling(root->right, foutHaff);
  411. s.erase(s.size() - 1, 1);
  412.  
  413. s += L"0";
  414.  
  415. tableFilling(root->left, foutHaff);
  416. //
  417. s.erase(s.size() - 1, 1);
  418. }
  419. else
  420. {
  421. // foutHaff << "'" << root->simbolChar << "'" << " = " << s << std::endl;
  422. tableFreq[root->simbolChar] = s;
  423. }
  424. }
  425.  
  426. void treeDelete(Node *root)
  427. {
  428. if (!root->isLeaf)
  429. {
  430. treeDelete(root->right);
  431. //tree_del(root->br[1]);
  432.  
  433. treeDelete(root->left);
  434. }
  435. else
  436. delete root;
  437. }
  438.  
  439. void writeCodedText(string pathFrom, wofstream& fout)
  440. {
  441. basic_ifstream<wchar_t> fin(pathFrom, std::ios_base::binary);
  442. wchar_t ch;
  443.  
  444. while (!fin.eof())
  445. {
  446. fin.get(ch);
  447. if (!fin.eof())
  448. fout << tableFreq[ch];
  449. }
  450. }
  451.  
  452. int main(int argc, char** argv)
  453. {
  454. //string filename = "testrus.txt";
  455. //std::locale locale(std::locale(), new std::codecvt_utf8<wchar_t>);
  456. //std::wifstream stream(filename, std::ios::binary);
  457. //stream.imbue(locale);
  458. //wchar_t ch;
  459. //stream.get(ch);
  460.  
  461. //std::locale::global(std::locale("rus"));
  462.  
  463. string filePath = string(argv[2]) + ".txt";
  464. // if (argv[1] == "-compress_h")
  465. codeHaff(filePath);
  466. // if (argv[1] == "-compress_s")
  467. codeShan(filePath);
  468.  
  469. // if (argv[1] == "-decode_h")
  470. // {
  471. string filePathDecode = string(argv[2]) + ".haff";
  472. decode(filePathDecode);
  473. // }
  474. // if (argv[1] == "-decode_s")
  475. // {
  476. filePathDecode = string(argv[2]) + ".shan";
  477. decode(filePathDecode);
  478. // }
  479.  
  480. return 0;
  481. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement