Advertisement
Guest User

Untitled

a guest
Apr 16th, 2014
185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.33 KB | None | 0 0
  1. // Parser.cpp : Defines the entry point for the console application.
  2. //
  3.  
  4. #include "stdafx.h"
  5. #include <vector>
  6. #include <string>
  7. #include <sstream>
  8. #include <fstream>
  9. #include <iostream>
  10. #include <regex>
  11.  
  // Marker that immediately precedes the payload inside the data file.
  const std::string START_FROM = "\"extract\":\"";

  // Returns the text that follows the first START_FROM marker in `raw`,
  // with the last 5 characters of `raw` removed.
  // NOTE(review): the 5 trailing characters are presumably the closing
  // quote/brackets of the enclosing document - confirm against the data file.
  //
  // Returns an empty string when the marker is absent or when no more than
  // the 5 trailing characters follow it, so the size arithmetic can never
  // wrap around.
  static std::string ExtractPayload(const std::string& raw) {
      const std::string::size_type kTrailerLength = 5;

      const std::string::size_type marker = raw.find(START_FROM);
      if (marker == std::string::npos) {
          return std::string();
      }

      const std::string::size_type begin = marker + START_FROM.size();
      const std::string::size_type available = raw.size() - begin;
      if (available <= kTrailerLength) {
          return std::string();
      }

      return raw.substr(begin, available - kTrailerLength);
  }

  // Reads E:\data.txt in full and returns the payload located after the
  // START_FROM marker (see ExtractPayload).
  //
  // Returns an empty string when the file cannot be opened or contains no
  // usable payload; it no longer throws std::out_of_range in those cases.
  std::string GetData() {
      std::ifstream file("E:\\data.txt");
      if (!file) {
          return std::string();
      }

      std::stringstream buffer;
      buffer << file.rdbuf();
      return ExtractPayload(buffer.str());
  }
  24.  
  // Short alias for std::string used by the parser code below.
  using String = std::string;
  26.  
  // Symbolic tag identifiers.
  //
  // The `static` storage-class specifier was removed: it is not allowed on an
  // enum declaration that declares no variable (MSVC only warns, other
  // compilers reject it).
  //
  // NOTE(review): these values (CLOSE = -1, P = 0, H2 = 1, H3 = 2) do not
  // match the codes returned by tagDetect (0 for '/', 1 for 'p', 2 for h2,
  // 3 for h3, -1 for anything else); the enum is not referenced by the
  // parsing code in this file.
  enum Tags {
      CLOSE = -1,
      P,
      H2,
      H3
  };
  33.  
  // Classifies the tag whose name starts at `ptr` (the character right after
  // a '<').
  //
  // Returns:
  //    0  - `ptr` points at '/' (a closing tag)
  //    1  - the name starts with 'p'
  //    2  - the name starts with "h2"
  //    3  - the name starts with "h3"
  //   -1  - anything else, including a null pointer or an empty string
  //
  // The second character is inspected only after the first one is known to
  // be 'h', so names such as "a2" are no longer reported as headings and the
  // function never reads past the terminator of an empty string.
  //
  // NOTE(review): only the first character is checked for 'p', so any tag
  // whose name begins with 'p' is reported as 1.
  short tagDetect(const char * ptr){
      if (ptr == nullptr) {
          return -1;
      }

      switch (*ptr) {
      case '/':
          return 0;
      case 'p':
          return 1;
      case 'h':
          if (ptr[1] == '2') {
              return 2;
          }
          if (ptr[1] == '3') {
              return 3;
          }
          break;
      default:
          break;
      }

      return -1;
  }
  51.  
  52.  
  // A single parsed element: a numeric tag code plus the text stored for it.
  struct Node {
      short tag;          // tag code passed to the constructor
      std::string data;   // copy of the text passed to the constructor

      // Stores `tagId` in `tag` and a copy of `input` in `data`.
      Node(std::string input, short tagId)
          : tag(tagId), data(input) {}
  };
  62.  
  63. int _tmain(int argc, _TCHAR* argv[])
  64. {
  65. std::string input = GetData();
  66. std::vector<Node> elems;
  67.  
  68. String::size_type pos = 0;
  69. char pattern = '<';
  70.  
  71. int openPos;
  72. short tagID, lastTag;
  73.  
  74. double duration;
  75. clock_t start = clock();
  76.  
  77. for (int i = 0; i < 20000; i++) {
  78. elems.clear();
  79.  
  80. pos = 0;
  81. while ((pos = input.find(pattern, pos)) != std::string::npos) {
  82. pos++;
  83. tagID = tagDetect(&input[pos]);
  84. switch (tagID) {
  85. case 0:
  86. if (tagID = tagDetect(&input[pos + 1]) == lastTag && pos - openPos > 10) {
  87. elems.push_back(Node(input.substr(openPos + (lastTag > 1 ? 3 : 2), pos - openPos - (lastTag > 1 ? 3 : 2) - 1), lastTag));
  88. }
  89.  
  90. break;
  91. case 1:
  92. case 2:
  93. case 3:
  94. openPos = pos;
  95. lastTag = tagID;
  96. break;
  97. }
  98. }
  99.  
  100. }
  101.  
  102. duration = (double)(clock() - start) / CLOCKS_PER_SEC;
  103. printf("%2.1f seconds\n", duration);
  104.  
  105. getchar();
  106.  
  107. for (int i = 0; i < elems.size(); i++) {
  108. std::cout << elems[i].data << " - " << elems[i].tag << std::endl;
  109. }
  110.  
  111. getchar();
  112.  
  113. return 0;
  114. }
  115.  
  116.  
  117. /*
  118. void clock() {
  119. long i = 600000000L;
  120. clock_t start, finish;
  121. double duration;
  122.  
  123. printf("Time to do %ld empty loops is ", i);
  124. start = clock();
  125. while (i--)
  126. ;
  127. finish = clock();
  128. duration = (double)(finish - start) / CLOCKS_PER_SEC;
  129. printf("%2.1f seconds\n", duration);
  130. }
  131. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement