Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Parser.cpp : Defines the entry point for the console application.
- //
#include "stdafx.h"

#include <cstdio>
#include <ctime>
#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
// Marker that immediately precedes the payload inside the input file.
const std::string START_FROM = "\"extract\":\"";

/**
 * Loads E:\data.txt and returns the text that follows the first occurrence
 * of START_FROM, with the last five characters of the file removed.
 *
 * @return The extracted payload, or an empty string when the file cannot be
 *         read, the marker is absent, or no more than five characters follow
 *         the marker.
 */
std::string GetData() {
    // Number of trailing characters dropped from the payload.
    // NOTE(review): presumably the closing quote/braces of the surrounding
    // JSON wrapper -- confirm against a real data file.
    const std::string::size_type kTrailerLength = 5;

    std::ifstream file("E:\\data.txt");
    std::stringstream buffer;
    buffer << file.rdbuf();
    const std::string contents = buffer.str();

    const std::string::size_type markerPos = contents.find(START_FROM);
    if (markerPos == std::string::npos) {
        // Without this guard, npos + START_FROM.size() wraps around and
        // substr() would either throw or return unrelated text.
        return std::string();
    }

    const std::string::size_type payloadStart = markerPos + START_FROM.size();
    const std::string::size_type available = contents.size() - payloadStart;
    if (available <= kTrailerLength) {
        // Subtracting the trailer would underflow the unsigned length.
        return std::string();
    }
    return contents.substr(payloadStart, available - kTrailerLength);
}
// Convenience alias for std::string.
using String = std::string;
/**
 * Symbolic identifiers for the tag kinds.
 *
 * The original declaration was prefixed with `static`, which is not allowed
 * on an enum declaration that declares no object; it has been dropped so the
 * file compiles with conforming compilers. The enumerator values are
 * unchanged.
 *
 * NOTE(review): these values (CLOSE = -1, P = 0, H2 = 1, H3 = 2) do not line
 * up with the numeric codes returned by tagDetect (0 for a closing tag, 1 for
 * p, 2 for h2, 3 for h3, -1 for anything else), and the visible code never
 * refers to the enumerators. Confirm the intended mapping before using them.
 */
enum Tags {
    CLOSE = -1,
    P,
    H2,
    H3
};
/**
 * Classifies the tag whose text begins at `ptr`, i.e. at the character that
 * directly follows a '<'.
 *
 * Only the first character is inspected unless it is 'h', in which case the
 * second character is read as well. The previous version read the second
 * character unconditionally, which could step past the end of the buffer and
 * treated any name with '2' or '3' in second position as a heading.
 *
 * @param ptr  Pointer to the first character after '<'. It must be readable,
 *             and when it points at 'h' the following character must be
 *             readable too (a terminating NUL is sufficient).
 * @return  0 when the text starts with '/' (closing tag),
 *          1 when it starts with 'p',
 *          2 when it starts with "h2",
 *          3 when it starts with "h3",
 *         -1 for everything else.
 */
short tagDetect(const char* ptr) {
    switch (*ptr) {
    case '/':
        return 0;
    case 'p':
        return 1;
    case 'h':
        // The heading level is the character right after the 'h'.
        if (ptr[1] == '2') {
            return 2;
        }
        if (ptr[1] == '3') {
            return 3;
        }
        return -1;
    default:
        return -1;
    }
}
/**
 * One extracted element: the numeric tag code it was found under and the
 * text taken from between its opening and closing tags.
 */
struct Node {
    short tag;         // numeric tag code supplied by the caller
    std::string data;  // text content of the element

    /**
     * @param input  Text content; taken by value and moved into `data`, so
     *               passing a temporary costs no extra copy.
     * @param tagId  Numeric tag code stored in `tag`.
     */
    Node(std::string input, short tagId)
        : tag(tagId), data(std::move(input)) {}
};
- int _tmain(int argc, _TCHAR* argv[])
- {
- std::string input = GetData();
- std::vector<Node> elems;
- String::size_type pos = 0;
- char pattern = '<';
- int openPos;
- short tagID, lastTag;
- double duration;
- clock_t start = clock();
- for (int i = 0; i < 20000; i++) {
- elems.clear();
- pos = 0;
- while ((pos = input.find(pattern, pos)) != std::string::npos) {
- pos++;
- tagID = tagDetect(&input[pos]);
- switch (tagID) {
- case 0:
- if (tagID = tagDetect(&input[pos + 1]) == lastTag && pos - openPos > 10) {
- elems.push_back(Node(input.substr(openPos + (lastTag > 1 ? 3 : 2), pos - openPos - (lastTag > 1 ? 3 : 2) - 1), lastTag));
- }
- break;
- case 1:
- case 2:
- case 3:
- openPos = pos;
- lastTag = tagID;
- break;
- }
- }
- }
- duration = (double)(clock() - start) / CLOCKS_PER_SEC;
- printf("%2.1f seconds\n", duration);
- getchar();
- for (int i = 0; i < elems.size(); i++) {
- std::cout << elems[i].data << " - " << elems[i].tag << std::endl;
- }
- getchar();
- return 0;
- }
- /*
- void clock() {
- long i = 600000000L;
- clock_t start, finish;
- double duration;
- printf("Time to do %ld empty loops is ", i);
- start = clock();
- while (i--)
- ;
- finish = clock();
- duration = (double)(finish - start) / CLOCKS_PER_SEC;
- printf("%2.1f seconds\n", duration);
- }
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement