Advertisement
Guest User

Untitled

a guest
Jun 18th, 2019
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.33 KB | None | 0 0
  1. (?i:<img\s[^>]*src\s*=\s*[""']([^<][^""']+)[^>]*\s*/*>)
  2.  
  3. sehe@natty:/tmp$ time ./expressive < bench > /dev/null
  4.  
  5. real 0m2.146s
  6. user 0m2.110s
  7. sys 0m0.030s
  8.  
  9. typedef std::string::const_iterator It;
  10.  
  11. int main(int argc, const char *argv[])
  12. {
  13. using namespace boost::xpressive;
  14. #if DYNAMIC
  15. const sregex re = sregex::compile
  16. ("<img\s+[^\>]*?src\s*=\s*(["'])(.*?)\1");
  17. #else
  18. const sregex re = "<img" >> +_s >> -*(~(set = '\','>')) >>
  19. "src" >> *_s >> '=' >> *_s
  20. >> (s1 = as_xpr('"') | ''') >> (s2 = -*_) >> s1;
  21. #endif
  22.  
  23. std::string s;
  24. smatch what;
  25.  
  26. while (std::getline(std::cin, s))
  27. {
  28. It f = s.begin(), l = s.end();
  29.  
  30. do
  31. {
  32. if (!regex_search(f, l, what, re))
  33. break;
  34.  
  35. handle_attr("img", "src", what[2]);
  36. f = what[0].second;
  37. } while (f!=s.end());
  38. }
  39.  
  40. return 0;
  41. }
  42.  
  43. sehe@natty:/tmp$ time ./spirit < bench > /dev/null
  44.  
  45. real 0m3.895s
  46. user 0m3.820s
  47. sys 0m0.070s
  48.  
  49. //#define BOOST_SPIRIT_DEBUG
  50. #include <string>
  51. #include <iostream>
  52. #include <boost/spirit/include/qi.hpp>
  53. #include <boost/spirit/include/phoenix.hpp>
  54.  
  55. namespace qi = boost::spirit::qi;
  56. namespace phx = boost::phoenix;
  57.  
  /**
   * Attribute callback: prints the value of an img/src attribute.
   *
   * @param elem  name of the element the attribute belongs to
   * @param attr  name of the attribute
   * @param value attribute value
   *
   * Writes "value : <value>" followed by std::endl to std::cout when elem is
   * "img" and attr is "src"; does nothing for any other combination.
   */
  void handle_attr(
          const std::string& elem,
          const std::string& attr,
          const std::string& value)
  {
      const bool is_img_src = (elem == "img") && (attr == "src");
      if (!is_img_src)
          return;

      std::cout << "value : " << value << std::endl;
  }
  66.  
  67. typedef std::string::const_iterator It;
  68. typedef qi::space_type Skipper;
  69.  
  70. struct grammar : qi::grammar<It, Skipper>
  71. {
  72. grammar() : grammar::base_type(html)
  73. {
  74. using namespace boost::spirit::qi;
  75. using phx::bind;
  76.  
  77. attr = as_string [ +~char_("= trn/>") ] [ _a = _1 ]
  78. >> '=' >> (
  79. as_string [ '"' >> lexeme [ *~char_('"') ] >> '"' ]
  80. | as_string [ "'" >> lexeme [ *~char_("'") ] >> "'" ]
  81. ) [ bind(handle_attr, _r1, _a, _1) ]
  82. ;
  83.  
  84. elem = lit('<')
  85. >> as_string [ lexeme [ ~char_("-/>") >> *(char_ - space - char_("/>")) ] ] [ _a = _1 ]
  86. >> *attr(_a);
  87.  
  88. html = (-elem) % +("</" | (char_ - '<'));
  89.  
  90. BOOST_SPIRIT_DEBUG_NODE(html);
  91. BOOST_SPIRIT_DEBUG_NODE(elem);
  92. BOOST_SPIRIT_DEBUG_NODE(attr);
  93. }
  94.  
  95. qi::rule<It, Skipper> html;
  96. qi::rule<It, Skipper, qi::locals<std::string> > elem;
  97. qi::rule<It, qi::unused_type(std::string), Skipper, qi::locals<std::string> > attr;
  98. };
  99.  
  100. int main(int argc, const char *argv[])
  101. {
  102. std::string s;
  103.  
  104. const static grammar html_;
  105.  
  106. while (std::getline(std::cin, s))
  107. {
  108. It f = s.begin(),
  109. l = s.end();
  110.  
  111. if (!phrase_parse(f, l, html_, qi::space) || (f!=l))
  112. std::cerr << "unparsed: " << std::string(f,l) << std::endl;
  113. }
  114.  
  115. return 0;
  116. }
  117.  
  118. typedef std::string::const_iterator It;
  119.  
  120. int main(int argc, const char *argv[])
  121. {
  122. const boost::regex re("<img\s+[^\>]*?src\s*=\s*(["'])(.*?)\1");
  123.  
  124. std::string s;
  125. boost::smatch what;
  126.  
  127. while (std::getline(std::cin, s))
  128. {
  129. It f = s.begin(), l = s.end();
  130.  
  131. do
  132. {
  133. if (!boost::regex_search(f, l, what, re))
  134. break;
  135.  
  136. handle_attr("img", "src", what[2]);
  137. f = what[0].second;
  138. } while (f!=s.end());
  139. }
  140.  
  141. return 0;
  142. }
  143.  
  144. ./test < index.htm
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement