SHARE
TWEET

Untitled

a guest Jun 18th, 2019 70 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. (?i:<img\s[^>]*src\s*=\s*[""']([^<][^""']+)[^>]*\s*/*>)
  2.      
  3. sehe@natty:/tmp$ time ./expressive < bench > /dev/null
  4.  
  5. real    0m2.146s
  6. user    0m2.110s
  7. sys 0m0.030s
  8.      
  9. typedef std::string::const_iterator It;
  10.  
  11. int main(int argc, const char *argv[])
  12. {
  13.     using namespace boost::xpressive;
  14. #if DYNAMIC
  15.     const sregex re = sregex::compile
  16.          ("<img\s+[^\>]*?src\s*=\s*(["'])(.*?)\1");
  17. #else
  18.     const sregex re = "<img" >> +_s >> -*(~(set = '\','>')) >>
  19.         "src" >> *_s >> '=' >> *_s
  20.         >> (s1 = as_xpr('"') | ''') >> (s2 = -*_) >> s1;
  21. #endif
  22.  
  23.     std::string s;
  24.     smatch what;
  25.  
  26.     while (std::getline(std::cin, s))
  27.     {
  28.         It f = s.begin(), l = s.end();
  29.  
  30.         do
  31.         {
  32.             if (!regex_search(f, l, what, re))
  33.                 break;
  34.  
  35.             handle_attr("img", "src", what[2]);
  36.             f = what[0].second;
  37.         } while (f!=s.end());
  38.     }
  39.  
  40.     return 0;
  41. }
  42.      
  43. sehe@natty:/tmp$ time ./spirit < bench > /dev/null
  44.  
  45. real    0m3.895s
  46. user    0m3.820s
  47. sys 0m0.070s
  48.      
  49. //#define BOOST_SPIRIT_DEBUG
  50. #include <string>
  51. #include <iostream>
  52. #include <boost/spirit/include/qi.hpp>
  53. #include <boost/spirit/include/phoenix.hpp>
  54.  
  55. namespace qi  = boost::spirit::qi;
  56. namespace phx = boost::phoenix;
  57.  
  /**
   * Attribute callback: prints the value of an `img` element's `src` attribute
   * to standard output as "value : <value>" and ignores every other
   * element/attribute combination.
   *
   * @param elem   element (tag) name
   * @param attr   attribute name
   * @param value  attribute value
   */
  void handle_attr(
          const std::string& elem,
          const std::string& attr,
          const std::string& value)
  {
      if (elem == "img" && attr == "src")
      {
          // '\n' instead of std::endl: same bytes written, but no forced flush
          // for every single match.
          std::cout << "value : " << value << '\n';
      }
  }
  66.  
  67. typedef std::string::const_iterator It;
  68. typedef qi::space_type Skipper;
  69.  
  70. struct grammar : qi::grammar<It, Skipper>
  71. {
  72.     grammar() : grammar::base_type(html)
  73.     {
  74.         using namespace boost::spirit::qi;
  75.         using phx::bind;
  76.  
  77.         attr = as_string [ +~char_("= trn/>") ] [ _a = _1 ]
  78.                 >> '=' >> (
  79.                     as_string [ '"' >> lexeme [ *~char_('"') ] >> '"' ]
  80.                   | as_string [ "'" >> lexeme [ *~char_("'") ] >> "'" ]
  81.                   ) [ bind(handle_attr, _r1, _a, _1) ]
  82.             ;
  83.  
  84.         elem = lit('<')
  85.             >> as_string [ lexeme [ ~char_("-/>") >> *(char_ - space - char_("/>")) ] ] [ _a = _1 ]
  86.             >> *attr(_a);
  87.  
  88.         html = (-elem) % +("</" | (char_ - '<'));
  89.  
  90.         BOOST_SPIRIT_DEBUG_NODE(html);
  91.         BOOST_SPIRIT_DEBUG_NODE(elem);
  92.         BOOST_SPIRIT_DEBUG_NODE(attr);
  93.     }
  94.  
  95.     qi::rule<It, Skipper> html;
  96.     qi::rule<It, Skipper, qi::locals<std::string> > elem;
  97.     qi::rule<It, qi::unused_type(std::string), Skipper, qi::locals<std::string> > attr;
  98. };
  99.  
  100. int main(int argc, const char *argv[])
  101. {
  102.     std::string s;
  103.  
  104.     const static grammar html_;
  105.  
  106.     while (std::getline(std::cin, s))
  107.     {
  108.         It f = s.begin(),
  109.            l = s.end();
  110.  
  111.         if (!phrase_parse(f, l, html_, qi::space) || (f!=l))
  112.             std::cerr << "unparsed: " << std::string(f,l) << std::endl;
  113.     }
  114.  
  115.     return 0;
  116. }
  117.      
  118. typedef std::string::const_iterator It;
  119.  
  120. int main(int argc, const char *argv[])
  121. {
  122.     const boost::regex re("<img\s+[^\>]*?src\s*=\s*(["'])(.*?)\1");
  123.  
  124.     std::string s;
  125.     boost::smatch what;
  126.  
  127.     while (std::getline(std::cin, s))
  128.     {
  129.         It f = s.begin(), l = s.end();
  130.  
  131.         do
  132.         {
  133.             if (!boost::regex_search(f, l, what, re))
  134.                 break;
  135.  
  136.             handle_attr("img", "src", what[2]);
  137.             f = what[0].second;
  138.         } while (f!=s.end());
  139.     }
  140.  
  141.     return 0;
  142. }
  143.      
  144. ./test < index.htm
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top