Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- (?i:<img\s[^>]*src\s*=\s*[""']([^<][^""']+)[^>]*\s*/*>)
- sehe@natty:/tmp$ time ./expressive < bench > /dev/null
- real 0m2.146s
- user 0m2.110s
- sys 0m0.030s
- typedef std::string::const_iterator It;
- int main(int argc, const char *argv[])
- {
- using namespace boost::xpressive;
- #if DYNAMIC
- const sregex re = sregex::compile
- ("<img\s+[^\>]*?src\s*=\s*(["'])(.*?)\1");
- #else
- const sregex re = "<img" >> +_s >> -*(~(set = '\','>')) >>
- "src" >> *_s >> '=' >> *_s
- >> (s1 = as_xpr('"') | ''') >> (s2 = -*_) >> s1;
- #endif
- std::string s;
- smatch what;
- while (std::getline(std::cin, s))
- {
- It f = s.begin(), l = s.end();
- do
- {
- if (!regex_search(f, l, what, re))
- break;
- handle_attr("img", "src", what[2]);
- f = what[0].second;
- } while (f!=s.end());
- }
- return 0;
- }
- sehe@natty:/tmp$ time ./spirit < bench > /dev/null
- real 0m3.895s
- user 0m3.820s
- sys 0m0.070s
- //#define BOOST_SPIRIT_DEBUG
- #include <string>
- #include <iostream>
- #include <boost/spirit/include/qi.hpp>
- #include <boost/spirit/include/phoenix.hpp>
- namespace qi = boost::spirit::qi;
- namespace phx = boost::phoenix;
// Callback invoked for every (element, attribute, value) triple found by the
// parsers. Only the src attribute of img elements is reported, as a
// "value : <value>" line on standard output; every other combination is
// ignored.
//
// elem  - element (tag) name
// attr  - attribute name
// value - attribute value
void handle_attr(
        const std::string& elem,
        const std::string& attr,
        const std::string& value)
{
    if (elem == "img" && attr == "src")
        std::cout << "value : " << value << std::endl;
}
- typedef std::string::const_iterator It;
- typedef qi::space_type Skipper;
- struct grammar : qi::grammar<It, Skipper>
- {
- grammar() : grammar::base_type(html)
- {
- using namespace boost::spirit::qi;
- using phx::bind;
- attr = as_string [ +~char_("= trn/>") ] [ _a = _1 ]
- >> '=' >> (
- as_string [ '"' >> lexeme [ *~char_('"') ] >> '"' ]
- | as_string [ "'" >> lexeme [ *~char_("'") ] >> "'" ]
- ) [ bind(handle_attr, _r1, _a, _1) ]
- ;
- elem = lit('<')
- >> as_string [ lexeme [ ~char_("-/>") >> *(char_ - space - char_("/>")) ] ] [ _a = _1 ]
- >> *attr(_a);
- html = (-elem) % +("</" | (char_ - '<'));
- BOOST_SPIRIT_DEBUG_NODE(html);
- BOOST_SPIRIT_DEBUG_NODE(elem);
- BOOST_SPIRIT_DEBUG_NODE(attr);
- }
- qi::rule<It, Skipper> html;
- qi::rule<It, Skipper, qi::locals<std::string> > elem;
- qi::rule<It, qi::unused_type(std::string), Skipper, qi::locals<std::string> > attr;
- };
- int main(int argc, const char *argv[])
- {
- std::string s;
- const static grammar html_;
- while (std::getline(std::cin, s))
- {
- It f = s.begin(),
- l = s.end();
- if (!phrase_parse(f, l, html_, qi::space) || (f!=l))
- std::cerr << "unparsed: " << std::string(f,l) << std::endl;
- }
- return 0;
- }
- typedef std::string::const_iterator It;
- int main(int argc, const char *argv[])
- {
- const boost::regex re("<img\s+[^\>]*?src\s*=\s*(["'])(.*?)\1");
- std::string s;
- boost::smatch what;
- while (std::getline(std::cin, s))
- {
- It f = s.begin(), l = s.end();
- do
- {
- if (!boost::regex_search(f, l, what, re))
- break;
- handle_attr("img", "src", what[2]);
- f = what[0].second;
- } while (f!=s.end());
- }
- return 0;
- }
- ./test < index.htm
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement