Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- extern crate html5ever;
- extern crate reqwest;
- use std::default::Default;
- use std::io::Read;
- use html5ever::tendril::*;
- use html5ever::tokenizer::BufferQueue;
- use html5ever::tokenizer::{StartTag, TagToken};
- use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,};
- use html5ever::interface::QualName;
- use html5ever::{ns, namespace_url, LocalName};
- /// Token sink that prints the `href` attribute of every `<a>` start tag it is fed.
- #[derive(Copy, Clone)]
- struct TokenPrinter {}
- impl TokenSink for TokenPrinter {
-     type Handle = ();
-     /// Prints `link to: <value>` for each `href` attribute found on an `<a>` start tag.
-     ///
-     /// All other tokens are ignored; the result is always `TokenSinkResult::Continue`.
-     fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
-         if let TagToken(tag) = token {
-             // Compare the tag name as a `str` instead of allocating a `String` per tag.
-             if tag.kind == StartTag && &*tag.name == "a" {
-                 // Built only once an anchor is seen, not for every token.
-                 let link_name = QualName::new(None, ns!(), LocalName::from("href"));
-                 for attr in tag.attrs.iter().filter(|attr| attr.name == link_name) {
-                     println!("link to: {}", attr.value);
-                 }
-             }
-         }
-         TokenSinkResult::Continue
-     }
- }
- /// Downloads `https://example.com` and runs its HTML through a `Tokenizer` driven by
- /// a `TokenPrinter` sink.
- ///
- /// # Panics
- ///
- /// Panics if the request fails, the response status is not a success, the body cannot
- /// be read into a `String`, or the tokenizer leaves input unconsumed.
- fn main() {
-     // Use reqwest to get the HTML content
-     let mut res = reqwest::get("https://example.com").expect("request to https://example.com failed");
-     assert!(res.status().is_success(), "unexpected HTTP status: {}", res.status());
-     let mut body = String::new();
-     res.read_to_string(&mut body).expect("failed to read the response body as text");
-     // `body` is already a `String`, so build the UTF-8 tendril from it directly
-     // instead of going through a byte tendril and a fallible reinterpretation.
-     let mut input = BufferQueue::new();
-     input.push_back(StrTendril::from(body));
-     let mut tok = Tokenizer::new(TokenPrinter {}, TokenizerOpts::default());
-     // The feed result is deliberately discarded, as in the original program.
-     let _ = tok.feed(&mut input);
-     // Everything queued must have been consumed before the tokenizer is finished.
-     assert!(input.is_empty());
-     tok.end();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement