Advertisement
dereksir

Untitled

May 13th, 2024
32
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.81 KB | None | 0 0
  1. extern crate html5ever;
  2. extern crate reqwest;
  3.  
  4. use std::default::Default;
  5. use std::io::Read;
  6.  
  7. use html5ever::tendril::*;
  8. use html5ever::tokenizer::BufferQueue;
  9. use html5ever::tokenizer::{StartTag, TagToken};
  10. use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,};
  11. use html5ever::interface::QualName;
  12. use html5ever::{ns, namespace_url, LocalName};
  13.  
  /// Stateless token sink: it carries no fields and is only used for its
  /// `TokenSink` implementation, which prints link targets.
  #[derive(Clone, Copy)]
  struct TokenPrinter {}
  16.  
  17. impl TokenSink for TokenPrinter {
  18. type Handle = ();
  19.  
  20. fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
  21. let link_name = QualName::new(
  22. None,
  23. ns!(),
  24. LocalName::from("href"),
  25. );
  26. match token {
  27. TagToken(tag) => {
  28. if tag.kind == StartTag && tag.name.to_string()=="a" {
  29. let attrs = tag.attrs;
  30. for attr in attrs {
  31. if attr.name == link_name {
  32. println!("link to: {}", attr.value);
  33. }
  34. }
  35. }
  36. },
  37. _ => {
  38. },
  39. }
  40. TokenSinkResult::Continue
  41. }
  42. }
  43.  
  44. fn main() {
  45. let sink = TokenPrinter {};
  46.  
  47. // Use reqwest to get the HTML content
  48. let mut res = reqwest::get("https://example.com").unwrap();
  49. assert!(res.status().is_success());
  50.  
  51. let mut body = String::new();
  52. res.read_to_string(&mut body).unwrap();
  53.  
  54. let mut chunk = ByteTendril::from(&body);
  55. let mut input = BufferQueue::new();
  56. input.push_back(chunk.try_reinterpret::<fmt::UTF8>().unwrap());
  57.  
  58. let mut tok = Tokenizer::new(
  59. sink,
  60. TokenizerOpts::default(),
  61. );
  62. let _ = tok.feed(&mut input);
  63. assert!(input.is_empty());
  64. tok.end();
  65. }
  66.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement