Advertisement
Guest User

Untitled

a guest
Nov 13th, 2019
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Go 2.35 KB | None | 0 0
  1. package main
  2.  
  3. import (
  4.     "fmt"
  5.     "github.com/ivahaev/go-logger"
  6.     "golang.org/x/net/html"
  7.     "net/http"
  8. )
  9.  
  10. func getChildren(node *html.Node) []*html.Node {
  11.     var children []*html.Node
  12.     for c := node.FirstChild; c != nil; c = c.NextSibling {
  13.         children = append(children, c)
  14.     }
  15.     return children
  16. }
  17.  
  18. func getAttr(node *html.Node, key string) string {
  19.     for _, attr := range node.Attr {
  20.         if attr.Key == key {
  21.             return attr.Val
  22.         }
  23.     }
  24.     return ""
  25. }
  26.  
  27. func isText(node *html.Node) bool {
  28.     return node != nil && node.Type == html.TextNode
  29. }
  30.  
  31. func isElem(node *html.Node, tag string) bool {
  32.     return node != nil && node.Type == html.ElementNode && node.Data == tag
  33. }
  34.  
  35. func isDiv(node *html.Node, class string) bool {
  36.     return isElem(node, "div") && getAttr(node, "class") == class
  37. }
  38.  
  39. func readItem(item *html.Node) *Item {
  40.     if a := item.FirstChild; isElem(a, "a") {
  41.         if cs := getChildren(a); isText(cs[0]) {
  42.             return &Item{
  43.                 Ref:   getAttr(a, "href"),
  44.                 Time:  getAttr(cs[0], "title"),
  45.                 Title: cs[0].Data,
  46.             }
  47.         }
  48.     }
  49.     return nil
  50. }
  51.  
  52. type Item struct {
  53.     Ref, Time, Title string
  54. }
  55.  
  56.  
  57. func downloadNews() []*Item {
  58.     logger.Info("sending request to lenta.ru")
  59.     if response, err := http.Get("http://lenta.ru"); err != nil {
  60.         logger.Error("request to lenta.ru failed", "error", err)
  61.     } else {
  62.         defer response.Body.Close()
  63.         status := response.StatusCode
  64.         logger.Info("got response from lenta.ru", "status", status)
  65.         if status == http.StatusOK {
  66.             if doc, err := html.Parse(response.Body); err != nil {
  67.                 logger.Error("invalid HTML from lenta.ru", "error", err)
  68.             } else {
  69.  
  70.                 logger.Info("HTML from lenta.ru parsed successfully")
  71.                 items := search(doc)
  72.                 return items
  73.             }
  74.         }
  75.     }
  76.     return nil
  77. }
  78.  
  79. func search(node *html.Node) []*Item {
  80.     if isDiv(node, "b-yellow-box__wrap") {
  81.         logger.Info("====b-yellow-box__wrap check + ====")
  82.         var items []*Item
  83.         for c := node.FirstChild; c != nil; c = c.NextSibling {
  84.             if isDiv(c, "item") {
  85.                 if item := readItem(c); item != nil {
  86.                     items = append(items, item)
  87.                 }
  88.             }
  89.         }
  90.         return items
  91.     }
  92.     for c := node.FirstChild; c != nil; c = c.NextSibling {
  93.         if items := search(c); items != nil {
  94.             return items
  95.         }
  96.     }
  97.     return nil
  98. }
  99.  
  100. func main() {
  101.     logger.Info("Downloader started")
  102.     items := downloadNews()
  103.     for _, v:= range items{
  104.         fmt.Println(v.Title)
  105.     }
  106. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement