Advertisement
Guest User

news.go

a guest
Nov 13th, 2018
274
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Go 3.20 KB | None | 0 0
  1. package main
  2.  
  import (
      "bytes"
      "html/template"
      "net/http"
      "time"

      "github.com/mgutz/logxi/v1"
      "golang.org/x/net/html"
  )
  9.  
  // Item is one news link: the text shown to the user and the URL it points to.
  type Item struct {
      Title string // first-child node data of the source anchor
      Ref   string // link target
  }

  // it accumulates every Item found by readItem and is the data handed to the
  // page template by connectionHandler.
  var it []Item
  12.  
  13. func getChildren(node *html.Node) []*html.Node {
  14.     var children []*html.Node
  15.     for c := node.FirstChild; c != nil; c = c.NextSibling {
  16.         children = append(children, c)
  17.     }
  18.     return children
  19. }
  20.  
  21. func getAttr(node *html.Node, key string) string {
  22.     for _, attr := range node.Attr {
  23.         if attr.Key == key {
  24.             return attr.Val
  25.         }
  26.     }
  27.     return ""
  28. }
  29.  
  30. func isText(node *html.Node) bool {
  31.     return node != nil && node.Type == html.TextNode
  32. }
  33.  
  34. func isElem(node *html.Node, tag string) bool {
  35.     return node != nil && node.Type == html.ElementNode && node.Data == tag
  36. }
  37.  
  38. func isDiv(node *html.Node, class string) bool {
  39.     return isElem(node, "div") && getAttr(node, "class") == class
  40. }
  41.  
  42. func readItem(item *html.Node) *Item {
  43.     if a := getChildren(item); isElem(a[0], "li") {
  44.         for i := 0; i < len(a); i++ {
  45.             child := a[i].FirstChild
  46.             if isElem(child, "a") {
  47.                 href := getAttr(child, "href")
  48.                 if (href[0] == '/') {
  49.                     href = "https://news.mail.ru" + href
  50.                 }
  51.                 it = append(it, Item { Ref: href, Title: child.FirstChild.Data, })
  52.                 log.Info("mail.ru", href, child.FirstChild.Data)
  53.             }            
  54.         }            
  55.     }
  56.    
  57.     return nil
  58. }
  59.  
  60. func search(node *html.Node) []*Item {
  61.     if isDiv(node, "js-module") {
  62.         var items []*Item
  63.         for c := node.FirstChild; c != nil; c = c.NextSibling {
  64.             if isElem(c, "ul") && getAttr(c, "class") == "list list_type_square list_half js-module" {
  65.                 if item := readItem(c); item != nil {
  66.                     items = append(items, item)
  67.                 }
  68.             }
  69.         }
  70.         return items
  71.     }
  72.    
  73.     for c := node.FirstChild; c != nil; c = c.NextSibling {
  74.         if items := search(c); items != nil {
  75.             return items
  76.         }
  77.     }
  78.     return nil
  79. }
  80.  
  81. func downloadNews() []*Item {
  82.     log.Info("sending request to mail.ru")
  83.     if response, err := http.Get("https://news.mail.ru/"); err != nil {
  84.         log.Error("request to mail.ru failed", "error", err)
  85.     } else {
  86.         defer response.Body.Close()
  87.         status := response.StatusCode
  88.         log.Info("got response from mail.ru", "status", status)
  89.         if status == http.StatusOK {
  90.             if doc, err := html.Parse(response.Body); err != nil {
  91.                 log.Error("invalid HTML from mail.ru", "error", err)
  92.             } else {
  93.                 log.Info("HTML from mail.ru parsed successfully")
  94.                 log.Info("================================")
  95.                 return search(doc)
  96.             }
  97.         }
  98.     }
  99.     return nil
  100. }
  101.  
  102. func connectionHandler(w http.ResponseWriter, r *http.Request) {
  103.     tmpl := template.Must(template.ParseFiles("tmpl.html"))
  104.     tmpl.Execute(w, []Item(it))
  105. }
  106.  
  107. func main() {
  108.     log.Info("Downloader started")
  109.     downloadNews()
  110.     http.HandleFunc("/", connectionHandler)
  111.     http.ListenAndServe(":9021", nil)
  112. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement