Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2018
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.53 KB | None | 0 0
  1. package main
  2.  
  3. import (
  4. "github.com/mgutz/logxi/v1"
  5. "golang.org/x/net/html"
  6. "net/http"
  7. )
  8.  
  9. func getChildren(node *html.Node) []*html.Node {
  10. var children []*html.Node
  11. for c := node.FirstChild; c != nil; c = c.NextSibling {
  12. children = append(children, c)
  13. }
  14. return children
  15. }
  16.  
  17. func getAttr(node *html.Node, key string) string {
  18. for _, attr := range node.Attr {
  19. if attr.Key == key {
  20. return attr.Val
  21. }
  22. }
  23. return ""
  24. }
  25.  
  26. func isText(node *html.Node) bool {
  27. return node != nil && node.Type == html.TextNode
  28. }
  29.  
  30. func isElem(node *html.Node, tag string) bool {
  31. return node != nil && node.Type == html.ElementNode && node.Data == tag
  32. }
  33.  
  34. func isDiv(node *html.Node, class string) bool {
  35. return isElem(node, "div") && getAttr(node, "class") == class
  36. }
  37.  
  38. func readItem(item *html.Node) *Item {
  39. if a := item.FirstChild; isElem(a, "a") {
  40. if cs := getChildren(a); len(cs) == 1 && isText(cs[0]){
  41. return &Item{
  42. Ref: getAttr(a, "href"),
  43. Title: cs[0].Data,
  44. }
  45. }
  46. }
  47. return nil
  48. }
  49.  
  50. type Item struct {
  51. Ref, Time, Title string
  52. }
  53.  
  54.  
  55. func downloadNews() []*Item {
  56. log.Info("sending request to lenta.ru")
  57. if response, err := http.Get("http://lenta.ru"); err != nil {
  58. log.Error("request to lenta.ru failed", "error", err)
  59. } else {
  60. defer response.Body.Close()
  61. status := response.StatusCode
  62. log.Info("got response from lenta.ru", "status", status)
  63. if status == http.StatusOK {
  64. if doc, err := html.Parse(response.Body); err != nil {
  65. log.Error("invalid HTML from lenta.ru", "error", err)
  66. } else {
  67. log.Info("HTML from lenta.ru parsed successfully")
  68. items:=search(doc)
  69. //log.Info("len(items)", "a:", len(items))
  70. for i:=0; i<len(items); i++{
  71. log.Info("-- In readItem -->");
  72. log.Info("<a href='a'>", "a:", items[i].Ref)
  73. log.Info("NEWS", "NEWS:", items[i].Title)
  74. }
  75. return search(doc)
  76. }
  77. }
  78. }
  79. return nil
  80. }
  81.  
  82. func search(node *html.Node) []*Item {
  83. if isDiv(node, "b-yellow-box__wrap"){
  84. var items []*Item
  85. for c := node.FirstChild; c != nil; c = c.NextSibling {
  86. if isDiv(c, "item") {
  87. if item := readItem(c); item != nil {
  88. items = append(items, item)
  89. }
  90. }
  91. }
  92. return items
  93. }
  94. for c := node.FirstChild; c != nil; c = c.NextSibling {
  95. if items := search(c); items != nil {
  96. return items
  97. }
  98. }
  99. return nil
  100. }
  101.  
  102.  
  103.  
  104.  
  105.  
  106. //===================================================================================================
  107.  
  108.  
  109.  
  110. func main() {
  111.  
  112.  
  113. log.Info("Downloader started")
  114. downloadNews()
  115.  
  116.  
  117.  
  118. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement