Advertisement
Pug_coder

lab2

Oct 8th, 2021
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.15 KB | None | 0 0
  1. package main
  2.  
import (
	"fmt"
	"net/http"
	"time"

	"github.com/mgutz/logxi/v1"
	"golang.org/x/net/html"
)
  9.  
  10. func getAttr(node *html.Node, key string) string {
  11. for _, attr := range node.Attr {
  12. if attr.Key == key {
  13. return attr.Val
  14. }
  15. }
  16. return ""
  17. }
  18.  
  19. func getChildren(node *html.Node) []*html.Node {
  20. var children []*html.Node
  21. for c := node.FirstChild; c != nil; c = c.NextSibling {
  22. children = append(children, c)
  23. }
  24. return children
  25. }
  26.  
  27. func isElem(node *html.Node, tag string) bool {
  28. return node != nil && node.Type == html.ElementNode && node.Data == tag
  29. }
  30.  
  31. func isText(node *html.Node) bool {
  32. return node != nil && node.Type == html.TextNode
  33. }
  34.  
  35. func isDiv(node *html.Node, class string) bool {
  36. return isElem(node, "div") && getAttr(node, "class") == class
  37. }
  38.  
  39. type Item struct {
  40. Ref, Price, Title string
  41. }
  42.  
  43. func readItem(item *html.Node) *Item {
  44. if a := item.FirstChild; isElem(a, "a") {
  45. if cs := getChildren(a); len(cs) == 1 && isText(cs[0]) {
  46. return &Item{
  47. Ref: getAttr(a, "href"),
  48. Title: cs[0].Data,
  49. }
  50. }
  51. }
  52. return nil
  53. }
  54.  
  55. func getElementsByClassName(node *html.Node, className string) []*html.Node {
  56. var nodes []*html.Node
  57. if getAttr(node, "class") == className {
  58. nodes = append(nodes, node)
  59. }
  60. for c := node.FirstChild; c != nil; c = c.NextSibling {
  61. if found := getElementsByClassName(c, className); found != nil {
  62. nodes = append(nodes, found...)
  63. }
  64. }
  65. return nodes
  66. }
  67. func search(doc *html.Node) []*Item{
  68. founds := getElementsByClassName(doc, "product_data__gtm-js product_data__pageevents-js ProductCardHorizontal js--ProductCardInListing js--ProductCardInWishlist")
  69. var items []*Item
  70. for _, elem := range founds {
  71. links := getElementsByClassName(elem, "ProductCardHorizontal__title Link js--Link Link_type_default")
  72.  
  73. if len(links) != 1 {
  74. continue
  75. }
  76. prices := getElementsByClassName(elem, "ProductCardHorizontal__price_current-price js--ProductCardHorizontal__price_current-price ")
  77.  
  78. if len(prices) != 1 {
  79. continue
  80. }
  81. ref := getAttr(links[0], "href")
  82. fmt.Println(links[0].Data)
  83. if ref == "" {
  84. continue
  85. }
  86. fmt.Println(!isText(links[0].FirstChild))
  87. if !isText(links[0].FirstChild){
  88. continue
  89. }
  90. title := links[0].FirstChild.Data
  91. fmt.Println(!isText(prices[0].FirstChild))
  92. if !isText(prices[0].FirstChild) {
  93. continue
  94. }
  95. price := prices[0].FirstChild.Data
  96. items = append(items, &Item{
  97. Ref: ref,
  98. Title: title,
  99. Price: price,
  100. })
  101.  
  102. }
  103. fmt.Println(items)
  104. return items
  105. }
  106. func downloadNews() []*Item {
  107. log.Info("sending request to citilink.ru")
  108. if response, err := http.Get("https://www.citilink.ru/catalog/processory/"); err != nil {
  109. log.Error("request to citilink.ru failed", "error", err)
  110. } else {
  111. defer response.Body.Close()
  112. status := response.StatusCode
  113. log.Info("got response from citilink.ru", "status", status)
  114. if status == http.StatusOK {
  115. if doc, err := html.Parse(response.Body); err != nil {
  116. log.Error("invalid HTML from citilink.ru", "error", err)
  117. } else {
  118. log.Info("HTML from citilink.ru parsed successfully")
  119.  
  120. return search(doc)
  121. }
  122. }
  123. }
  124. return nil
  125. }
  126.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement