Advertisement
lobaev

Untitled

Dec 26th, 2019
204
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.83 KB | None | 0 0
  1. package main
  2.  
  3. import (
  4. "encoding/xml"
  5. "fmt"
  6. "golang.org/x/net/html"
  7. "io/ioutil"
  8. "net/http"
  9. "os"
  10. "strings"
  11. )
  12.  
  13. var (
  14. links map[string]int
  15. count = 0
  16. links2 map[string]int
  17. count2 = 0
  18. )
  19.  
  20. type Urlset struct {
  21. XMLName xml.Name `xml:"urlset"`
  22. Xmlns string `xml:"xmlns,attr"`
  23. Image string `xml:"image, attr"`
  24. Url []Url `xml:"url"`
  25. }
  26.  
  27. type Url struct {
  28. XMLName xml.Name `xml:"url"`
  29. Loc string `xml:"loc"`
  30. Changefreq string `xml:"changefreq"`
  31. }
  32.  
  33. func search(node *html.Node) {
  34. for c := node.FirstChild; c != nil; c = c.NextSibling {
  35. if c.Attr != nil {
  36. for _, b := range c.Attr {
  37. if b.Key == "href" {
  38. l := b.Val
  39. if strings.Contains(l, "uslugi") {
  40. _, ok := links[l]
  41. if !ok {
  42. links[l] = 0
  43. count++
  44. }
  45. }
  46. }
  47. }
  48. }
  49. search(c)
  50. }
  51. return
  52. }
  53.  
  54. func downloadNews(adr string) {
  55. if response, err := http.Get(adr); err != nil { //данные страницы
  56. fmt.Println(err)
  57. } else {
  58. defer response.Body.Close()
  59. status := response.StatusCode
  60. if status == http.StatusOK {
  61. if doc, err := html.Parse(response.Body); err != nil {
  62. fmt.Printf("invalid HTML from %s\n", adr)
  63. fmt.Println(err)
  64. } else {
  65. fmt.Printf( "HTML from %s parsed successfully\n", adr)
  66. search(doc)
  67. return
  68. }
  69. }
  70. }
  71. return
  72. }
  73.  
  74. func main2() {
  75. links2 = make(map[string]int)
  76. // Open our xmlFile
  77. xmlFile, err := os.Open("lab-sud.xml")
  78. // if we os.Open returns an error then handle it
  79. if err != nil {
  80. fmt.Println(err)
  81. }
  82. fmt.Println("Successfully Opened http://lab-sud.ru/sitemap.xml")
  83. // defer the closing of our xmlFile so that we can parse it later on
  84. defer xmlFile.Close()
  85. // read our opened xmlFile as a byte array.
  86. byteValue, _ := ioutil.ReadAll(xmlFile)
  87. // we initialize our Users array
  88. var urlset Urlset
  89. // we unmarshal our byteArray which contains our
  90. // xmlFiles content into 'users' which we defined above
  91. xml.Unmarshal(byteValue, &urlset)
  92. // we iterate through every user within our users array and
  93. // print out the user Type, their name, and their facebook url
  94. // as just an example
  95. for i := 0; i < len(urlset.Url); i++ {
  96. k := urlset.Url[i].Loc
  97. if strings.Contains(k, "uslugi") {
  98. fmt.Println("Loc Type: " + urlset.Url[i].Loc)
  99. count2++
  100. links2[k] = 0
  101. }
  102. }
  103. fmt.Println(count2)
  104. }
  105.  
  106. func main1() {
  107. links = make(map[string]int)
  108. downloadNews("http://lab-sud.ru")
  109. b := true
  110. for b {
  111. b = false
  112. for k, v := range links {
  113. if v == 0 {
  114. b = true
  115. links[k]=1
  116. downloadNews("http://lab-sud.ru" + k)
  117. }
  118. }
  119. }
  120. for k, v := range links {
  121. fmt.Print(k + " ")
  122. fmt.Println(v)
  123. }
  124. fmt.Println(count)
  125. }
  126.  
  127. func main() {
  128. main1()
  129. main2()
  130. fmt.Println()
  131. fmt.Println("My count:")
  132. fmt.Println(count)
  133. fmt.Println("From .xml count:")
  134. fmt.Println(count2)
  135. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement