Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
- import (
- "encoding/xml"
- "fmt"
- "golang.org/x/net/html"
- "io/ioutil"
- "net/http"
- "os"
- "strings"
- )
// Link tables built by the two counting strategies.
var (
	// links holds every href containing "uslugi" that search has seen.
	// The value is 0 until main1 has requested the page and 1 afterwards.
	links map[string]int
	// links2 holds every sitemap location containing "uslugi" found by
	// main2; the value is always 0.
	links2 map[string]int
)

// Running totals that main prints side by side.
var (
	// count is the number of distinct hrefs recorded in links.
	count int
	// count2 is the number of matching sitemap entries seen by main2.
	count2 int
)
// Urlset is the root <urlset> element of the sitemap document decoded by
// main2.
type Urlset struct {
	XMLName xml.Name `xml:"urlset"`
	// Xmlns receives the value of the element's xmlns attribute.
	Xmlns string `xml:"xmlns,attr"`
	// Image receives the value of an attribute whose local name is "image"
	// (for example xmlns:image). The tag must not contain a space after the
	// comma: with "image, attr" encoding/xml does not recognise the attr
	// flag and looks for a child element <image> instead.
	Image string `xml:"image,attr"`
	// Url lists the <url> children in document order.
	Url []Url `xml:"url"`
}

// Url is a single <url> entry of the sitemap.
type Url struct {
	XMLName xml.Name `xml:"url"`
	// Loc is the text of the <loc> child element.
	Loc string `xml:"loc"`
	// Changefreq is the text of the <changefreq> child element.
	Changefreq string `xml:"changefreq"`
}
- func search(node *html.Node) {
- for c := node.FirstChild; c != nil; c = c.NextSibling {
- if c.Attr != nil {
- for _, b := range c.Attr {
- if b.Key == "href" {
- l := b.Val
- if strings.Contains(l, "uslugi") {
- _, ok := links[l]
- if !ok {
- links[l] = 0
- count++
- }
- }
- }
- }
- }
- search(c)
- }
- return
- }
- func downloadNews(adr string) {
- if response, err := http.Get(adr); err != nil { //данные страницы
- fmt.Println(err)
- } else {
- defer response.Body.Close()
- status := response.StatusCode
- if status == http.StatusOK {
- if doc, err := html.Parse(response.Body); err != nil {
- fmt.Printf("invalid HTML from %s\n", adr)
- fmt.Println(err)
- } else {
- fmt.Printf( "HTML from %s parsed successfully\n", adr)
- search(doc)
- return
- }
- }
- }
- return
- }
- func main2() {
- links2 = make(map[string]int)
- // Open our xmlFile
- xmlFile, err := os.Open("lab-sud.xml")
- // if we os.Open returns an error then handle it
- if err != nil {
- fmt.Println(err)
- }
- fmt.Println("Successfully Opened http://lab-sud.ru/sitemap.xml")
- // defer the closing of our xmlFile so that we can parse it later on
- defer xmlFile.Close()
- // read our opened xmlFile as a byte array.
- byteValue, _ := ioutil.ReadAll(xmlFile)
- // we initialize our Users array
- var urlset Urlset
- // we unmarshal our byteArray which contains our
- // xmlFiles content into 'users' which we defined above
- xml.Unmarshal(byteValue, &urlset)
- // we iterate through every user within our users array and
- // print out the user Type, their name, and their facebook url
- // as just an example
- for i := 0; i < len(urlset.Url); i++ {
- k := urlset.Url[i].Loc
- if strings.Contains(k, "uslugi") {
- fmt.Println("Loc Type: " + urlset.Url[i].Loc)
- count2++
- links2[k] = 0
- }
- }
- fmt.Println(count2)
- }
- func main1() {
- links = make(map[string]int)
- downloadNews("http://lab-sud.ru")
- b := true
- for b {
- b = false
- for k, v := range links {
- if v == 0 {
- b = true
- links[k]=1
- downloadNews("http://lab-sud.ru" + k)
- }
- }
- }
- for k, v := range links {
- fmt.Print(k + " ")
- fmt.Println(v)
- }
- fmt.Println(count)
- }
- func main() {
- main1()
- main2()
- fmt.Println()
- fmt.Println("My count:")
- fmt.Println(count)
- fmt.Println("From .xml count:")
- fmt.Println(count2)
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement