Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
import (
	"html/template"
	"net/http"
	"os"
	"time"

	"github.com/mgutz/logxi/v1"
	"golang.org/x/net/html"
)
// Item is one scraped headline: the text shown for it and the link it
// points to.
type Item struct {
	Title string
	Ref   string
}

// it collects the items appended by readItem; connectionHandler hands it to
// the page template.
var it []Item
- func getChildren(node *html.Node) []*html.Node {
- var children []*html.Node
- for c := node.FirstChild; c != nil; c = c.NextSibling {
- children = append(children, c)
- }
- return children
- }
- func getAttr(node *html.Node, key string) string {
- for _, attr := range node.Attr {
- if attr.Key == key {
- return attr.Val
- }
- }
- return ""
- }
- func isText(node *html.Node) bool {
- return node != nil && node.Type == html.TextNode
- }
- func isElem(node *html.Node, tag string) bool {
- return node != nil && node.Type == html.ElementNode && node.Data == tag
- }
- func isDiv(node *html.Node, class string) bool {
- return isElem(node, "div") && getAttr(node, "class") == class
- }
- func readItem(item *html.Node) *Item {
- if a := getChildren(item); isElem(a[0], "li") {
- for i := 0; i < len(a); i++ {
- child := a[i].FirstChild
- if isElem(child, "a") {
- href := getAttr(child, "href")
- if (href[0] == '/') {
- href = "https://news.mail.ru" + href
- }
- it = append(it, Item { Ref: href, Title: child.FirstChild.Data, })
- log.Info("mail.ru", href, child.FirstChild.Data)
- }
- }
- }
- return nil
- }
- func search(node *html.Node) []*Item {
- if isDiv(node, "js-module") {
- var items []*Item
- for c := node.FirstChild; c != nil; c = c.NextSibling {
- if isElem(c, "ul") && getAttr(c, "class") == "list list_type_square list_half js-module" {
- if item := readItem(c); item != nil {
- items = append(items, item)
- }
- }
- }
- return items
- }
- for c := node.FirstChild; c != nil; c = c.NextSibling {
- if items := search(c); items != nil {
- return items
- }
- }
- return nil
- }
- func downloadNews() []*Item {
- log.Info("sending request to mail.ru")
- if response, err := http.Get("https://news.mail.ru/"); err != nil {
- log.Error("request to mail.ru failed", "error", err)
- } else {
- defer response.Body.Close()
- status := response.StatusCode
- log.Info("got response from mail.ru", "status", status)
- if status == http.StatusOK {
- if doc, err := html.Parse(response.Body); err != nil {
- log.Error("invalid HTML from mail.ru", "error", err)
- } else {
- log.Info("HTML from mail.ru parsed successfully")
- log.Info("================================")
- return search(doc)
- }
- }
- }
- return nil
- }
- func connectionHandler(w http.ResponseWriter, r *http.Request) {
- tmpl := template.Must(template.ParseFiles("tmpl.html"))
- tmpl.Execute(w, []Item(it))
- }
- func main() {
- log.Info("Downloader started")
- downloadNews()
- http.HandleFunc("/", connectionHandler)
- http.ListenAndServe(":9021", nil)
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement