Advertisement
Guest User

WebScraper.go

a guest
Oct 17th, 2019
291
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Go 1.37 KB | None | 0 0
  1. package main
  2.  
  3. import (
  4.     "bufio"
  5.     "encoding/xml"
  6.     "fmt"
  7.     "io/ioutil"
  8.     "net/http"
  9.     "os"
  10.     "strings"
  11. )
  12.  
  // Scraped-page model used when decoding a fetched document with encoding/xml.
  type (
      // html is the decode target for a whole page; only its <body> child
      // element is captured.
      html struct {
          Body body `xml:"body"`
      }

      // body keeps the raw, unparsed markup nested inside the <body> element.
      body struct {
          Content string `xml:",innerxml"`
      }
  )
  20.  
  21. func main() {
  22.     reader := bufio.NewReader(os.Stdin)
  23.  
  24.     fmt.Println("Go Web Scraper v1.0")
  25.     fmt.Println("---------------------------------------------------------------------------------------------------------")
  26.     fmt.Println("Input a website URL")
  27.  
  28.     for {
  29.         fmt.Print("$> ")
  30.         input, _ := reader.ReadString('\n')
  31.         input = strings.Replace(input, "\r\n", "", -1)
  32.         if strings.Compare(input, "quit") == 0 {
  33.             break
  34.         } else if strings.HasPrefix(input, "body ") {
  35.             fmt.Println("Body:")
  36.             ShowBody(strings.TrimPrefix(input, "body "))
  37.             break
  38.         } else {
  39.             fmt.Println("Results:")
  40.             ShowPage(input)
  41.             fmt.Println("New location:")
  42.         }
  43.     }
  44. }
  45.  
  46. //ShowPage prints the page to console
  47. func ShowPage(url string) {
  48.     fmt.Println(string(GetHTTP(url)))
  49. }
  50.  
  51. //ShowBody prints the body to console
  52. func ShowBody(url string) {
  53.     html := html{}
  54.  
  55.     xml.Unmarshal(GetHTTP(url), &html)
  56.     fmt.Println(html.Body.Content)
  57. }
  58.  
  // GetHTTP sends an HTTP GET request to url and returns the response body as a
  // slice of bytes.
  //
  // Failures are printed to standard output. If the request itself fails, nil
  // is returned; if reading the body fails part-way, the bytes read so far are
  // returned.
  func GetHTTP(url string) []byte {
      resp, err := http.Get(url)
      if err != nil {
          fmt.Println(err)
          // resp is nil when Get fails, so there is no body to read.
          return nil
      }
      // Close the body so the underlying connection is released.
      defer resp.Body.Close()

      data, err := ioutil.ReadAll(resp.Body)
      if err != nil {
          fmt.Println(err)
      }
      return data
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement