Advertisement
Guest User

Untitled

a guest
Feb 20th, 2019
221
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.45 KB | None | 0 0
  1. package main
  2.  
  3. import (
  4. "fmt"
  5. "strings"
  6. "github.com/gocolly/colly"
  7. "log"
  8. "os"
  9. "os/exec"
  10. "code.sajari.com/docconv"
  11. )
  12.  
  13. func main() {
  14. /* only navigate to links within these paths */
  15. tld1 := "vinfo/us/security/research-and-analysis/threat-reports"
  16. tld2 := "assets/rpt/"
  17.  
  18. var pdfs []string
  19.  
  20. c := colly.NewCollector(
  21. colly.AllowedDomains("www.trendmicro.com", "www.documents.trendmicro.com"),
  22. )
  23.  
  24. c.OnHTML("a[href]", func(e *colly.HTMLElement) {
  25. link := e.Attr("href")
  26. if strings.Contains(link, tld1) {
  27. c.Visit(e.Request.AbsoluteURL(link))
  28. }
  29. if strings.Contains(link, tld2) {
  30. pdfName := strings.Replace(link, "https://documents.trendmicro.com/assets/rpt/", "", -1)
  31. fmt.Printf("PDF name: %s\n", pdfName)
  32. existingPdf := 0;
  33. if len(pdfs) > 0 {
  34. for i := 0; i < len(pdfs); i++ {
  35. if pdfs[i] == pdfName {
  36. existingPdf = 1;
  37. }
  38. }
  39. }
  40. if existingPdf == 0 {
  41. pdfs = append(pdfs, pdfName)
  42. command := "wget " + link + "> /dev/null 2>&1"
  43. cmd := exec.Command("/bin/bash", "-c", command)
  44. cmd.Stdout = os.Stdout
  45. cmd.Stderr = os.Stderr
  46. cmd.Run()
  47. res, err := docconv.ConvertPath(pdfName)
  48. if err != nil {
  49. log.Fatal(err)
  50. }
  51. fmt.Println(res)
  52. }
  53. }
  54. })
  55.  
  56. c.OnRequest(func(r * colly.Request) {
  57. fmt.Println("Visiting", r.URL.String())
  58. })
  59.  
  60. c.Visit("https://www.trendmicro.com/vinfo/us/security/research-and-analysis/threat-reports")
  61. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement