Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
- import (
- "fmt"
- "strings"
- "github.com/gocolly/colly"
- "log"
- "os"
- "os/exec"
- "code.sajari.com/docconv"
- )
- func main() {
- /* only navigate to links within these paths */
- tld1 := "vinfo/us/security/research-and-analysis/threat-reports"
- tld2 := "assets/rpt/"
- var pdfs []string
- c := colly.NewCollector(
- colly.AllowedDomains("www.trendmicro.com", "www.documents.trendmicro.com"),
- )
- c.OnHTML("a[href]", func(e *colly.HTMLElement) {
- link := e.Attr("href")
- if strings.Contains(link, tld1) {
- c.Visit(e.Request.AbsoluteURL(link))
- }
- if strings.Contains(link, tld2) {
- pdfName := strings.Replace(link, "https://documents.trendmicro.com/assets/rpt/", "", -1)
- fmt.Printf("PDF name: %s\n", pdfName)
- existingPdf := 0;
- if len(pdfs) > 0 {
- for i := 0; i < len(pdfs); i++ {
- if pdfs[i] == pdfName {
- existingPdf = 1;
- }
- }
- }
- if existingPdf == 0 {
- pdfs = append(pdfs, pdfName)
- command := "wget " + link + "> /dev/null 2>&1"
- cmd := exec.Command("/bin/bash", "-c", command)
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- cmd.Run()
- res, err := docconv.ConvertPath(pdfName)
- if err != nil {
- log.Fatal(err)
- }
- fmt.Println(res)
- }
- }
- })
- c.OnRequest(func(r * colly.Request) {
- fmt.Println("Visiting", r.URL.String())
- })
- c.Visit("https://www.trendmicro.com/vinfo/us/security/research-and-analysis/threat-reports")
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement