SHARE
TWEET

Untitled

a guest Jan 18th, 2020 62 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. package main
  2.  
  3. import (
  4.     "bufio"
  5.     "encoding/csv"
  6.     "errors"
  7.     "fmt"
  8.     "net/http"
  9.     "os"
  10.     "strconv"
  11.     "strings"
  12.     "sync"
  13.     "time"
  14. )
  15.  
  // output is the outcome of probing a single URL.
  type output struct {
      // url is the address that was probed.
      url string
      // statusCode is the HTTP status code of the response; calculateResult
      // stores -1 here when no response was received.
      statusCode int
      // success is true when statusCode is in the 200-299 range.
      success bool
  }
  21.  
  22. func main() {
  23.     readWorkers, err := strconv.Atoi(os.Getenv("READ_WORKERS"))
  24.     if err != nil || readWorkers == 0 {
  25.         readWorkers = 10
  26.     }
  27.  
  28.     writeWorkers, err := strconv.Atoi(os.Getenv("WRITE_WORKERS"))
  29.     if err != nil || writeWorkers == 0 {
  30.         writeWorkers = 10
  31.     }
  32.  
  33.     t := time.Now()
  34.  
  35.     file, err := os.Open("urls20.txt")
  36.     if err != nil {
  37.         println(err)
  38.     }
  39.     defer file.Close()
  40.     scanner := bufio.NewScanner(file)
  41.     maxBufferSize := 1024 * 20
  42.     buf := make([]byte, 0, maxBufferSize)
  43.     scanner.Buffer(buf, 10*1024)
  44.  
  45.     resultsFile, err := os.OpenFile("results.csv", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
  46.     if err != nil {
  47.         panic(fmt.Sprintf("Cannot open/create file. Error: %s", err))
  48.     }
  49.     defer resultsFile.Close()
  50.     writer := csv.NewWriter(resultsFile)
  51.     defer writer.Flush()
  52.  
  53.     urls := make(chan string)
  54.     results := make(chan output)
  55.  
  56.     client := http.Client{
  57.         Timeout: time.Second * 10,
  58.     }
  59.  
  60.     var mu sync.Mutex
  61.  
  62.     wg1 := sync.WaitGroup{}
  63.     wg2 := sync.WaitGroup{}
  64.     go func() {
  65.         for scanner.Scan() {
  66.             urls <- strings.TrimSpace(scanner.Text())
  67.         }
  68.         close(urls)
  69.     }()
  70.  
  71.     for i := 0; i < readWorkers; i++ {
  72.         wg1.Add(1)
  73.         go func() {
  74.             defer wg1.Done()
  75.             count := 0
  76.             for url := range urls {
  77.                 count++
  78.                 results <- calculateResult(url, client)
  79.             }
  80.         }()
  81.     }
  82.     for i := 0; i < writeWorkers; i++ {
  83.         wg2.Add(1)
  84.         go func() {
  85.             defer wg2.Done()
  86.             count := 0
  87.             for result := range results {
  88.                 count++
  89.                 err = writeResultToCsv(result, writer, mu)
  90.                 if err != nil {
  91.                     fmt.Println(fmt.Sprintf("Error while writing url %s with message '%s'. Putting back to queue", result.url, err))
  92.                     results <- result
  93.                 }
  94.             }
  95.         }()
  96.     }
  97.  
  98.     wg1.Wait()
  99.     close(results)
  100.     wg2.Wait()
  101.  
  102.     elapsed := time.Since(t)
  103.     fmt.Println("Finished in", elapsed)
  104. }
  105.  
  106. func writeResultToCsv(result output, writer *csv.Writer, mu sync.Mutex) error {
  107.     mu.Lock()
  108.     defer mu.Unlock()
  109.     var b string
  110.     if result.success {
  111.         b = "true"
  112.     } else {
  113.         b = "false"
  114.     }
  115.     var data = [][]string{{result.url, fmt.Sprintf("%d", result.statusCode), b}}
  116.     err := writer.WriteAll(data)
  117.     if err != nil {
  118.         return errors.New(fmt.Sprintf("Cannot write results for url %s, message: %s", result.url, err))
  119.     }
  120.     writer.Flush()
  121.     return nil
  122. }
  123.  
  124. func calculateResult(url string, client http.Client) output {
  125.     var result output
  126.     req, err := http.NewRequest("HEAD", url, nil)
  127.     if err != nil {
  128.         panic(fmt.Sprintf("Preparing request failed. Error: %s", err))
  129.     }
  130.     resp, err := client.Do(req)
  131.     if err != nil {
  132.         result.success = false
  133.         result.statusCode = -1
  134.         result.url = url
  135.         return result
  136.     }
  137.  
  138.     if resp.StatusCode < 300 && resp.StatusCode > 199 {
  139.         result.success = true
  140.     } else {
  141.         result.success = false
  142.     }
  143.     result.statusCode = resp.StatusCode
  144.     result.url = url
  145.  
  146.     return result
  147. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top