Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
- import (
- "context"
- "fmt"
- "net/http"
- "os"
- "sync"
- "time"
- )
// bufferSize is the capacity of the in-memory result buffer: once this many
// result lines have been collected they are flushed to the output file.
const bufferSize int = 128
// Requester fetches batches of URLs through a pool of worker goroutines and
// collects one formatted result line per HTTP response.
type Requester struct {
	// client performs every HTTP request.
	client *http.Client
	// poolSize is the number of worker goroutines started per batch.
	poolSize int
	// buffer holds formatted result lines that have not been written out yet.
	buffer []string
	// bufferIdx is the number of valid entries at the front of buffer;
	// attempts is the maximum number of tries made for each URL.
	bufferIdx, attempts int
}
// RequesterConfig carries the settings New uses to build a Requester.
type RequesterConfig struct {
	// PoolSize is the number of worker goroutines used per batch.
	PoolSize int

	// Timeout is applied to the underlying HTTP client.
	Timeout time.Duration

	// Attempts is the maximum number of tries made for each URL.
	Attempts int
}
- func New(config RequesterConfig) *Requester {
- requester := &Requester{
- client: &http.Client{
- Timeout: config.Timeout,
- },
- poolSize: config.PoolSize,
- buffer: make([]string, bufferSize),
- bufferIdx: 0,
- attempts: config.Attempts,
- }
- return requester
- }
- func (r *Requester) Request(ctx context.Context, urls []string, filename string) {
- outChan := make(chan string, len(urls))
- defer close(outChan)
- wg := r.asyncGet(ctx, urls, outChan)
- os.Remove(filename)
- for i := 0; i < len(urls); i++ {
- if r.bufferIdx == bufferSize {
- r.dumpToFile(filename)
- }
- select {
- case resp := <-outChan:
- r.buffer[r.bufferIdx] = resp
- r.bufferIdx++
- case <-ctx.Done():
- break
- }
- }
- wg.Wait()
- r.dumpToFile(filename)
- }
- func (r *Requester) asyncGet(ctx context.Context, urls []string, outChan chan<- string) *sync.WaitGroup {
- inChan := make(chan string)
- go func() {
- defer close(inChan)
- for _, url := range urls {
- select {
- case inChan <- url:
- case <-ctx.Done():
- return
- }
- }
- }()
- wg := &sync.WaitGroup{}
- for i := 0; i < r.poolSize; i++ {
- wg.Add(1)
- go r.worker(ctx, inChan, outChan, wg)
- }
- return wg
- }
- func (r *Requester) worker(ctx context.Context, inChan <-chan string, outChan chan<- string, wg *sync.WaitGroup) {
- defer wg.Done()
- for {
- select {
- case url, ok := <-inChan:
- if !ok {
- return
- }
- for i := 1; i <= r.attempts; i++ {
- request, err := http.NewRequestWithContext(ctx, "GET", url, nil)
- if err != nil {
- panic(err)
- }
- response, err := r.client.Do(request)
- if err == nil {
- defer response.Body.Close()
- outChan <- fmt.Sprintf("%s %d\n", url, response.StatusCode)
- if response.StatusCode < 500 {
- break
- }
- } else if err == context.Canceled {
- break
- }
- }
- case <-ctx.Done():
- return
- }
- }
- }
- func (r *Requester) dumpToFile(filename string) {
- outfile, err := os.OpenFile(filename, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
- if err != nil {
- panic(err)
- }
- defer outfile.Close()
- for i := 0; i < r.bufferIdx; i++ {
- outfile.WriteString(r.buffer[i])
- }
- r.bufferIdx = 0
- }
- func main() {
- sites := []string{
- "https://www.avito.ru/",
- "https://www.ozon.ru/",
- "https://vk.com/",
- "https://yandex.ru/",
- "https://www.google.com/",
- "https://mail.ru/",
- "https://ok.ru/",
- }
- requester := New(RequesterConfig{
- PoolSize: 2,
- Timeout: 2 * time.Second,
- Attempts: 3,
- })
- ctx, cancel := context.WithTimeout(context.Background(), 2000*time.Millisecond)
- defer cancel()
- requester.Request(ctx, sites, "./out.txt")
- }
Advertisement
Add Comment
Please sign in to add a comment.