Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
- import (
- "fmt"
- "sync"
- )
// Fetcher is the page source used by Crawl.
type Fetcher interface {
	// Fetch looks up url and returns the body of that page together with
	// the URLs found on it. A non-nil err means the page could not be
	// fetched.
	Fetch(url string) (body string, urls []string, err error)
}
- // Crawl uses fetcher to recursively crawl
- // pages starting with url, to a maximum of depth.
- func Crawl(url string, depth int, fetcher Fetcher) {
- // TODO: Fetch URLs in parallel.
- // TODO: Don't fetch the same URL twice.
- // This implementation doesn't do either:
- type CachedData struct {
- body string
- urls []string
- }
- cached_urls_data := make(map[string]CachedData)
- var mutex sync.Mutex
- var walk func(url string, depth int, ch chan string)
- walk = func(url string, depth int, ch chan string) {
- defer close(ch)
- if depth <= 0 {
- return
- }
- var body string
- var urls []string
- var err error
- mutex.Lock()
- cached_data, was_stored := cached_urls_data[url]
- mutex.Unlock()
- if !was_stored {
- body, urls, err = fetcher.Fetch(url)
- if err != nil {
- ch <- err.Error()
- return
- }
- mutex.Lock()
- cached_urls_data[url] = CachedData{body, urls}
- mutex.Unlock()
- } else {
- urls = cached_data.urls
- body = cached_data.body
- }
- ch <- fmt.Sprintf("found: %s %q", url, body)
- results := make([]chan string, len(urls))
- for i, u := range urls {
- results[i] = make(chan string)
- go walk(u, depth-1, results[i])
- }
- for _, channel := range results {
- for res := range channel {
- ch <- res
- }
- }
- }
- ch := make(chan string)
- go walk(url, depth, ch)
- for found := range ch {
- fmt.Println(found)
- }
- }
- func main() {
- Crawl("https://golang.org/", 4, fetcher)
- }
// fakeResult is the canned response for one URL: the page body and the
// URLs found on that page.
type fakeResult struct {
	body string
	urls []string
}

// fakeFetcher is a Fetcher that returns canned results, keyed by URL.
type fakeFetcher map[string]*fakeResult

// Fetch returns the canned body and URLs stored for url. A URL with no
// entry in f yields a "not found" error.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	res, ok := f[url]
	if !ok {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return res.body, res.urls, nil
}
- // fetcher is a populated fakeFetcher.
- var fetcher = fakeFetcher{
- "https://golang.org/": &fakeResult{
- "The Go Programming Language",
- []string{
- "https://golang.org/pkg/",
- "https://golang.org/cmd/",
- },
- },
- "https://golang.org/pkg/": &fakeResult{
- "Packages",
- []string{
- "https://golang.org/",
- "https://golang.org/cmd/",
- "https://golang.org/pkg/fmt/",
- "https://golang.org/pkg/os/",
- },
- },
- "https://golang.org/pkg/fmt/": &fakeResult{
- "Package fmt",
- []string{
- "https://golang.org/",
- "https://golang.org/pkg/",
- },
- },
- "https://golang.org/pkg/os/": &fakeResult{
- "Package os",
- []string{
- "https://golang.org/",
- "https://golang.org/pkg/",
- },
- },
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement