Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
- require 'socket'
- require 'open-uri'
- require 'rubygems'
- require 'servolux'
- require 'nokogiri'
- PATH = '/tmp/web_fetch.socket'
- # Create a UNIX socket at the tmp PATH
- # Runs once in the parent; all forked children inherit the socket's
- # file descriptor.
- $acceptor = UNIXServer.new(PATH)
- # This module defines the process our forked workers will run. It listens on
- # the socket and expects a single URL. It will then fetch this URL and parse
- # the contents using nokogiri.
- module WebFetch
- def execute
- if IO.select([$acceptor], nil, nil, 2)
- socket, addr = $acceptor.accept_nonblock
- url = socket.gets
- socket.close
- doc = Nokogiri::HTML(open(url)) { |config| config.noblanks.noent }
- $stderr.puts "child #$$ processed #{url}"
- $stderr.flush
- end
- rescue Errno::EAGAIN, Errno::ECONNABORTED, Errno::EPROTO, Errno::EINTR
- end
- def after_executing
- $acceptor.close
- end
- end
- # Spin up a pool of these workers
- pool = Servolux::Prefork.new(:module => WebFetch)
- pool.start 3
- # 'urls.txt' is a simple text file with one URL per line
- urls = File.readlines('urls.txt')
- begin
- # Keeping sending URLs to the workers until we have run out of URLs
- until urls.empty?
- client = UNIXSocket.open(PATH)
- client.puts urls.shift
- client.close
- end
- rescue Errno::ECONNREFUSED
- retry
- ensure
- # Give the workers time to complete their current task and then stop the pool
- sleep 5
- pool.stop
- $acceptor.close
- File.unlink if File.socket?(PATH)
- end
Add Comment
Please, Sign In to add comment