Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/ruby
- require 'rubygems'
- # Memcache client
- require 'dalli'
- # To divide an array
- require 'enumerator'
# Locations used by the sync: the s3cmd executable, the S3 bucket URL prefix
# and the local directory the bucket is mirrored into.
# NOTE: these are top-level local variables, so they are not visible inside
# method bodies (`def ... end`); pass them explicitly where they are needed.
s3cmdpath = "/usr/bin/s3cmd"
bucketpath = "s3://bucketname/"
localpath = "/media/ephemeral0/targeting/"

# This generates 256 values, from "00" to "ff". Each value is considered a "HexPrefix".
HexPrefixes = Array.new(256) { |i| format("%02x", i) }

# Subdivide the work into 8 queues of equal size, since this program is meant
# to run on an 8 core instance. `each_slice` replaces the Ruby 1.8-only
# `enum_slice`, which no longer exists on current Rubies.
Queue1, Queue2, Queue3, Queue4, Queue5, Queue6, Queue7, Queue8 =
  HexPrefixes.each_slice((HexPrefixes.size / 8.0).round).to_a
# Synchronizes a list of "HexPrefix" directories from S3 with s3cmd, reads every
# file the sync reports as changed, and pushes each file's data into Memcache
# under its MD5 filename.
class SyncHashPush
  # Shell pipeline appended to the s3cmd call. It reduces the sync output to one
  # bare MD5 filename per line: keep lines mentioning "targeting.prd1", take the
  # 7th "/"-separated field, then drop everything from ".txt" onwards.
  # Equivalent to:
  #   s3cmd sync s3://bucketname/00 /media/ephemeral0/targeting/ | grep "targeting.prd1" | awk -F"/" '{print $7}' | awk -F".txt" '{print $1}'
  CHANGED_FILE_FILTER =
    " | grep \"targeting.prd1\" | awk -F\"/\" '{print $7}' | awk -F\".txt\" '{print $1}'".freeze

  # Performs the whole sync-and-push for every prefix as soon as the object is
  # built.
  #
  # The three paths used to be read from top-level local variables, which are
  # not visible inside a method body; they are now keyword arguments whose
  # defaults are the values the script was written with.
  #
  # @param hexprefixes [Array<String>] directory names to sync, e.g. "00".."ff"
  # @param s3cmdpath [String] path of the s3cmd executable
  # @param bucketpath [String] S3 URL the hex prefix is appended to
  # @param localpath [String] local sync target, including the trailing slash
  # @param memcache [String] "host:port" of the Memcache server
  # @raise [Errno::ENOENT] if a file reported as changed is missing locally
  def initialize(hexprefixes,
                 s3cmdpath: "/usr/bin/s3cmd",
                 bucketpath: "s3://bucketname/",
                 localpath: "/media/ephemeral0/targeting/",
                 memcache: "localhost:11211")
    @s3cmdpath = s3cmdpath
    @bucketpath = bucketpath
    @localpath = localpath

    # A single client is reused for every prefix instead of one per prefix.
    dalliclient = Dalli::Client.new(memcache)

    hexprefixes.each do |hexprefix|
      changed_md5_names(hexprefix).each do |md5string|
        # Push the file data into Memcache, keyed by the MD5 filename.
        dalliclient.set(md5string, File.read(local_file_for(md5string)))
      end
    end
  end

  private

  # Runs s3cmd sync for one prefix and returns the MD5 filenames it reported.
  def changed_md5_names(hexprefix)
    commandstring = "#{@s3cmdpath} sync #{@bucketpath}#{hexprefix} #{@localpath}#{CHANGED_FILE_FILTER}"
    # awk prints an empty line for matches with fewer than 7 fields; skip those
    # (they would turn into a bogus "//.txt" path) and drop duplicate names.
    `#{commandstring}`.split(/\n/).reject(&:empty?).uniq
  end

  # Builds the local path of a synced file: <localpath>/<chars 0-1>/<chars 2-3>/<md5>.txt
  def local_file_for(md5string)
    "#{@localpath}#{md5string[0..1]}/#{md5string[2..3]}/#{md5string}.txt"
  end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement