Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env ruby
# frozen_string_literal: true

# Walks every genre id stored in the local SQLite database, fetches that
# genre's forum page, and stores each sub-forum ("subgenre") link found there
# together with the genre id it was discovered under.

require 'nokogiri'
require 'open-uri'
require 'sequel'
require 'sqlite3'
require 'pry'
require 'uri'
require 'cgi'

DB = Sequel.connect('sqlite:///Users/Ryan/workspace/testing/clubland-2/clublandlv.sqlite')

# Pulls the integer value of the `f` query parameter out of a link target.
#
# @param href [String] the link's href; must contain a query string
# @return [Integer] the value of `f`, or 0 when the parameter is absent
#   (nil.to_i) or not numeric
def forum_id_from(href)
  CGI.parse(URI.parse(href).query)['f'].first.to_i
end

subgenre_table = DB[:subgenres]

DB[:genres].select_map(:genre_id).each do |genre_id|
  clubland = "http://www.clublandlv.com/forumdisplay.php?f=#{genre_id}"
  puts "Running script for #{clubland}"

  # Block form closes the HTTP stream as soon as the page has been parsed.
  # URI.open is used because plain Kernel#open no longer opens URLs on Ruby 3+.
  doc = URI.open(clubland) { |page| Nokogiri::HTML(page) }

  doc.css('#forumbits').each do |forum_bits|
    # Build one row per link, carrying the current genre id with it.
    rows = forum_bits.css('.forumtitle a').map do |anchor|
      [anchor.text, forum_id_from(anchor['href']), genre_id]
    end

    # The genre is written as part of the insert itself. A separate
    # `subgenre_table.update(genre: genre_id)` has no WHERE clause, so it
    # rewrites the genre of every row in the table on each iteration and
    # leaves the whole column set to the last genre id processed.
    subgenre_table.import(%i[subgenre subgenre_id genre], rows)
  end
end
Advertisement
Add Comment
Please, Sign In to add comment