SHOW:
|
|
- or go back to the newest paste.
#!/usr/bin/env ruby
require 'nokogiri'
require 'open-uri'
require 'sequel'
require 'sqlite3'
require 'pry'
require 'uri'
require 'cgi'

# Scrapes the sub-forum ("subgenre") listing of every genre forum on
# clublandlv.com and stores one row per subgenre in the `subgenres` table.
#
# For each genre_id found in the `genres` table the script:
#   1. fetches forumdisplay.php?f=<genre_id>,
#   2. reads every ".forumtitle a" link inside "#forumbits",
#   3. inserts [link text, forum id taken from the link's `f` query
#      parameter, genre_id of the page being scraped] into `subgenres`.

DB = Sequel.connect('sqlite:///Users/Ryan/workspace/testing/clubland-2/clublandlv.sqlite')
genre_ids = DB[:genres].select_map(:genre_id)
subgenre_table = DB[:subgenres]

genre_ids.each do |new_g_id|
  clubland = "http://www.clublandlv.com/forumdisplay.php?f=#{new_g_id}"
  puts "Running script for #{clubland}"

  # NOTE: fetching a URL through Kernel#open relies on open-uri's patch,
  # which was removed in Ruby 3.0; switch to URI.open when running there.
  doc = Nokogiri::HTML(open(clubland))

  doc.css("#forumbits").each do |grab|
    # Query the anchors once and derive every column from the same node so
    # the name, the subgenre id and the genre id can never get out of step.
    rows = grab.css(".forumtitle a").map do |link|
      # The sub-forum id is the `f` parameter of the link's query string,
      # e.g. "forumdisplay.php?f=12" => 12.
      subgenre_id = CGI.parse(URI.parse(link.attr("href")).query)['f'].first.to_i
      [link.text, subgenre_id, new_g_id]
    end

    # The genre is written as part of the insert itself. A separate
    # `subgenre_table.update(:genre => new_g_id)` has no WHERE clause and
    # therefore rewrites the genre of EVERY row in the table on each pass,
    # leaving the whole column set to the last genre id processed.
    subgenre_table.import([:subgenre, :subgenre_id, :genre], rows)
  end
end