View difference between Paste ID: 3f9PBqMT and 92LAW1us
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/env ruby
2
require 'nokogiri'
3
require 'open-uri'
4
require 'sequel'
5
require 'sqlite3'
6
require 'pry'
7
require 'uri'
8
require 'cgi'
9
10
11
  # Scrapes the sub-forum ("subgenre") links of every genre forum listed in
  # the genres table and stores each one in the subgenres table together with
  # the id of the genre page it was found on.
  DB = Sequel.connect('sqlite:///Users/Ryan/workspace/testing/clubland-2/clublandlv.sqlite')
  genre_ids = DB[:genres].select_map(:genre_id)
  subgenre_table = DB[:subgenres]

  genre_ids.each do |new_g_id|
    clubland = "http://www.clublandlv.com/forumdisplay.php?f=#{new_g_id}"
    puts "Running script for #{clubland}"

    # URI.open (provided by open-uri) is used instead of a bare `open`, which
    # no longer accepts URLs on Ruby 3.0+. The block form closes the handle
    # once the page has been parsed.
    doc = URI.open(clubland) { |page| Nokogiri::HTML(page) }

    doc.css("#forumbits").each do |grab|
      # One row per sub-forum link:
      #   [link text, forum id taken from the "f" query parameter, parent genre id]
      rows = grab.css(".forumtitle a").map do |link|
        forum_id = CGI.parse(URI.parse(link.attr("href")).query)['f'].first.to_i
        [link.text, forum_id, new_g_id]
      end

      # The genre id is written as part of the insert itself. A separate
      # unfiltered `subgenre_table.update(:genre => new_g_id)` would touch
      # every row in the table, so after the last iteration the whole genre
      # column would hold the final genre id. Inserting it per row keeps each
      # subgenre tied to the genre currently being processed.
      subgenre_table.import([:subgenre, :subgenre_id, :genre], rows)
    end
  end