Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # encoding: utf-8
- require 'net/http'
- require 'threadpool'
- # require 'pry'
# {{{ getlist(gall: String, mgall: bool, page: Integer) : 2D array - [num, name, ip, id, date]
#
# Downloads one board-list page from gall.dcinside.com and extracts one row
# per post found in the HTML.
#
# gall      - gallery id, placed in the `id=` query parameter.
# mgall     - when truthy the path is prefixed with "/mgallery".
# page      - page number; nil yields [] without any request.
# max_tries - how many times the download + parse is attempted before
#             giving up (default 5, the limit the download already had).
#
# Returns an Array of [num, name, ip, id, date]; each field is the captured
# String, or nil when the corresponding attribute is not present in the row.
# Returns [] when the page has no rows or when every attempt failed.
def getlist(gall, mgall, page, max_tries: 5)
  return [] if page.nil?

  path = "#{mgall ? '/mgallery' : ''}/board/lists/?id=#{gall}&page=#{page}"
  # The gallery id is user input: escape it so characters such as "(" or "+"
  # are matched literally instead of being read as regex syntax.
  post_no = /#{Regexp.escape(gall)}.*?no=(\d+)/
  tries = 0

  begin
    source = Net::HTTP.get('gall.dcinside.com', path)
    source.scan(/ub-content.*?<\/tr>/m).map do |row|
      [
        row[post_no, 1],
        # title = row[/\/em>(.*?)<\/a>/, 1]
        row[/data-nick="(.*?)"/, 1],
        row[/data-ip="(.*?)"/, 1],
        row[/data-uid="(.*?)"/, 1],
        row[/gall_date" title="(.*?)"/, 1]
      ]
    end
  rescue StandardError => e
    # Bounded retry: a failure that repeats on every attempt must not loop
    # forever, and Interrupt/SystemExit are deliberately not rescued.
    tries += 1
    retry if tries < max_tries
    warn "getlist: giving up on page #{page} after #{tries} tries (#{e.class}: #{e.message})"
    []
  end
end
# }}}
# Interactive setup: asks for the gallery id, whether it is a minor gallery,
# the page range to scan, and the date prefix used to filter posts.
puts '디시인사이드 갤러리 잉여랭킹 프로그램'
print '갤러리 이름을 입력하세요. (ex.girlgroup) : '
gall = gets.chomp

# Keep asking until the answer is exactly "y" or "n". An unanchored /y|n/
# would also accept inputs such as "yes" or "any" and then treat them as "n".
mgall = nil
loop do
  print '마이너 갤러리 입니까? (y/n) : '
  answer = gets.chomp
  next unless answer =~ /\A[yn]\z/

  mgall = (answer == 'y')
  break
end

# Non-numeric input becomes 0 because of String#to_i.
print '시작 페이지를 입력하세요. (ex.1) : '
_start = gets.chomp.to_i
print '마지막 페이지를 입력하세요. (ex.1000) : '
_end = gets.chomp.to_i

# The typed text is interpolated as-is into a pattern anchored at line start,
# so a partial value such as "2018-01" acts as a prefix.
print '날짜를 입력하세요. (ex. 2018-01-01 00:00:00) '
date = /^#{gets.chomp}/
# Fetch every page in _start.._end on worker threads, appending the rows
# returned by getlist to `result` and printing a running progress counter.
result = []
mutex = Mutex.new
nowpage = 0
total = _end - _start + 1
threads = []
slice = 1000 # upper bound on the number of worker threads

# Install the Ctrl-C handler before any worker exists, so an interrupt that
# arrives while the threads are still being spawned is handled as well.
trap :INT do
  Thread.list.each do |thread|
    thread.exit unless thread == Thread.current
  end
  puts "\nInterrupted!"
  exit
end

# Deal the pages round-robin into at most `slice` groups: group i receives
# _start + i, _start + i + slice, ... With `slice` pages or fewer, every page
# gets its own group (and therefore its own thread).
enum = (_start.._end).group_by { |page| (page - _start) % slice }.values

enum.each do |nums|
  threads << Thread.new do
    nums.each do |page|
      arr = getlist(gall, mgall, page)
      # synchronize releases the lock even if the block raises or the thread
      # is killed, so the remaining workers cannot be left blocked on it.
      mutex.synchronize do
        result.concat arr
        nowpage += 1
        print "\r(#{nowpage} / #{total})"
      end
    end
  end
end

threads.each(&:join)
puts "\n잠시만 기다려 주세요."

# Keep a single row per post number (column 0).
result.uniq!(&:first)
# Keep only rows whose date column (index 4) matches the requested pattern.
result.select! { |row| row[4] =~ date }

# Reduce each row to [post number, author label]. The label is the nickname
# followed by " - <ip>" and " - <id>" for whichever of those is non-empty.
# Rows without a nickname become nil and are removed right after.
result.map! do |row|
  label = row[1]
  next if label.nil?

  [row[2], row[3]].each do |extra|
    label += " - " + extra unless extra.nil? || extra == ""
  end
  [row[0], label.to_sym]
end
result.compact!
puts "\n총 글 갯수: #{result.length}"

# Count posts per author label.
table = Hash.new(0)
result.each { |_num, author| table[author] += 1 }

# Highest post count first; the result is an array of [author, count] pairs.
table = table.sort_by { |_author, count| count }.reverse

# One line per author: "<rank>위 <count>글 <author>".
# Each piece is written with its own print call rather than being joined
# into one string first.
# NOTE(review): presumably the author labels come from raw HTTP bytes and may
# not share the encoding of the literals - confirm before merging the writes.
File.open("result.txt", "w") do |file|
  table.each.with_index(1) do |(author, count), rank|
    [rank, "위 ", count, "글 ", author].each { |piece| file.print piece }
    file.puts
  end
end
- # binding.pry
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement