Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Collects poster-rank statistics for the fastest-growing threads of a forum
# and stores one row per thread in the +threads_stat+ table.
#
# Steps:
# 1. Reads the [tid, responses, last_post_date] rows recorded in
#    +threads_responses+ for this site/forum with last_post_date later than
#    +date_now(h_back)+.
# 2. Keeps threads that have more than one recorded row, orders them by the
#    difference between their largest and smallest response counter
#    (largest first) and takes the first THREADS_ANALZ_NUM of them.
# 3. For each such thread asks BCTalkParser which pages to fetch (at most 10),
#    parses them and accumulates how often ranks 1, 2, 3, 4, 5 and 11 occur
#    in each page's +:stat+ list. Parsing of a thread stops once a page
#    reports a +:first_post_date+ earlier than the interval start.
# 4. Inserts the accumulated counters into +threads_stat+ and prints a
#    one-line summary when at least one page was planned.
#
# Side effects: overwrites BCTalkParser's +@@from_date+ class variable,
# writes to the database and prints progress lines with +p+.
#
# @param fid [Object] forum id used to filter +forums+ / +threads_responses+
# @param h_back [Integer] passed to +date_now+ to obtain the interval start
#   (presumably "hours back from now" -- confirm against +date_now+)
# @return [Array] whatever +Parallel.map_with_index+ returns for the block
#   (the block yields the printed summary string, or nil when nothing was
#   planned for a thread)
def self.load_thread_posts_with_max_responses_in_interval(fid, h_back = 24)
  # The title is only used for logging, so a missing forum row must not abort the run.
  forum = DB[:forums].filter(siteid: SID, fid: fid).first
  title = forum && forum[:title]
  p "----------------FORUM: #{fid} #{title}"

  from = date_now(h_back)
  BCTalkParser.class_variable_set(:@@from_date, from)
  from_str = from.strftime('%F %H:%M:%S')
  p " --load_thread_posts_with_max_responses_in_interval fid:#{fid} h_back:#{h_back} start_from:#{from_str}"
  to = date_now(0)

  threads_responses = DB[:threads_responses]
    .filter(Sequel.lit("sid=? and fid=? and last_post_date > ?", SID, fid, from))
    .select_map([:tid, :responses, :last_post_date])

  # Growth of a thread = largest minus smallest recorded response counter.
  growth = lambda do |rows|
    low, high = rows.map { |row| row[1] }.minmax
    high - low
  end

  rows_by_thread = threads_responses.group_by { |row| row[0] }
  # A single sample cannot show growth, so such threads are skipped.
  active_threads = rows_by_thread.select { |_tid, rows| rows.size > 1 }
  sorted_thread_stats = active_threads
    .sort_by { |_tid, rows| growth.call(rows) }
    .reverse
    .take(THREADS_ANALZ_NUM)

  tracked_ranks = [1, 2, 3, 4, 5, 11]

  Parallel.map_with_index(sorted_thread_stats, :in_threads => 1) do |entry, idx|
    tid = entry[0]
    rows = entry[1]

    max_responses = rows.map { |row| row[1] }.max
    page_and_num = PageUtil.calc_last_page(max_responses + 1, 20)
    lpage = page_and_num[0]
    lcount = page_and_num[1]

    downl_pages = BCTalkParser.calc_arr_downl_pages(tid, lpage, lcount, BCTalkParser.from_date).take(10)
    downloaded_pages = []
    ranks_stat_all = Hash.new(0)

    downl_pages.each do |page|
      # Recorded before the attempt, so pages whose parsing failed are listed too.
      downloaded_pages << page[0]
      begin
        data = BCTalkParser.set_opt({ rank: 1 }).parse_thread_page(tid, page[0])
        ranks_stat = data[:stat].each_with_object(Hash.new(0)) { |rank, counts| counts[rank] += 1 }
        # Touch every tracked rank so the summary always shows all of them, even at 0.
        tracked_ranks.each { |rank| ranks_stat_all[rank] += ranks_stat[rank] }
        # Remaining pages lie outside the requested interval.
        break if data[:first_post_date] < from
      rescue => ex
        # Best effort: report the failing page and carry on with the next one.
        p "-------------------"
        p "--err tid #{tid} pg #{page} --#{ex}"
      end
    end

    stat_row = {
      fid: fid, tid: tid, description: "downloaded_pages #{downloaded_pages}",
      start_date: from, end_date: to, added: date_now,
      r1_count: ranks_stat_all[1],
      r2_count: ranks_stat_all[2],
      r3_count: ranks_stat_all[3],
      r4_count: ranks_stat_all[4],
      r5_count: ranks_stat_all[5],
      r11_count: ranks_stat_all[11]
    }
    DB[:threads_stat].insert(stat_row)

    planned_str = downl_pages.map { |page| page[0].to_s }.join(' ')
    summary = "[[#{idx}] load_thr #{tid} last pg,count: #{page_and_num}".ljust(50) +
              "planned:#{planned_str.ljust(40)} down:#{downloaded_pages} ranks_stat_all: #{ranks_stat_all}"
    p summary unless downl_pages.empty?
  end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement