Advertisement
riocampos

gogakuondemand.rb (v2.2)

Jul 30th, 2013
358
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 6.67 KB | None | 0 0
  1. #!/usr/bin/env ruby
  2. # coding: utf-8
  3.  
  4. # 全講座DLで約20分、約557MB(mp4からmp3への変換はPC能力に依存)
  5.  
  6. require 'nokogiri'
  7. require 'net/https'
  8. require 'fileutils'
  9.  
  # Runs every setup step in order: locate the script directory, read the
  # user's preferences and the subject tables, then enter the download
  # destination directory.
  def load_pref
    %i[set_script_dir load_pref_file load_subjects_file set_save_file_dest_dir].each do |step|
      send(step)
    end
  end
  16.  
  # Stores the absolute directory of this script in @script_dir and makes it
  # the current directory through push_dir (so the relative `load` calls for
  # the settings files resolve next to the script).
  def set_script_dir
    script_location = File.dirname(__FILE__)
    @script_dir = File.expand_path(script_location)
    push_dir(@script_dir)
  end
  21.  
  # Loads ./pref.rb (relative to the current directory) and collects into
  # @select_subjects_jp every key of `pref` whose value is truthy.
  # Returns the `pref` collection itself.
  def load_pref_file
    load './pref.rb'
    settings = pref
    @select_subjects_jp = settings.each_with_object([]) do |(name, enabled), chosen|
      chosen << name if enabled
    end
    settings
  end
  29.  
  # Loads ./subjects.rb and caches its two tables in @subject_urls and
  # @subjects_jp, then translates every selected Japanese subject name into
  # its internal identifier (stored in @select_subjects).
  # Returns @select_subjects_jp.
  def load_subjects_file
    load './subjects.rb'
    @subject_urls = subject_urls
    @subjects_jp  = subjects_jp
    @select_subjects = @select_subjects_jp.map { |name_jp| @subjects_jp[name_jp] }
    @select_subjects_jp
  end
  39.  
  # Chooses the directory downloads are saved into and enters it via push_dir.
  #
  # Uses the value returned by `save_file_dest_dir` when it is truthy,
  # otherwise falls back to the directory containing this script. The
  # directory is created if missing, including any missing parent
  # directories (a bare Dir.mkdir would raise for nested paths).
  #
  # Returns whatever push_dir returns.
  def set_save_file_dest_dir
    configured = save_file_dest_dir
    base = configured ? File.path(configured) : File.dirname(__FILE__)
    working_dir = File.expand_path(base)
    FileUtils.mkdir_p(working_dir) unless File.directory?(working_dir)
    push_dir(working_dir)
  end
  47.  
  # Prints the list of selected courses (the Japanese names held in
  # @select_subjects_jp), followed by a blank line.
  def print_download_subjects
    subject_list = @select_subjects_jp.join("、")
    print "ダウンロードする語学講座:#{subject_list}\n\n"
  end
  52.  
  # Fetches +url+ over HTTPS (certificate verification enabled) and returns
  # the response body as a String.
  #
  # url - absolute http(s) URL string.
  #
  # The request target comes from URI#request_uri, which already includes the
  # query string when present and yields "/" for an empty path. The protocol
  # version is no longer pinned to TLSv1, so the client and server negotiate
  # the best version both support.
  def https_body(url)
    uri = URI.parse(url)
    https = Net::HTTP.new(uri.host, uri.port)
    https.use_ssl = true
    https.verify_mode = OpenSSL::SSL::VERIFY_PEER
    https.get(uri.request_uri).body
  end
  65.  
  # Downloads the listing XML of every selected subject and builds the
  # download table.
  #
  # Returns a Hash keyed by subject identifier. Each value is a Hash holding
  #   :kouza            => third attribute value of the first <music> node
  #   { date: "Y_M_D" } => fifth attribute value of the matching <music> node
  # (the attribute positions are taken as-is from the feed).
  #
  # Subjects whose listing has no /musicdata/music nodes are skipped instead
  # of failing with NoMethodError on nil.
  #
  # Side effect: record_year stores the last computed year in @record_year.
  def get_source_urls
    data_hash = {}
    @select_subjects.each do |sub|
      url = "https://cgi2.nhk.or.jp/gogaku/#{@subject_urls[sub]}/listdataflv.xml"
      doc = Nokogiri.XML(https_body(url))
      entries = doc.xpath("/musicdata/music")
      next if entries.empty?

      # NOTE(review): the greedy (\d+)(\d+) splits a run of digits into
      # "everything but the last digit" and "the last digit"; confirm this
      # matches the real format of the hdate attribute.
      dates = entries.map do |entry|
        parts = entry["hdate"].scan(/(\d+)(\d+)/)
        "#{record_year(parts[0][0].to_i)}_%02d_%02d" % parts.flatten
      end

      data_hash[sub] = { kouza: entries[0].values[2] }
      entries.each_with_index do |entry, i|
        data_hash[sub][{ date: dates[i] }] = entry.values[4]
      end
    end
    data_hash
  end
  81.  
  # Guesses the year a broadcast belongs to from its month: a month later
  # than the current one is taken to be from last year, anything else from
  # this year. The result is also stored in @record_year.
  def record_year(record_month)
    today = Time.now
    @record_year = record_month > today.month ? today.year - 1 : today.year
  end
  88.  
  # Announces the course being processed and makes sure its output directory
  # exists.
  #
  # key   - subject identifier, used as the directory name.
  # value - per-subject Hash; its :kouza entry is stored in @subject.
  #
  # Returns the subject directory path (with a trailing slash).
  def prepare_download_each_subject(key, value)
    @subject = value[:kouza]
    puts "講座名:#{@subject}"
    File.join(current_dir, "#{key}/").tap do |subject_dir|
      Dir.mkdir(subject_dir) unless File.directory?(subject_dir)
    end
  end
  96.  
  # Downloads, merges, converts and cleans up every dated entry of a subject.
  #
  # value - per-subject Hash as built by get_source_urls: a :kouza entry
  #         (skipped here) plus entries keyed by { date: "YYYY_MM_DD" } whose
  #         values are passed to make_decrypt_key.
  #
  # For each entry @metadata is rebuilt. The :year tag is taken from the
  # entry's own date string; @record_year holds only the year of the last
  # entry processed by get_source_urls, so it is used solely as a fallback
  # when the date does not start with four digits.
  def each_date_process(value)
    value.each do |k, v|
      next if k == :kouza

      date = k[:date]
      @metadata = {
        subject: @subject,
        title:   "#{@subject}_#{date}",
        genre:   "Speech",
        create:  "NHK",
        year:    date.to_s[/\A\d{4}/] || @record_year.to_s
      }
      puts "日付:#{date}"
      print "ダウンロード中...\n"
      merge_list = make_decrypt_key(v)
      print "ファイル変換中...\n"
      merge_mp4(merge_list)
      convert_mp4_mp3
      delete_temp_files
      print "\n"
    end
  end
  117.  
  # Resolves the playlists of one programme, fetches its decryption key into
  # @decrypt_bin_key and hands the remaining URLs to make_merge_list.
  #
  # subject_code - path component identifying the programme on the stream host.
  #
  # The first http line of the master playlist is the index playlist. In the
  # index playlist the first line containing "http" is treated as the key
  # line (its https URL minus the final character); every later http line is
  # passed on as a segment URL.
  #
  # Returns the result of make_merge_list.
  def make_decrypt_key(subject_code)
    master_m3u8 = "https://nhk-vh.akamaihd.net/i/gogaku-stream/mp4/#{subject_code}/master.m3u8"
    index_m3u8_url = https_body(master_m3u8)[/http.*/]
    key_line, *segment_urls = https_body(index_m3u8_url).lines.grep(/http/).map(&:strip)
    # Drop the last character (the closing quote of the URI attribute in the
    # observed key line format).
    crypt_key_url = key_line[/https:\/\/.*/][0...-1]
    @decrypt_bin_key = https_body(crypt_key_url)
    make_merge_list(segment_urls)
  end
  126.  
  # Ensures the "temp/" scratch directory exists under the current directory,
  # then downloads and decrypts all +urls+ into it.
  #
  # Returns the result of download_and_decrypt_gogaku_files.
  def make_merge_list(urls)
    scratch_dir = File.join(current_dir, "temp/")
    File.directory?(scratch_dir) || Dir.mkdir(scratch_dir)
    download_and_decrypt_gogaku_files(urls, scratch_dir)
  end
  132.  
  # Downloads every encrypted segment, decrypts it with @decrypt_bin_key and
  # writes it to +temp_dir+ as temp_001.mp4, temp_002.mp4, ...
  #
  # urls     - Array of segment URL strings, in playback order.
  # temp_dir - directory the decrypted segments are written to.
  #
  # Returns a String of the written file paths, each followed by one space
  # (ready to be spliced into a cat/type command line).
  #
  # Fixes over the previous version:
  # * files are written in binary mode, so the decrypted bytes are not
  #   altered by newline translation on Windows;
  # * the request target comes from URI#request_uri, so a URL without a
  #   query string no longer raises;
  # * SSL is enabled when a segment URL uses the https scheme.
  def download_and_decrypt_gogaku_files(urls, temp_dir)
    merge_list = ""
    max = urls.size
    urls.each_with_index do |url, index|
      sequence = index + 1
      uri = URI.parse(url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == "https")
      crypt_ts = http.get(uri.request_uri).body
      # The IV is the 1-based segment number as a 128-bit big-endian integer.
      iv_bin = ['%032x' % sequence].pack('H*')
      temp_mp4_path = File.join(temp_dir, "temp_#{'%03d' % sequence}.mp4")
      merge_list << "#{temp_mp4_path} "
      decrypt_mp4 = decrypt(crypt_ts, @decrypt_bin_key, iv_bin)
      File.open(temp_mp4_path, "wb") { |f| f.write(decrypt_mp4) }
      progress_bar(sequence, max, "ファイル")
    end
    print "\n"
    merge_list
  end
  151.  
  # Redraws a 50-column text progress bar on the current terminal line.
  #
  # progress - amount done so far.
  # max      - total amount; also sets the width the counter is padded to.
  # unit     - label printed after the "progress / max" counter.
  #
  # The filled fraction is clamped to 0..1, so the bar never exceeds 50
  # columns. A non-positive +max+ draws an empty bar instead of raising
  # FloatDomainError from NaN/Infinity#to_i.
  def progress_bar(progress, max, unit)
    max_digits = max.to_s.size
    ratio = max.to_f > 0 ? progress.to_f / max : 0.0
    ratio = [[ratio, 0.0].max, 1.0].min
    bar = '%-50s' % ('*' * (ratio * 50).to_i)
    print "\r" + "[#{bar}] #{'%*d' % [max_digits, progress]} / #{max} #{unit}"
  end
  156.  
  # Decrypts +str+ with AES-128-CBC.
  #
  # str - ciphertext bytes.
  # key - 16-byte key.
  # iv  - 16-byte initialization vector.
  #
  # Returns the plaintext. Errors raised by OpenSSL (for example on bad
  # padding) propagate to the caller.
  def decrypt(str, key, iv)
    decipher = OpenSSL::Cipher.new("aes-128-cbc").tap do |cipher|
      cipher.decrypt
      cipher.key = key
      cipher.iv = iv
    end
    plain = decipher.update(str)
    plain << decipher.final
  end
  164.  
  # Concatenates the decrypted segment files into "<title_path>.mp4" using
  # the platform's shell (`type` on Windows, `cat` elsewhere).
  #
  # merge_list - String of space-separated segment paths.
  #
  # The command runs through Kernel#system, which blocks until the merge has
  # finished. The previous IO.popen call returned immediately and never
  # closed the pipe, so the following ffmpeg step could start on a partially
  # written file.
  #
  # Returns true when the command exits successfully, false or nil otherwise.
  def merge_mp4(merge_list)
    command =
      if is_win?
        "type #{merge_list} > #{title_path}.mp4".gsub(%r[/], "\\")
      else
        "cat #{merge_list} > #{title_path}.mp4"
      end
    system(command)
  end
  172.  
  # True when Ruby runs on a Windows-family platform (mswin other than
  # mswince, mingw, cygwin or bccwin), false otherwise.
  def is_win?
    !(RUBY_PLATFORM.downcase =~ /mswin(?!ce)|mingw|cygwin|bccwin/).nil?
  end
  176.  
  # Converts "<title_path>.mp4" to "<title_path>.mp3" with ffmpeg, tagging
  # the result from @metadata and drawing a progress bar in seconds.
  #
  # ffmpeg is started with an argument array, so no shell is involved:
  # titles containing spaces, quotes or shell metacharacters (the course name
  # comes from a remote XML feed) are passed through verbatim. stderr is
  # merged into the pipe because ffmpeg reports there.
  #
  # Output is split on "\r" (ffmpeg's progress separator) and then on
  # newlines; the previous separator was the letter "r".
  #
  # Raises Errno::ENOENT when the ffmpeg executable cannot be found.
  def convert_mp4_mp3
    command_ffmpeg = [
      "ffmpeg", "-y", "-i", "#{title_path}.mp4", "-ab", "64k",
      "-metadata", "album=#{@metadata[:subject]}",
      "-metadata", "title=#{@metadata[:title]}",
      "-metadata", "genre=#{@metadata[:genre]}",
      "-metadata", "artist=#{@metadata[:create]}",
      "-metadata", "date=#{@metadata[:year]}",
      "-id3v2_version", "3", "#{title_path}.mp3",
      { err: [:child, :out] }
    ]
    # Turns the captures of an HH:MM:SS.cc match into whole seconds.
    to_seconds = lambda do |match|
      hours, minutes, seconds, centis = match.captures.map(&:to_i)
      ((hours * 360_000 + minutes * 6_000 + seconds * 100 + centis) / 100.0).round
    end
    IO.popen(command_ffmpeg) do |pipe|
      duration = nil
      progress = 0
      pipe.each("\r") do |chunk|
        chunk.each_line do |line|
          if (found = line.match(/: (\d{2}):(\d{2}):(\d{2})\.(\d{2}),/))
            duration = to_seconds.call(found)
          end
          if (found = line.match(/time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})/))
            progress = to_seconds.call(found)
            progress = duration if duration && progress > duration
          end
          progress_bar(progress, duration, "秒") if duration
        end
      end
      print "\n"
    end
  end
  197.  
  # Removes the intermediate files of one conversion: the "temp/" directory
  # and every *.mp4 file directly inside the current directory.
  def delete_temp_files
    base = current_dir
    leftover_mp4 = Dir.glob(File.join(base, "*.mp4"))
    FileUtils.rm_rf(File.join(base, "temp/"))
    FileUtils.rm_f(leftover_mp4)
  end
  204.  
  # The directory most recently entered through push_dir / pop_dir
  # (nil before the first push_dir).
  def current_dir
    @pwd
  end
  208.  
  # Path of the current programme's output file, without extension: the
  # metadata title placed inside the current directory.
  def title_path
    title = @metadata[:title]
    File.join(current_dir, title)
  end
  212.  
  # Enters +dir+: records it on the @pwds stack, remembers it in @pwd and
  # changes the process working directory to it.
  #
  # Returns the new current directory.
  def push_dir(dir)
    (@pwds ||= []) << dir
    @pwd = @pwds.last
    Dir.chdir(@pwd)
    @pwd
  end
  220.  
  # Leaves the directory entered by the latest push_dir: drops it from the
  # @pwds stack and changes back to the directory now on top.
  #
  # Returns the directory that is current afterwards.
  def pop_dir
    @pwds.pop
    (@pwd = @pwds.last).tap { |previous| Dir.chdir(previous) }
  end
  227.  
  # Entry point: loads the settings, lists the selected courses, then for
  # each subject enters its directory, processes all of its dated entries and
  # leaves the directory again. Prints a completion message at the end.
  def gogaku_on_demand
    load_pref
    print_download_subjects
    get_source_urls.each_pair do |subject_key, subject_data|
      push_dir(prepare_download_each_subject(subject_key, subject_data))
      each_date_process(subject_data)
      pop_dir
    end
    puts "作業終了"
  end
  240.  
  # Start downloading only when this file is executed directly, so that it
  # can be loaded or required (e.g. to reuse its helpers) without side effects.
  gogaku_on_demand if __FILE__ == $0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement