Advertisement
riocampos

gogakuondemand.rb

Jul 25th, 2013
2,025
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 6.10 KB | None | 0 0
  1. #!/usr/bin/env ruby
  2. # coding: utf-8
  3.  
  4. # http://d.hatena.ne.jp/riocampos+tech/20130725/p1
  5. # 全講座DLで約20分、約557MB(mp4からmp3への変換はPC能力に依存)
  6.  
  7. require 'nokogiri'
  8. require 'net/https'
  9. require 'fileutils'
  10.  
  # Runs the four configuration steps, in order, that must complete before
  # any download starts: enter the script directory, load the preference and
  # subject definition files, then enter the destination directory.
  #
  # Returns whatever the final step (set_save_file_dest_dir) returns.
  def load_pref
    steps = [:set_script_dir, :load_pref_file, :load_subjects_file, :set_save_file_dest_dir]
    steps.map { |step| send(step) }.last
  end
  17.  
  # Remembers the absolute directory containing this script in @script_dir
  # and makes it the current working directory via push_dir.
  #
  # Returns the value returned by push_dir.
  def set_script_dir
    script_location = File.dirname(__FILE__)
    @script_dir = File.expand_path(script_location)
    push_dir(@script_dir)
  end
  22.  
  # Loads ./pref.rb (relative to the current working directory) and collects
  # into @select_subjects_jp every key of `pref` whose value is truthy.
  #
  # `pref` is expected to be defined by the loaded file and to yield
  # key/value pairs from #each.
  def load_pref_file
    load './pref.rb'
    @select_subjects_jp = []
    pref.each { |name, enabled| @select_subjects_jp.push(name) if enabled }
  end
  30.  
  # Loads ./subjects.rb (relative to the current working directory), caches
  # its `subject_urls` and `subjects_jp` tables in instance variables, and
  # translates every name in @select_subjects_jp through @subjects_jp into
  # @select_subjects (same order; names missing from the table map to nil).
  def load_subjects_file
    load './subjects.rb'
    @select_subjects = []
    @subject_urls = subject_urls
    @subjects_jp  = subjects_jp
    @select_subjects_jp.each { |name_jp| @select_subjects.push(@subjects_jp[name_jp]) }
  end
  40.  
  # Chooses the directory downloads are saved into and makes it current.
  #
  # Uses `save_file_dest_dir` when it returns a truthy value, otherwise the
  # directory containing this script. The directory is created together with
  # any missing parents (the previous Dir.mkdir failed with ENOENT for a
  # nested destination whose parent did not exist yet).
  #
  # Returns the value returned by push_dir.
  def set_save_file_dest_dir
    # Read the setting once so the test and the use cannot disagree.
    configured = save_file_dest_dir
    chosen = configured ? File.path(configured) : File.dirname(__FILE__)
    working_dir = File.expand_path(chosen)
    FileUtils.mkdir_p(working_dir)
    push_dir(working_dir)
  end
  48.  
  # Prints the selected course names (@select_subjects_jp), joined with the
  # Japanese comma, followed by a blank line.
  def print_download_subjects
    joined_names = @select_subjects_jp.join("、")
    print "ダウンロードする語学講座:#{joined_names}\n\n"
  end
  53.  
  # Performs an HTTPS GET on +url+ with peer certificate verification and
  # returns the response body as a String.
  #
  # The request target is uri.request_uri, so a query string is sent along
  # and a URL without a path is requested as "/" (uri.path dropped the query
  # and was empty for bare-host URLs). The TLS version is no longer pinned
  # to TLS 1.0; the protocol is left to negotiation.
  #
  # The HTTP status is not inspected: the body of an error response is
  # returned like any other.
  def https_body(url)
    uri = URI.parse(url)
    https = Net::HTTP.new(uri.host, uri.port)
    https.use_ssl = true
    https.verify_mode = OpenSSL::SSL::VERIFY_PEER
    https.get(uri.request_uri).body
  end
  62.  
  # Fetches the episode listing XML of every selected subject and returns a
  # Hash shaped like:
  #
  #   { subject => { :kouza => <3rd attribute of the first <music> node>,
  #                  {date: "2013_MM_DD"} => <5th attribute of that node>, ... } }
  #
  # Attributes are read by position, exactly as before.
  # NOTE(review): presumably position 3 is the course name and position 5 the
  # media file name - confirm against a real listdataflv.xml.
  #
  # A subject whose listing contains no <music> nodes is skipped with a
  # warning instead of crashing on nil.
  def get_source_urls
    @select_subjects.each_with_object({}) do |sub, data_hash|
      url = "https://cgi2.nhk.or.jp/gogaku/#{@subject_urls[sub]}/listdataflv.xml"
      entries = Nokogiri.XML(https_body(url)).xpath("/musicdata/music")
      if entries.empty?
        warn "no <music> entries found at #{url}; skipping #{sub.inspect}"
        next
      end
      data_hash[sub] = {kouza: entries.first.values[2]}
      entries.each do |entry|
        data_hash[sub][{date: hdate_to_date_label(entry["hdate"])}] = entry.values[4]
      end
    end
  end

  # Builds the "YYYY_MM_DD" label from an hdate attribute by taking its first
  # two digit runs as month and day (e.g. "7月15日放送分" -> "2013_07_15").
  #
  # The previous pattern /(\d+)(\d+)/ split a single multi-digit run in two
  # ("15" became month 1, day 5) and raised when both numbers had one digit.
  # Raises ArgumentError when fewer than two numbers are present.
  def hdate_to_date_label(hdate, year = 2013)
    month, day = hdate.to_s.scan(/\d+/).first(2).map { |digits| digits.to_i }
    raise ArgumentError, "unrecognized hdate: #{hdate.inspect}" unless month && day
    "%d_%02d_%02d" % [year, month, day]
  end
  78.  
  # Starts processing one subject: stores the course name (value[:kouza]) in
  # @subject, announces it, and makes sure the per-subject directory
  # "<current_dir>/<key>/" exists.
  #
  # key   - subject identifier; its string form is the directory name.
  # value - Hash for that subject; must contain :kouza.
  #
  # Returns the subject directory path (with a trailing slash).
  def prepare_download_each_subject(key, value)
    @subject = value[:kouza]
    puts "講座名:#{@subject}"
    File.join(current_dir, key.to_s + "/").tap do |subject_dir|
      Dir.mkdir(subject_dir) unless File.directory?(subject_dir)
    end
  end
  86.  
  # Runs `phantomjs loadConnectDirectory.js` with @script_dir as the working
  # directory and returns its standard output with surrounding whitespace
  # stripped.
  #
  # The command is started from an argument vector rather than a shell
  # string, so a script directory containing spaces or shell metacharacters
  # no longer breaks (or alters) the command. Raises Errno::ENOENT when
  # phantomjs is not on PATH.
  def connect_directory
    script = File.join(@script_dir, "loadConnectDirectory.js")
    IO.popen(["phantomjs", script], chdir: @script_dir) { |io| io.read }.strip
  end
  90.  
  # Downloads and converts every episode of one subject.
  #
  # value - the per-subject Hash: the :kouza entry is skipped; every other
  #         entry maps {date: "YYYY_MM_DD"} to the stream's file identifier.
  #
  # For each episode this sets @metadata, reads the playlist, fetches the AES
  # key, then delegates to make_merge_list, merge_mp4, convert_mp4_mp3 and
  # delete_temp_files. Returns +value+.
  def each_date_process(value)
    value.each do |k, v|
      next if k == :kouza
      date = k[:date]
      @metadata = {
        subject: @subject,
        title:   "#{@subject}_#{date}",
        genre:   "Speech",
        create:  "NHK",
        year:    "2013"
      }
      puts "日付:#{date}"
      audio_source_url = "https://nhkmovs-i.akamaihd.net/i/gogaku/streaming/mp4/#{connect_directory}/#{v}/index_0_a.m3u8"
      # Every playlist line mentioning a URL: the first one carries the key
      # URI, the remaining ones are the media segments.
      source_urls = https_body(audio_source_url).lines.grep(/http/).map { |url| url.strip }
      crypt_key_url = source_urls.shift[%r{https://.*\.key}]
      # Hex-encode the first 16 key bytes with two digits per byte. The old
      # to_s(16) + pack('a2') form padded bytes below 0x10 with a NUL byte
      # instead of a leading zero, which corrupted the key.
      @decrypt_key = https_body(crypt_key_url).unpack('H32').first
      merge_list = make_merge_list(source_urls)
      print "ファイル変換中...\n"
      merge_mp4(merge_list)
      convert_mp4_mp3
      delete_temp_files
      print "\n"
    end
  end
  114.  
  # Ensures the scratch directory "<current_dir>/temp/" exists, then
  # downloads and decrypts +urls+ into it.
  #
  # Returns whatever download_and_decrypt_gogaku_files returns (the list of
  # decrypted part files).
  def make_merge_list(urls)
    scratch_dir = File.join(current_dir, "temp/")
    File.directory?(scratch_dir) || Dir.mkdir(scratch_dir)
    download_and_decrypt_gogaku_files(urls, scratch_dir)
  end
  120.  
  # Redraws a one-line, 50-column text progress bar on standard output.
  #
  # progress - amount completed so far.
  # max      - total amount; also fixes the width of the printed counter.
  # unit     - label printed after the "progress / max" counter.
  #
  # The filled fraction is clamped to 0..1 and treated as 0 when max is not
  # positive. Previously max == 0 raised FloatDomainError (NaN.to_i) and
  # progress > max drew a bar wider than 50 columns.
  def progress_bar(progress, max, unit)
    max_digits = max.to_s.size
    ratio = max.to_f > 0 ? [[progress.to_f / max, 0.0].max, 1.0].min : 0.0
    filled = '*' * (ratio * 50).to_i
    # The leading "\r" returns to column 0 so the bar overwrites itself.
    print "\r" + "[#{'%-50s' % filled}] #{'%*d' % [max_digits, progress]} / #{max} #{unit}"
  end
  125.  
  # Downloads every segment in +urls+, decrypts it with AES-128-CBC and
  # writes the result to "<temp_dir>/temp_<n>.mp4" (n counts from 1).
  #
  # urls     - segment URLs in playback order.
  # temp_dir - existing directory that receives the decrypted parts.
  #
  # The key is @decrypt_key (32 hex digits); the IV of segment n is n as a
  # 128-bit big-endian number, matching the former `-iv '%032x' % n`.
  #
  # Returns a String of the part paths, each followed by one space, as
  # consumed by merge_mp4.
  #
  # Decryption runs in-process through OpenSSL::Cipher instead of shelling
  # out to `openssl`, so paths are never interpolated into a shell command
  # and the encrypted data no longer passes through a text-mode temp file.
  def download_and_decrypt_gogaku_files(urls, temp_dir)
    key = [@decrypt_key].pack('H*')
    max = urls.size
    part_paths = urls.each_with_index.map do |url, index|
      sequence = index + 1
      part_path = File.join(temp_dir, "temp_#{sequence}.mp4")
      plain = decrypt_segment(fetch_segment(url), key, sequence)
      File.open(part_path, "wb") { |f| f.write(plain) }
      progress_bar(sequence, max, "ファイル")
      part_path
    end
    print "\n"
    part_paths.map { |path| "#{path} " }.join
  end

  # GETs +url+ and returns the raw response body. TLS (with peer
  # verification) is enabled for https URLs, and the query string is sent
  # along with the path.
  def fetch_segment(url)
    uri = URI.parse(url)
    http = Net::HTTP.new(uri.host, uri.port)
    if uri.scheme == "https"
      http.use_ssl = true
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    end
    http.get(uri.request_uri).body
  end

  # Decrypts one AES-128-CBC segment (default padding, as the openssl CLI
  # uses) with the binary +key+ and an IV derived from +sequence+.
  def decrypt_segment(data, key, sequence)
    cipher = OpenSSL::Cipher.new("aes-128-cbc")
    cipher.decrypt
    cipher.key = key
    cipher.iv = ['%032x' % sequence].pack('H*')
    cipher.update(data) + cipher.final
  end
  144.  
  # Concatenates the part files named in +merge_list+ into
  # "<title_path>.mp4".
  #
  # merge_list - String of whitespace-separated part paths, in order.
  #
  # The copy is done in Ruby in binary mode rather than through
  # `cat ... > file`, so an output title containing spaces or shell
  # metacharacters no longer breaks the command, and a missing part raises
  # instead of silently producing a truncated file. Returns nil.
  def merge_mp4(merge_list)
    File.open("#{title_path}.mp4", "wb") do |merged|
      merge_list.split.each { |part| IO.copy_stream(part, merged) }
    end
    nil
  end
  148.  
  # Converts "<title_path>.mp4" to "<title_path>.mp3" with ffmpeg (64k
  # bitrate, ID3v2.3 tags taken from @metadata) while drawing a progress bar
  # from ffmpeg's own status output.
  #
  # Changes from the earlier version:
  # * ffmpeg is started from an argument vector, so titles and course names
  #   containing spaces or quotes are passed through intact.
  # * Output is split on "\r", the separator ffmpeg uses between status
  #   updates; the separator used to be the letter "r".
  # * The duration is taken from the "Duration:" header only, and the dots
  #   in both timestamp patterns are matched literally.
  # * The bar is not drawn while the duration is unknown or zero.
  def convert_mp4_mp3
    command_ffmpeg = [
      "ffmpeg", "-y", "-i", "#{title_path}.mp4", "-ab", "64k",
      "-metadata", "album=#{@metadata[:subject]}",
      "-metadata", "title=#{@metadata[:title]}",
      "-metadata", "genre=#{@metadata[:genre]}",
      "-metadata", "artist=#{@metadata[:create]}",
      "-metadata", "date=#{@metadata[:year]}",
      "-id3v2_version", "3", "#{title_path}.mp3"
    ]
    # ffmpeg reports on stderr, so fold it into the pipe we read.
    IO.popen(command_ffmpeg, err: [:child, :out]) do |pipe|
      duration = nil
      progress = 0
      pipe.each_line("\r") do |line|
        if (found = line.match(/Duration: (\d{2}):(\d{2}):(\d{2})\.(\d{2})/))
          duration = ffmpeg_timestamp_seconds(found)
        end
        if (found = line.match(/time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})/))
          progress = ffmpeg_timestamp_seconds(found)
          progress = duration if duration && progress > duration
        end
        progress_bar(progress, duration, "秒") if duration && duration > 0
      end
      print "\n"
    end
  end

  # Converts a MatchData holding hours, minutes, seconds and centiseconds in
  # groups 1..4 into whole seconds, rounded to the nearest second.
  def ffmpeg_timestamp_seconds(found)
    hours, minutes, seconds, centis = found.captures.map { |part| part.to_i }
    ((hours * 360000 + minutes * 6000 + seconds * 100 + centis) / 100.0).round
  end
  169.  
  # Cleans up after one episode: removes the "temp/" directory under
  # current_dir (recursively) and every "*.mp4" file directly inside
  # current_dir. Missing files are ignored.
  def delete_temp_files
    base_dir = current_dir
    leftover_mp4s = Dir.glob(File.join(base_dir, "*.mp4"))
    FileUtils.rm_rf(File.join(base_dir, "temp/"))
    FileUtils.rm_f(leftover_mp4s)
  end
  176.  
  # Returns the directory most recently entered through push_dir / pop_dir
  # (the value of @pwd), or nil when none has been entered yet.
  def current_dir
    @pwd
  end
  180.  
  # Returns the extension-less path of the episode being processed:
  # current_dir joined with @metadata[:title].
  def title_path
    episode_title = @metadata[:title]
    File.join(current_dir, episode_title)
  end
  184.  
  # Pushes +dir+ onto the directory stack (@pwds, created on first use),
  # records it as @pwd and changes the process working directory to it.
  #
  # Returns +dir+.
  def push_dir(dir)
    (@pwds ||= []) << dir
    @pwd = @pwds.last
    Dir.chdir(@pwd)
    @pwd
  end
  192.  
  # Drops the top entry of the directory stack (@pwds), then records the new
  # top as @pwd and changes the process working directory back to it.
  #
  # Returns the directory that is now current.
  def pop_dir
    @pwds.pop
    (@pwd = @pwds.last).tap { |previous_dir| Dir.chdir(previous_dir) }
  end
  199.  
  # Entry point: loads the configuration, announces the selected courses,
  # then for each subject returned by get_source_urls enters its directory,
  # processes all of its episodes and returns to the previous directory.
  # Prints a completion message at the end.
  def gogaku_on_demand
    load_pref
    print_download_subjects
    get_source_urls.each do |subject_key, episodes|
      push_dir(prepare_download_each_subject(subject_key, episodes))
      each_date_process(episodes)
      pop_dir
    end
    puts "作業終了"
  end
  212.  
  # Script entry: run the whole download as soon as the file is executed.
  gogaku_on_demand
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement