Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env ruby
# coding: utf-8
# Downloading every course takes roughly 20 minutes and about 557 MB
# (the mp4-to-mp3 conversion time depends on the machine).
require 'fileutils'
require 'net/https'
require 'shellwords'

require 'nokogiri'
# Loads all configuration, in dependency order: switch to the script's own
# directory, read the preference and subject tables, then switch to the
# directory downloads are saved in.
#
# Returns whatever set_save_file_dest_dir returns.
def load_pref
  set_script_dir # chdir first: the later `load './...'` calls are relative
  load_pref_file
  load_subjects_file
  set_save_file_dest_dir
end
# Stores the absolute directory of this script in @script_dir and makes it
# the current directory through push_dir.
#
# Returns push_dir's result.
def set_script_dir
  script_location = File.dirname(__FILE__)
  @script_dir = File.expand_path(script_location)
  push_dir(@script_dir)
end
# Loads ./pref.rb (relative to the current directory) and collects into
# @select_subjects_jp every key of `pref` whose value is truthy.
# `pref` is expected to be defined by pref.rb.
def load_pref_file
  load './pref.rb'
  @select_subjects_jp = []
  pref.each do |name, enabled|
    next unless enabled

    @select_subjects_jp.push(name)
  end
end
# Loads ./subjects.rb (relative to the current directory), caches its
# `subject_urls` and `subjects_jp` tables in instance variables, and maps each
# selected Japanese subject name to its entry in @subjects_jp.
#
# The results go to @select_subjects, in the same order as
# @select_subjects_jp; a name missing from the table yields nil.
def load_subjects_file
  load './subjects.rb'
  @select_subjects = []
  @subject_urls = subject_urls
  @subjects_jp = subjects_jp
  @select_subjects_jp.each { |jp_name| @select_subjects.push(@subjects_jp[jp_name]) }
end
# Resolves the directory downloads are saved in, creates it if necessary, and
# makes it the current directory through push_dir.
#
# The destination comes from `save_file_dest_dir` (presumably defined by
# pref.rb - confirm); when that is nil/false the script's own directory is
# used instead.
#
# Missing parent directories are created as well, so a nested destination
# such as "~/Music/NHK/gogaku" works on first run (Dir.mkdir would raise
# Errno::ENOENT there).
#
# Returns push_dir's result.
def set_save_file_dest_dir
  configured = save_file_dest_dir
  working_dir =
    if configured
      File.expand_path(File.path(configured))
    else
      File.expand_path(File.dirname(__FILE__))
    end
  FileUtils.mkdir_p(working_dir)
  push_dir(working_dir)
end
# Prints the Japanese names of the selected courses on one line, separated by
# the ideographic comma, followed by a blank line.
def print_download_subjects
  subject_list = @select_subjects_jp.join("、")
  print "ダウンロードする語学講座:#{subject_list}\n\n"
end
# Fetches +url+ over HTTPS with certificate verification and returns the
# response body as a String.
#
# url - absolute https URL (String).
#
# The TLS version is deliberately not pinned: forcing :TLSv1 makes the
# handshake fail against servers and OpenSSL builds that have disabled
# TLS 1.0, whereas the default negotiates the best mutually supported version.
#
# The HTTP status is not checked; an error page body is returned as-is.
def https_body(url)
  uri = URI.parse(url)
  https = Net::HTTP.new(uri.host, uri.port)
  https.use_ssl = true
  https.verify_mode = OpenSSL::SSL::VERIFY_PEER
  # request_uri is the path plus "?query" when present, and "/" for an empty
  # path (a bare uri.path would be "" and produce an invalid request line).
  https.get(uri.request_uri).body
end
# Downloads the lesson list (listdataflv.xml) of every selected subject and
# builds the download table.
#
# Returns a Hash keyed by subject:
#   { subject => { kouza: <3rd attribute of the first <music> element>,
#                  { date: "YYYY_MM_DD" } => <5th attribute of that <music>>,
#                  ... } }
# NOTE(review): the attributes are picked by position; presumably they are the
# course title and the media file code - confirm against the live feed.
#
# Subjects whose feed contains no <music> element are skipped with a warning
# instead of crashing on nil.
def get_source_urls
  data_hash = {}
  @select_subjects.each do |sub|
    url = "https://cgi2.nhk.or.jp/gogaku/#{@subject_urls[sub]}/listdataflv.xml"
    entries = Nokogiri.XML(https_body(url)).xpath("/musicdata/music")
    if entries.empty?
      warn "講座データが見つかりません:#{sub}"
      next
    end

    lessons = { kouza: entries[0].values[2] }
    entries.each do |entry|
      # Convert to Integer before formatting: "%02d" applied to a String such
      # as "08" raises ArgumentError because it is parsed as invalid octal.
      month, day = entry["hdate"].scan(/(\d+)月(\d+)日/).first.map(&:to_i)
      date = format("%d_%02d_%02d", record_year(month), month, day)
      lessons[{ date: date }] = entry.values[4]
    end
    data_hash[sub] = lessons
  end
  data_hash
end
# Infers the calendar year of a broadcast from its month alone.
#
# record_month - month number (1-12) of the broadcast.
# now          - reference time; defaults to the current time. Injectable so
#                the year-rollover logic can be exercised deterministically.
#
# A month later than the reference month is taken to belong to the previous
# year (e.g. a December lesson listed in January).
#
# Returns the year as an Integer and also stores it in @record_year.
def record_year(record_month, now = Time.now)
  @record_year = record_month > now.month ? now.year - 1 : now.year
end
# Announces the course and makes sure its output directory exists.
#
# key   - subject identifier; its string form names the directory.
# value - the subject's entry from the download table; value[:kouza] is
#         stored in @subject and printed.
#
# Returns the subject directory path (with a trailing slash) below
# current_dir.
def prepare_download_each_subject(key, value)
  @subject = value[:kouza]
  puts "講座名:#{@subject}"
  File.join(current_dir, "#{key}/").tap do |dir|
    Dir.mkdir(dir) unless File.directory?(dir)
  end
end
# Downloads, merges, converts and cleans up every lesson of one subject.
#
# value - the subject's entry from the download table: the :kouza key is
#         skipped, every other key is a { date: "YYYY_MM_DD" } Hash whose
#         value is the lesson's stream code.
#
# For each lesson @metadata is rebuilt (it is read by title_path and
# convert_mp4_mp3). The :year tag is taken from the lesson's own date string:
# @record_year only holds the year computed last while the feeds were parsed,
# so using it would tag e.g. December lessons fetched in January with the
# wrong year. @record_year remains the fallback when the date has no leading
# four-digit year.
def each_date_process(value)
  value.each do |k, v|
    next if k == :kouza

    date = k[:date]
    @metadata = {
      subject: @subject,
      title: "#{@subject}_#{date}",
      genre: "Speech",
      create: "NHK",
      year: date.to_s[/\A\d{4}/] || @record_year.to_s
    }
    puts "日付:#{date}"
    print "ダウンロード中...\n"
    merge_list = make_decrypt_key(v)
    print "ファイル変換中...\n"
    merge_mp4(merge_list)
    convert_mp4_mp3
    delete_temp_files
    print "\n"
  end
end
# Resolves a lesson's playlists, fetches its decryption key, and hands the
# segment URLs to make_merge_list.
#
# subject_code - stream code inserted into the master playlist URL.
#
# Steps: the first "http..." match in the master playlist is used as the
# index playlist URL; every index line containing "http" is collected; the
# first such line supplies the key URL (its last character is dropped -
# presumably the closing quote of a URI="..." attribute, confirm), and the
# remaining lines are the segment URLs. The fetched key body is stored in
# @decrypt_bin_key.
#
# Returns make_merge_list's result.
def make_decrypt_key(subject_code)
  master_url = "https://nhk-vh.akamaihd.net/i/gogaku-stream/mp4/#{subject_code}/master.m3u8"
  index_url = https_body(master_url)[/http.*/]
  http_lines = https_body(index_url).lines.grep(/http/).map(&:strip)
  key_line = http_lines.shift
  key_url = key_line[/https:\/\/.*/][0...-1]
  @decrypt_bin_key = https_body(key_url)
  make_merge_list(http_lines)
end
# Ensures the "temp/" scratch directory exists below current_dir, then
# downloads and decrypts all segments into it.
#
# urls - segment URLs to download.
#
# Returns download_and_decrypt_gogaku_files' result.
def make_merge_list(urls)
  scratch_dir = File.join(current_dir, "temp/")
  File.directory?(scratch_dir) || Dir.mkdir(scratch_dir)
  download_and_decrypt_gogaku_files(urls, scratch_dir)
end
# Downloads every encrypted segment, decrypts it with @decrypt_bin_key, and
# writes it to temp_dir as temp_<n>.mp4 (n counted from 1), updating the
# progress bar after each file.
#
# urls     - segment URLs, in playback order.
# temp_dir - existing directory that receives the decrypted segments.
#
# Returns a String listing the segment paths in order, each followed by a
# single space ("" when urls is empty). Each path is shell-escaped, so the
# list is safe to splice into a shell command and can be split back with
# Shellwords.split even when a directory name contains spaces.
def download_and_decrypt_gogaku_files(urls, temp_dir)
  total = urls.size
  segment_paths = urls.each_with_index.map do |url, index|
    sequence = index + 1
    uri = URI.parse(url)
    # Enable TLS for https segment URLs (the connection was previously always
    # plain HTTP), and use request_uri so a URL without a query string works.
    encrypted = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.get(uri.request_uri).body
    end
    # The IV is the segment number as a 128-bit big-endian integer.
    # NOTE(review): assumes the playlist numbers its segments from 1 - confirm.
    iv = [format('%032x', sequence)].pack('H32')
    path = File.join(temp_dir, "temp_#{sequence}.mp4")
    # binwrite: the payload is binary and must not go through text-mode
    # newline or encoding conversion.
    File.binwrite(path, decrypt(encrypted, @decrypt_bin_key, iv))
    progress_bar(sequence, total, "ファイル")
    path
  end
  print "\n"
  segment_paths.map { |path| "#{Shellwords.escape(path)} " }.join
end
# Redraws a 50-column progress bar on the current terminal line.
#
# progress - amount done so far.
# max      - total amount; also determines the width the counter is padded to.
# unit     - label printed after the counter.
#
# Output has the form "\r[*****     ] <progress> / <max> <unit>" with no
# trailing newline, so successive calls overwrite each other.
def progress_bar(progress, max, unit)
  counter_width = max.to_s.size
  filled = (progress.to_f / max * 50).to_i
  bar = ('*' * filled).ljust(50)
  counter = format("%#{counter_width}d", progress)
  print "\r[#{bar}] #{counter} / #{max} #{unit}"
end
# Decrypts +str+ with AES-128 in CBC mode.
#
# str - ciphertext bytes.
# key - 16-byte key.
# iv  - 16-byte initialisation vector.
#
# Returns the plaintext; OpenSSL raises if the final block's padding is
# invalid.
def decrypt(str, key, iv)
  cipher = OpenSSL::Cipher.new("aes-128-cbc")
  cipher.decrypt # must select decrypt mode before assigning key and iv
  cipher.key = key
  cipher.iv = iv
  plaintext = cipher.update(str)
  plaintext + cipher.final
end
# Concatenates the decrypted segments into "<title_path>.mp4".
#
# merge_list - String of segment paths separated by whitespace, in playback
#              order; shell-style quoting/escaping of a path is honoured.
#
# The merge is done in-process and has finished when this method returns.
# Spawning `cat ... > file` through IO.popen without waiting let the following
# ffmpeg step start on a partially written file, leaked the pipe, and broke
# when the output path contained spaces or shell metacharacters.
#
# Raises Errno::ENOENT if a listed segment does not exist.
def merge_mp4(merge_list)
  File.open("#{title_path}.mp4", "wb") do |merged|
    Shellwords.split(merge_list).each do |segment|
      File.open(segment, "rb") { |part| IO.copy_stream(part, merged) }
    end
  end
end
# Converts "<title_path>.mp4" to a 64 kbit/s "<title_path>.mp3" with ffmpeg,
# tagging it from @metadata, and draws a progress bar in seconds while ffmpeg
# runs.
#
# ffmpeg is started from an argument array, without a shell, so paths and tag
# values containing spaces, quotes or other shell metacharacters are passed
# through verbatim. Its stderr is merged into the pipe because that is where
# ffmpeg writes its status output.
def convert_mp4_mp3
  command = [
    "ffmpeg", "-y",
    "-i", "#{title_path}.mp4",
    "-ab", "64k",
    "-metadata", "album=#{@metadata[:subject]}",
    "-metadata", "title=#{@metadata[:title]}",
    "-metadata", "genre=#{@metadata[:genre]}",
    "-metadata", "artist=#{@metadata[:create]}",
    "-metadata", "date=#{@metadata[:year]}",
    "-id3v2_version", "3",
    "#{title_path}.mp3"
  ]
  # hh, mm, ss, hundredths -> whole seconds, rounded.
  to_seconds = lambda do |hours, minutes, seconds, hundredths|
    ((hours.to_i * 360000 + minutes.to_i * 6000 + seconds.to_i * 100 + hundredths.to_i) / 100.0).round
  end

  IO.popen(command, err: [:child, :out]) do |pipe|
    duration = nil
    progress = 0
    # ffmpeg rewrites its status line using carriage returns, so records are
    # split on "\r" (the previous separator was the letter "r").
    pipe.each("\r") do |line|
      if (found = line.match(/: (\d{2}):(\d{2}):(\d{2})\.(\d{2}),/))
        duration = to_seconds.call(*found.captures)
      end
      if (found = line.match(/time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})/))
        progress = to_seconds.call(*found.captures)
        progress = duration if duration && progress > duration
      end
      # A zero duration would make progress_bar divide by zero.
      progress_bar(progress, duration, "秒") if duration && duration > 0
    end
    print "\n"
  end
end
# Removes the intermediate files of one lesson: the "temp/" directory below
# current_dir and every *.mp4 file directly inside current_dir.
# Files that are already gone are ignored.
def delete_temp_files
  leftover_mp4s = Dir.glob(File.join(current_dir, "*.mp4"))
  FileUtils.rm_rf(File.join(current_dir, "temp/"))
  FileUtils.rm_f(leftover_mp4s)
end
# Returns the directory most recently made current by push_dir / pop_dir
# (nil before the first push_dir).
def current_dir
  @pwd
end
# Returns the extension-less output path of the current lesson: the lesson
# title from @metadata joined onto current_dir.
def title_path
  lesson_title = @metadata[:title]
  File.join(current_dir, lesson_title)
end
# Pushes +dir+ onto the directory stack (@pwds, created on first use), makes
# it the process's working directory, and records it in @pwd.
#
# Returns +dir+.
def push_dir(dir)
  (@pwds ||= []) << dir
  @pwd = @pwds.last
  Dir.chdir(@pwd)
  @pwd
end
# Drops the top entry of the directory stack and changes back to the entry
# below it, which becomes @pwd.
#
# Returns the directory that is now current. The stack must still hold at
# least one entry after the pop.
def pop_dir
  @pwds.pop
  Dir.chdir(@pwd = @pwds.last)
  @pwd
end
# Entry point: loads the configuration, lists the selected courses, fetches
# the lesson table, and processes every subject inside its own directory
# (entered with push_dir, left again with pop_dir). Prints a completion
# message at the end.
def gogaku_on_demand
  load_pref
  print_download_subjects
  get_source_urls.each do |subject_key, lessons|
    push_dir(prepare_download_each_subject(subject_key, lessons))
    each_date_process(lessons)
    pop_dir
  end
  puts "作業終了"
end
# Start the download only when this file is executed directly, so that
# loading it from another script (for example to reuse or test individual
# methods) does not trigger a download.
gogaku_on_demand if __FILE__ == $PROGRAM_NAME
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement