Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env ruby
# encoding: utf-8
require 'nkf'

# Input encodings accepted as the first command-line argument.
@encodings = %w[utf-8 euc-jp shift_jis ascii]

# Usage text printed together with error messages; its last row lists the
# accepted encodings. Every row is terminated by a newline.
@help = [
  "Usage: yubin2sql.rb [input-encoding] [KEN_ALL.CSV] (database) (> output.sql)",
  "* if you don't know encoding, please use `nkf -g file`.",
  "* to install nkf, `brew install nkf` for example.",
  "* encoding is one of:",
  "[#{@encodings.join(", ")}]"
].map { |row| row + "\n" }.join
# Reports a fatal usage error and terminates the script.
#
# Writes a blank line, the message prefixed with "[ERROR] ", and the usage
# text held in @help to standard error, then exits with status 1.
#
# @param error_message [String] description of what went wrong
def help_and_exit(error_message)
  $stderr.puts
  $stderr.puts("[ERROR] " + error_message)
  $stderr.puts(@help)
  exit(1)
end
# Builds a fixed-encoding Regexp from +str+, interpreting the pattern bytes
# in the encoding named by @encoding.
#
# The pattern is re-tagged on a copy, so the caller's string keeps its own
# encoding and frozen strings are accepted.
#
# @param str [String] regular-expression source
# @return [Regexp] pattern created with Regexp::FIXEDENCODING
def regex(str)
  pattern = str.dup.force_encoding(@encoding)
  Regexp.new(pattern, Regexp::FIXEDENCODING)
end
# Converts +str+ from the input encoding named by @encoding to UTF-8.
#
# UTF-8 input is passed through (re-tagged on a copy, bytes not validated)
# because Encoding::Converter cannot be built when source and destination
# are the same encoding. For every other encoding a converter is created
# on first use and cached in @ec.
#
# @param str [String] text in the input encoding
# @return [String] the text as UTF-8
def encode_str(str)
  return str.dup.force_encoding(Encoding::UTF_8) if utf8_input?

  @ec ||= Encoding::Converter.new(@encoding, "utf-8")
  @ec.convert(str)
end

# True when @encoding names UTF-8. An unknown or missing name answers false
# so that Encoding::Converter.new reports the problem as it did before.
def utf8_input?
  Encoding.find(@encoding) == Encoding::UTF_8
rescue ArgumentError, TypeError
  false
end
# At least two command-line arguments are required before going further.
help_and_exit "Arguments is missing" unless ARGV.size >= 2
# Runs +str+ through NKF with the option string '-w -X Z4' and returns
# NKF's output.
# NOTE(review): presumably this widens half-width kana into full-width
# UTF-8 text; confirm the effect of each option against the nkf reference.
#
# @param str [String] text to convert
# @return [String] the converted text
def han2zen(str)
  nkf_options = '-w -X Z4'
  NKF.nkf(nkf_options, str)
end
# Validate the input-encoding argument. The name is lower-cased first so
# that spellings such as "UTF-8" or "Shift_JIS" are accepted as well as the
# lower-case names listed in @encodings.
requested_encoding = ARGV[0].to_s.downcase
if @encodings.include?(requested_encoding)
  @encoding = requested_encoding
  # @encoding.gsub!("SHIFT-JIS","Windows-31J")
  # @encoding.gsub!("ASCII","ASCII-8bit")
else
  help_and_exit "Given encoding is invalid"
end

# Validate the input file: it must exist and carry a .csv extension.
# The extension check ignores case and is anchored to the end of the whole
# string (\z) rather than the end of a line ($).
if !File.exist?(ARGV[1])
  help_and_exit "Given file not exists"
elsif ARGV[1] !~ /\.csv\z/i
  help_and_exit "File type should be CSV"
end
# Start the progress display on standard error.
$stderr.puts ""
$stderr.puts "Generating..."
$stderr.print "[ ]"

# Returns the SQL statement that selects +database+, or an empty string when
# no database was named, so the script never opens with a lone ";".
#
# @param database [String, nil] database name from the command line
# @return [String] "use <database>;" or ""
def use_statement(database)
  return "" if database.nil? || database.empty?

  "use #{database};"
end

# Schema preamble: optional database selection, then (re)creation of the
# pd_postcode table that the generated INSERT statements fill.
@sql_scheme = <<EOF
#{use_statement(ARGV[2])}
DROP TABLE IF EXISTS `pd_postcode`;
CREATE TABLE `pd_postcode` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`citycode` varchar(20),
`oldcode` varchar(10),
`code` varchar(20) NOT NULL,
`ken` varchar(10),
`ken_kana` varchar(255),
`city` varchar(255),
`city_kana` varchar(255),
`region` varchar(255),
`region_kana` varchar(255),
`one_in_many_code` int(1),
`has_koaza` int(1),
`has_choume` int(1),
`many_in_one_code` int(1),
`updated` int(1),
`why_updated` int(1),
`address` varchar(255),
`address_kana` varchar(255),
PRIMARY KEY (`id`),
KEY (`citycode`, `code`)
);
EOF
puts @sql_scheme
# Wraps +value+ in double quotes for use as an SQL string literal,
# backslash-escaping any embedded backslash or double quote.
#
# @param value [String] raw column text
# @return [String] quoted literal
def sql_string(value)
  '"' + value.to_s.gsub(/[\\"]/) { |ch| "\\" + ch } + '"'
end

# True when +fields+ holds all 15 columns and columns 10-15 (the integer
# columns of the table) consist solely of ASCII digits.
#
# @param fields [Array<String>] columns of one CSV line
def complete_row?(fields)
  fields.size >= 15 && fields[9, 6].all? { |flag| flag =~ /\A\d+\z/ }
end

# Builds the INSERT statement for one row.
#
# CSV column order: citycode, oldcode, code, three kana columns (ken, city,
# region), the three matching non-kana columns, then six integer columns.
# `address` / `address_kana` are the concatenation of the three parts.
#
# @param fields [Array<String>] columns accepted by complete_row?
# @return [String] the statement, terminated by ";" and a newline
def insert_statement(fields)
  citycode, oldcode, code, ken_kana, city_kana, region_kana, ken, city, region = fields[0, 9]
  flags = fields[9, 6]
  [
    "INSERT INTO `pd_postcode`",
    "(`citycode`,`oldcode`,`code`,`ken`,`ken_kana`,",
    "`city`,`city_kana`,`region`,`region_kana`,",
    "`one_in_many_code`,`has_koaza`,`has_choume`,`many_in_one_code`,`updated`,`why_updated`,",
    "`address`,`address_kana`)",
    "VALUES",
    "(" + [citycode, oldcode, code, ken, ken_kana].map { |v| sql_string(v) }.join(","),
    "," + [city, city_kana, region, region_kana].map { |v| sql_string(v) }.join(","),
    "," + flags.join(","),
    "," + sql_string(ken + city + region) + "," + sql_string(ken_kana + city_kana + region_kana) + ")",
    ";"
  ].join("\n") + "\n"
end

@path = ARGV[1]
# File.open with a block closes the handle when done and, unlike
# Kernel#open, never treats a path starting with "|" as a command.
File.open(@path, "r") do |file|
  file.each_line.with_index do |raw_line, i|
    fields = han2zen(encode_str(raw_line)).chomp.split(/"?,"?/)
    if complete_row?(fields)
      @sql_insert = insert_statement(fields)
      puts @sql_insert
    elsif !fields.empty?
      # Report and skip rows that cannot form a valid statement instead of
      # crashing or emitting broken SQL; blank lines are skipped silently.
      $stderr.puts "", "[WARN] skipped malformed line #{i + 1}"
    end
    # Advance the progress bar once every 5000 input lines.
    $stderr.print("\b\b| ]") if (i % 5000).zero?
  end
end
$stderr.puts ""
$stderr.puts "Done!"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement