Advertisement
Guest User

Untitled

a guest
May 25th, 2016
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.63 KB | None | 0 0
  1. #!/usr/bin/env ruby
  2.  
  3. # encoding: utf-8
  4.  
  5. require 'nkf'
  6.  
  7. @encodings = ['utf-8','euc-jp','shift_jis','ascii']
  8.  
  9. @help = <<EOF
  10.  
  11. Usage: yubin2sql.rb [input-encoding] [KEN_ALL.CSV] (database) (> output.sql)
  12.  
  13. * if you don't know encoding, please use `nkf -g file`.
  14. * to install nkf, `brew install nkf` for example.
  15. * encoding is one of:
  16. [#{@encodings.join(", ")}]
  17. EOF
  18.  
  19. def help_and_exit(error_message)
  20. $stderr.puts ""
  21. $stderr.puts "[ERROR] " + error_message
  22. $stderr.puts @help
  23. exit 1
  24. end
  25.  
  26. def regex(str)
  27. Regexp.new(str.force_encoding(@encoding),
  28. Regexp::FIXEDENCODING)
  29. end
  30.  
  31. def encode_str(str)
  32. if @ec == nil
  33. @ec = Encoding::Converter.new(@encoding, "utf-8")
  34. end
  35.  
  36. @ec.convert(str)
  37. end
  38.  
  39. if ARGV.size < 2
  40. help_and_exit "Arguments is missing"
  41. end
  42.  
  43. def han2zen(str)
  44. NKF.nkf('-w -X Z4', str)
  45. end
  46.  
  47. if @encodings.index(ARGV[0]) == nil
  48. help_and_exit "Given encoding is invalid"
  49. else
  50. @encoding = ""+ARGV[0]
  51. # @encoding.gsub!("SHIFT-JIS","Windows-31J")
  52. # @encoding.gsub!("ASCII","ASCII-8bit")
  53. end
  54.  
  55. if File.exist?(ARGV[1]) == false
  56. help_and_exit "Given file not exists"
  57. elsif /\.csv$|\.CSV$/.match(ARGV[1]) === nil
  58. help_and_exit "File type should be CSV"
  59. end
  60.  
  61. $stderr.puts ""
  62. $stderr.puts "Generating..."
  63. $stderr.print "[ ]"
  64.  
  65. @sql_scheme = <<EOF
  66. #{"use "+ARGV[2] if ARGV.size > 2};
  67. DROP TABLE IF EXISTS `pd_postcode`;
  68. CREATE TABLE `pd_postcode` (
  69. `id` int(11) NOT NULL AUTO_INCREMENT,
  70. `citycode` varchar(20),
  71. `oldcode` varchar(10),
  72. `code` varchar(20) NOT NULL,
  73. `ken` varchar(10),
  74. `ken_kana` varchar(255),
  75. `city` varchar(255),
  76. `city_kana` varchar(255),
  77. `region` varchar(255),
  78. `region_kana` varchar(255),
  79. `one_in_many_code` int(1),
  80. `has_koaza` int(1),
  81. `has_choume` int(1),
  82. `many_in_one_code` int(1),
  83. `updated` int(1),
  84. `why_updated` int(1),
  85. `address` varchar(255),
  86. `address_kana` varchar(255),
  87. PRIMARY KEY (`id`),
  88. KEY (`citycode`, `code`)
  89. );
  90. EOF
  91.  
  92. puts @sql_scheme
  93.  
  94. @path = ARGV[1]
  95. @file = open(@path, "r")
  96. @file.each_line.with_index do |line,i|
  97. line = encode_str(line)
  98. line = han2zen(line)
  99. line.chomp!
  100.  
  101. list = line.split(/"?,"?/)
  102.  
  103. def m(i)
  104. i-1
  105. end
  106.  
  107. citycode = list[m 1]
  108. oldcode = list[m 2]
  109. code = list[m 3]
  110. ken = list[m 7]
  111. ken_kana = list[m 4]
  112. city = list[m 8]
  113. city_kana = list[m 5]
  114. region = list[m 9]
  115. region_kana = list[m 6]
  116. one_in_many_code = list[m 10]
  117. has_koaza = list[m 11]
  118. has_choume = list[m 12]
  119. many_in_one_code = list[m 13]
  120. updated = list[m 14]
  121. why_updated = list[m 15]
  122.  
  123. @sql_insert = <<EOF
  124. INSERT INTO `pd_postcode`
  125. (`citycode`,`oldcode`,`code`,`ken`,`ken_kana`,
  126. `city`,`city_kana`,`region`,`region_kana`,
  127. `one_in_many_code`,`has_koaza`,`has_choume`,`many_in_one_code`,`updated`,`why_updated`,
  128. `address`,`address_kana`)
  129. VALUES
  130. ("#{citycode}","#{oldcode}","#{code}","#{ken}","#{ken_kana}"
  131. ,"#{city}","#{city_kana}","#{region}","#{region_kana}"
  132. ,#{one_in_many_code},#{has_koaza},#{has_choume},#{many_in_one_code},#{updated},#{why_updated}
  133. ,"#{ken+city+region}","#{ken_kana+city_kana+region_kana}")
  134. ;
  135. EOF
  136.  
  137. puts @sql_insert
  138.  
  139. if i%5000 == 0
  140. $stderr.print("\b\b| ]")
  141. end
  142.  
  143. # puts "citycode = #{citycode}"
  144. # puts "oldcode = #{oldcode}"
  145. # puts "code = #{code}"
  146. # puts "ken = #{ken}"
  147. # puts "ken_kana = #{ken_kana}"
  148. # puts "city = #{city}"
  149. # puts "city_kana = #{city_kana}"
  150. # puts "range = #{range}"
  151. # puts "range_kana = #{range_kana}"
  152. # puts "one_in_many_code = #{one_in_many_code}"
  153. # puts "has_koaza = #{has_koaza}"
  154. # puts "has_choume = #{has_choume}"
  155. # puts "many_in_one_code = #{many_in_one_code}"
  156. # puts "updated = #{updated}"
  157. # puts "why_updated = #{why_updated}"
  158. end
  159.  
  160. $stderr.puts ""
  161. $stderr.puts "Done!"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement