Guest User

Untitled

a guest
Apr 13th, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.71 KB | None | 0 0
  1. #!/usr/bin/env ruby
  2.  
  3. DEBUG = false
  4.  
  5. @start = Time.now if DEBUG
  6.  
  7. ##############################################################
  8. # Setup environment
  9. ##############################################################
  10. if File.directory?( File.join(File.dirname(__FILE__), "framework"))
  11. $:.unshift('framework')
  12. require File.join(File.dirname(__FILE__), "framework/merb")
  13. else
  14. require 'merb'
  15. end
  16.  
  17. MERB_ROOT = File.dirname(__FILE__)
  18. RAILS_ROOT = MERB_ROOT
  19.  
  20. RAILS_ENV = MERB_ENV
  21.  
  22. # Get Merb plugins and dependencies
  23. require File.dirname(__FILE__)+'/config/dependencies.rb'
  24.  
  25. ##############################################################
  26. # Loading Only The Needed Dependencies
  27. ##############################################################
  28. require File.join(MERB_ROOT, 'lib', 'related_media.rb')
  29.  
  30. require File.join(MERB_ROOT, 'lib', 'cci', 'cci_parser.rb')
  31.  
  32. require File.join(MERB_ROOT, 'lib', 'cci', 'keywords.rb')
  33.  
  34. require File.join(MERB_ROOT, 'app', 'models', 'story')
  35.  
  36. require File.join(MERB_ROOT, 'app', 'models', 'cut')
  37.  
  38. require File.join(MERB_ROOT, 'app', 'models', 'keywords')
  39.  
  40. require File.join(MERB_ROOT, 'app', 'models', 'raw_cci')
  41.  
  42. require File.join(MERB_ROOT, 'app', 'models', 'category')
  43.  
  44. require File.join(MERB_ROOT, 'deps', 'plugins', 'acts_as_solr', 'init')
  45.  
  46. require 'net/ftp'
  47.  
  48. include Net
  49.  
  50. ##############################################################
  51. # Setup Constants and other misc. info
  52. ##############################################################
  53.  
  54. #Setup FTP Info
  55.  
  56. #FTP_SERVER = 'wire2.wieck.com'
  57. #FTP_USER = 'nytpull'
  58. #FTP_PASSWORD = 't0rnad0'
  59.  
  60. FTP_SERVER = 'localhost'
  61. FTP_USER = 'wieck'
  62. FTP_PASSWORD = 'hurr1cane'
  63.  
  64. FTP_DIRECTORY = "WIRE_STORAGE"
  65.  
  66. #Create the tmp directory unless it doesn't exist
  67. FileUtils.mkdir_p(File.join(File.dirname(__FILE__), "tmp")) unless File.exists?(File.join(__FILE__, "tmp"))
  68.  
  69. #Create our last import timestamp file with the current time if it doesn't exist
  70. # We can run a parser through previous files later if there are other files we haven't imported yet.
  71. unless File.exists?(File.join(File.dirname(__FILE__), "tmp", "last_cci_import"))
  72. `echo '#{Time.now.to_s}' > #{File.join(File.dirname(__FILE__), "tmp", "last_cci_import")}`
  73. end
  74.  
  75. #Parse out the time of the last import from the file
  76. LAST_IMPORT = Time.parse(`cat #{File.join(File.dirname(__FILE__), "tmp", "last_cci_import")}`.chomp!)
  77.  
  78. #This is used to quickly distinguish our raw wire files
  79. FILE_REGEX = /^nytns.*$/
  80.  
  81. #The Date range is the indexes of the split ftp ls string
  82. #We use this later to find when the file was modified, and also where the file name begins
  83. DATE_RANGE = [5, 7]
  84.  
  85.  
  86. ##############################################################
  87. # Start the party
  88. ##############################################################
  89.  
  90. #Recursive method to loop through directories and find parseable stories
  91. def process_files(ftp, ls_output, count)
  92.  
  93. #Get rid of the first line of the output. It's worthless shat
  94. ls_output.delete_at(0)
  95.  
  96. #Loop through each listing
  97. ls_output.each do |listing|
  98.  
  99. #Grab the remote path. Anything after the last date item in the array is a path
  100. #This handles spaces
  101. remote_path = listing.split[(DATE_RANGE[1] + 1)..-1].join(" ")
  102.  
  103. #Check to see if it's a directory or not...
  104. if listing.split('')[0] == 'd'
  105.  
  106. puts "#{'*' * count} dir: #{remote_path}" if DEBUG
  107.  
  108. #Change into the directory
  109. ftp.chdir(remote_path)
  110.  
  111. #Recursion FTW! Go into the directory and look for files.
  112. process_files(ftp, ftp.ls, count + 1)
  113.  
  114. else
  115.  
  116. #Check to make sure these are the files we want...
  117. if remote_path =~ FILE_REGEX
  118.  
  119. #Setup the path we want to fetch the files to
  120. local_path = File.join(File.dirname(__FILE__), "tmp", remote_path)
  121.  
  122. puts "#{'*' * count} CCI file: #{remote_path}" if DEBUG
  123.  
  124. puts "#{'*' * count} Checking update time..." if DEBUG
  125.  
  126. #Parse the updated time from the current listing using the date range constant
  127. updated_time = Time.parse(listing.split[DATE_RANGE[0]..DATE_RANGE[1]].join(" "))
  128.  
  129. #Make sure this is a _new_ file...
  130. if updated_time > LAST_IMPORT
  131.  
  132. puts "#{'*' * count} Fetching file..." if DEBUG
  133.  
  134. #Fetch the file...
  135. ftp.get(remote_path, local_path)
  136.  
  137. puts "#{'*' * count} Processing received file"
  138.  
  139. #Crunch the content through the parser...
  140. parser = CCIParser.new(local_path)
  141. story = parser.parse
  142. story.save
  143.  
  144. #Delete the local file.
  145. FileUtils.rm(local_path)
  146.  
  147. else
  148.  
  149. puts "#{'*' * count} Skipping stale file..." if DEBUG
  150.  
  151. end # end if updated_time > LAST_IMPORT
  152.  
  153. end # end if remote_path =~ FILE_REGEX
  154.  
  155. end # end if listing.split('')[0] == 'd'
  156.  
  157. end # end ls_output.each do |listing|
  158.  
  159. #After this episode of recursion, go back a directory to setup for the next iteration
  160. #Basically, we're unwinding here...
  161. ftp.chdir("..")
  162. end
  163.  
  164. #Open the connection...
  165. FTP.open(FTP_SERVER) do |ftp|
  166. #Login to the server...
  167. ftp.login(FTP_USER, FTP_PASSWORD)
  168.  
  169. #Move into the initial directory
  170. ftp.chdir(FTP_DIRECTORY)
  171.  
  172. #Start the loop!
  173. process_files(ftp, ftp.ls, 1)
  174. end
  175.  
  176. #Update the timestamp of the last record process
  177. `echo '#{Time.now.to_s}' > #{File.join(File.dirname(__FILE__), "tmp", "last_cci_import")}`
  178.  
  179. @finish = Time.now if DEBUG
  180.  
  181. puts "Script took #{(@finish - @start).to_s} seconds" if DEBUG
Add Comment
Please, Sign In to add comment