Advertisement
Guest User

Untitled

a guest
Aug 21st, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.51 KB | None | 0 0
  require 'set'

  # Extensions to the core File class: a tail reader and a chunked reader.
  class File
    # Returns the last +n+ lines of the file at +path+ as one string.
    #
    # The file is scanned backwards from its end in pieces of +buffer_size+
    # bytes, so only the tail of a large file is read. A newline that is the
    # very last byte of the file terminates the final line rather than
    # separating two lines, so files with and without a trailing newline both
    # yield exactly +n+ lines.
    #
    # @param path [String] file to read
    # @param n [Integer] number of trailing lines wanted
    # @param buffer_size [Integer] bytes fetched per backward read
    # @return [String] the last +n+ lines (the whole file when it has fewer),
    #   or an empty string when +n+ is not positive
    # @raise [ArgumentError] if +buffer_size+ is not positive
    def self.tail(path, n = 10, buffer_size: 512)
      raise ArgumentError, 'buffer_size must be positive' unless buffer_size.positive?
      return '' unless n.positive?

      newline = "\n".ord

      File.open(path, 'r') do |file|
        size = file.size
        block_start = size # bytes from block_start to EOF have been scanned
        separators = 0
        tail_start = nil

        while tail_start.nil? && block_start.positive?
          to_read = [buffer_size, block_start].min
          block_start -= to_read
          file.seek(block_start)
          data = file.read(to_read)

          (data.bytesize - 1).downto(0) do |index|
            next unless data.getbyte(index) == newline

            position = block_start + index
            next if position == size - 1 # trailing newline ends the last line

            separators += 1
            next if separators < n

            # The n-th separator from the end sits just before the wanted tail.
            tail_start = position + 1
            break
          end
        end

        # Fewer than n separators found: the whole file is the tail.
        file.seek(tail_start || 0)
        file.read
      end
    end

    # Yields the remainder of the file in successive pieces of at most
    # +chunk_size+ bytes until end of file is reached.
    #
    # @param chunk_size [Integer] maximum number of bytes per yielded piece
    # @return [Enumerator, nil] an enumerator when no block is given
    def each_chunk(chunk_size)
      return enum_for(:each_chunk, chunk_size) unless block_given?

      yield read(chunk_size) until eof?
    end
  end

  # Returns the +n+ largest distinct integers stored in +filename+, largest
  # first.
  #
  # Works from pre-sorted chunk files named ".<filename>_sorted_chunk_<i>" in
  # the current directory; when none exist they are produced by
  # build_pre_sorted_chunks_for and the directory is listed again. The last
  # +n+ lines of every chunk are read via File.tail, one thread per chunk,
  # and the candidates are merged afterwards on the calling thread.
  #
  # Timings noted by the original author: shelling out to `tail -n` per chunk
  # took about 0.141 s, the threaded File.tail variant about 0.130 s.
  #
  # @param filename [String] file of whitespace-separated integers
  # @param n [Integer] how many of the largest values to return
  # @return [Array<Integer>] up to +n+ distinct values in descending order
  def top_n(filename, n = 100)
    chunk_pattern = ".#{filename}_sorted_chunk_*"
    chunk_paths = Dir[chunk_pattern]

    if chunk_paths.empty?
      build_pre_sorted_chunks_for(filename)
      # List again: the chunks did not exist when the first listing was taken.
      chunk_paths = Dir[chunk_pattern]
    end

    readers = chunk_paths.map do |chunk_path|
      # Each thread returns its own array, so no state is shared while reading.
      Thread.new(chunk_path) { |path| File.tail(path, n).split.map(&:to_i) }
    end

    # Thread#value joins the thread and hands back the block's result.
    readers.flat_map(&:value).uniq.max(n)
  end

  # Splits +filename+ into sorted, de-duplicated chunk files.
  #
  # The input is read +chunk_size+ bytes at a time. Each piece is parsed as
  # whitespace-separated integers, de-duplicated, sorted ascending and written
  # newline-separated (no trailing newline) to
  # ".<filename>_sorted_chunk_<i>", with <i> counting up from 0. A number that
  # straddles a read boundary is held back and completed by the next read, so
  # it is never split into two values.
  #
  # Original author's note: with 500 MB chunks a run took roughly five minutes.
  #
  # @param filename [String] file of whitespace-separated integers
  # @param chunk_size [Integer] bytes read per piece (500 MB by default)
  # @return [nil]
  def build_pre_sorted_chunks_for(filename, chunk_size: 1024**2 * 500)
    File.open(filename) do |file|
      chunk_index = 0
      pending = String.new

      until file.eof?
        pending << file.read(chunk_size)

        if file.eof?
          # Nothing more will arrive, so whatever is pending is complete.
          complete = pending
        else
          cut = pending.rindex(/\s/)
          next if cut.nil? # no separator yet: keep accumulating this token

          # Keep the possibly unfinished last token for the next round.
          complete = pending.slice!(0..cut)
        end

        numbers = complete.split.map(&:to_i)
        next if numbers.empty?

        File.write(".#{filename}_sorted_chunk_#{chunk_index}", numbers.uniq.sort.join("\n"))
        chunk_index += 1
      end
    end
  end

  # Command-line entry point: prints the top N numbers of the given file, one
  # per line. Both arguments are required; when either is missing the usage
  # text is printed instead of running with N = 0.
  filename, count = ARGV

  if filename && count
    puts top_n(filename, count.to_i)
  else
    puts "Please provide a filename and a N value to calculate the top N."
  end

  # Writes the integers 0...n in random order, one per line, to +path+.
  #
  # Collecting +n+ distinct values drawn from 0...n always ends with every
  # value of that range, so the range is shuffled directly instead of drawing
  # random numbers until the set fills up.
  #
  # @param n [Integer] how many distinct numbers to write
  # @param path [String] output file, "randos.txt" by default
  # @return [Integer] number of bytes written
  def generate_random_number_file(n = 15_000_000, path: 'randos.txt')
    File.write(path, (0...n).to_a.shuffle.join("\n"))
  end

  # Runs the given block, prints how long it took in milliseconds (measured
  # with the monotonic clock) and returns the block's result.
  #
  # @param task_name [String] label used in the printed message
  # @yield the work to be timed
  # @return [Object] whatever the block returned
  def _time(task_name = 'Task')
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    result = yield
    elapsed_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000
    puts "#{task_name} took #{elapsed_ms} milliseconds."
    result
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement