Guest User

Untitled

a guest
Nov 27th, 2018
243
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.73 KB | None | 0 0
  1. require 'ap'
  2. require 'mail'
  3.  
  4. # String monkeypatch
  5. # This is one of many possible "encoding problem" solutions. It's actually an intractable problem
  6. # but you'd have to read "Gödel, Escher, Bach" to understand why...
  7. class String
  8. def clean_utf8
  9. # self.force_encoding("UTF-8").encode("UTF-16BE", :invalid=>:replace, :replace=>"?").encode("UTF-8")
  10. unpack('C*').pack('U*') if !valid_encoding?
  11. end
  12. end
  13.  
  14. module Parsing
  15. module Email
  16. class Header
  17. EMAIL_HEADER_PARSER_REGEX = /
  18. ([A-Za-z-]+):\s # Find a header key, which ends in a colon and a space. Capture the hyphenated word portion.
  19. ( # Now start capturing the value.
  20. [^\r\n]+ # First, match everything that is not a line ending char.
  21. (?: # Then start a non-capturing repeating match which first consists of...
  22. \r?\n # A line ending combo...
  23. (?![A-Za-z-]+:\s) # But first, do a negative lookahead to make sure the next line does not start with another header-key-looking string
  24. [^\r\n]+ # Then match all text that is not a line ending char.
  25. )* # Repeat this non-capturing group match 0 or more times.
  26. ) # End capturing the value.
  27. /mx # Allow matches to cross line endings (m) and allow whitespace and comments in this regex (x).
  28. EMAIL_MULTILINE_HEADER_VALUE_REGEX = /\r?\n\s*/m
  29.  
  30. attr_reader :headers
  31. def initialize(opts = {})
  32. @headers = (Hash === opts ? opts[:headers] : opts)
  33. end
  34.  
  35. def call
  36. return {} unless @headers
  37. clean_header_values_hash(headers_hash)
  38. end
  39. alias to_h call
  40.  
  41. private
  42.  
  43. def headers_hash
  44. h = {}
  45. @headers.scan(EMAIL_HEADER_PARSER_REGEX).map do |k,v|
  46. if k && v
  47. if h[k]
  48. # if this key already has a value, wrap it in an array and append the new value, otherwise just set the key value
  49. h[k] = [h[k]] unless Array === h[k]
  50. h[k] << v
  51. else
  52. h[k] = v
  53. end
  54. end
  55. end
  56. h
  57. end
  58.  
  59. # Clean up runs of a newline followed by whitespace in header values by replacing with a space
  60. def clean_header_values_hash(h)
  61. h.each do |k,v|
  62. if Array === v
  63. v.map! do |val|
  64. val =~ EMAIL_MULTILINE_HEADER_VALUE_REGEX ? val.gsub!(EMAIL_MULTILINE_HEADER_VALUE_REGEX,' ') : val
  65. end
  66. else
  67. v.gsub!(EMAIL_MULTILINE_HEADER_VALUE_REGEX, ' ') if v =~ EMAIL_MULTILINE_HEADER_VALUE_REGEX
  68. end
  69. end
  70. h
  71. end
  72.  
  73. end
  74. end
  75. end
  76.  
  77.  
  78. headers = <<-HEADERS
  79. Delivered-To: jtest01@test.com
  80. Return-Path: <noreply@comixology.com>
  81. Received: from smtp36.gate.dfw1a (gate36.gate.dfw.mlsrvr.com [172.20.100.36])
  82. by mail138a.mail.dfw.mlsrvr.com (SMTP Server) with ESMTP id 107434FCCD
  83. for <info@comixology.com>; Wed, 29 Dec 2010 05:46:02 -0500 (EST)
  84. X-Spam-Threshold: 95
  85. X-Spam-Score: 0
  86. X-Spam-Flag: NO
  87. X-Virus-Scanned: OK
  88. X-MessageSniffer-Scan-Result: 0
  89. X-MessageSniffer-Rules: 0-0-0-32767-c
  90. X-CMAE-Scan-Result: 0
  91. X-CNFS-Analysis: v=1.0 c=1 a=410GYZBmXEQA:10 a=8nJEP1OIZ-IA:10 a=jrTSQGAaAAAA:8 a=zr8WG4425YMt8zt5NHsA:9 a=9yu0GlTtA_AyZ2ttE_-TzX-Oml0A:4 a=wPNLvfGTeEIA:10 a=vUvjSO8KtUCdM2_P:21 a=bi41oqnuo1lMp9yW:21
  92. X-Orig-To: info@comixology.com
  93. X-Originating-Ip: [173.203.22.3]
  94. Received: from [173.203.22.3] ([173.203.22.3:54314] helo=287315-web1.comixology.com)
  95. by smtp36.gate.dfw1a.rsapps.net (envelope-from <noreply@comixology.com>)
  96. (ecelerity 2.2.3.46 r(37554)) with ESMTP
  97. id 4A/C8-28011-8611B1D4; Wed, 29 Dec 2010 05:46:01 -0500
  98. Received: from comixology.com (localhost [127.0.0.1])
  99. by 287315-web1.comixology.com (Postfix) with ESMTP id E41991188DCD;
  100. Wed, 29 Dec 2010 05:45:59 -0500 (EST)
  101. Date: Wed, 29 Dec 2010 05:45:59 -0500
  102. To: info@comixology.com
  103. From: comiXology <noreply@comixology.com>
  104. Reply-to: carta.mesquita@bol.com.br
  105. Subject: Feedback from the Marvel Mobile Comics App
  106. Message-ID: <843e0dc47783da9c4ff408f8cb9a8c79@comixology.com>
  107. X-Priority: 3
  108. X-Mailer: PHPMailer 5.0.2 (phpmailer.codeworxtech.com)
  109. MIME-Version: 1.0
  110. Content-Transfer-Encoding: 8bit
  111. Content-Type: text/plain; charset="ISO-8859-1"
  112. HEADERS
  113.  
  114. t = Time.now
  115. 500.times do
  116. begin
  117. email = Parsing::Email::Header.new(headers)
  118. email.call
  119. rescue ArgumentError
  120. email = Parsing::Email::Header.new(headers.clean_utf8)
  121. email.call
  122. end
  123. end
  124. puts "Time with my regex: #{Time.now - t} seconds"
  125.  
  126. t = Time.now
  127. 500.times do
  128. email = Mail.new(headers)
  129. email.received
  130. end
  131. puts "Time with Mail gem: #{Time.now - t} seconds"
  132.  
  133.  
  134.  
  135. # Time with my regex: 0.063965 seconds
  136. # Time with Mail gem: 16.327161 seconds
  137. # [Finished in 16.7s]
Add Comment
Please, Sign In to add comment