daily pastebin goal
61%
SHARE
TWEET

Untitled

a guest Mar 13th, 2018 45 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Benchmark input: the entire contents of the file "foo" in the current
  # directory. File.read opens, reads and closes the file in one call, so no
  # file handle is left open for the garbage collector to clean up.
  $xml = File.read("foo")
  2.  
  # Regexp source (a character class) for the characters allowed after the
  # first character of a name: hyphen, word characters, digits, dot and colon.
  NAMECHAR = '[\-\w\d\.:]'.freeze

  # Regexp source for a name: a word character or colon followed by any number
  # of NAMECHARs. The whole name is wrapped in a single capture group.
  NAME = "([\\w:]#{NAMECHAR}*)".freeze

  # Matches an "&" that is NOT followed by a NAME and a ";".
  # NOTE(review): NAME's capture group sits inside the negative lookahead, so
  # it is always unset when this pattern matches and String#scan returns [nil]
  # for every hit. unnormalize drops nil captures, which means no entity name
  # is ever collected through this pattern. Confirm whether a positive
  # reference pattern was intended here.
  REFERENCE = /&(?!#{NAME};)/

  # Matches a numeric character reference: "&#", optional leading zeros, then
  # either decimal digits or "x" followed by hex digits, then ";". Group 1 holds
  # the digits without the leading zeros (hex values keep their "x" prefix).
  NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
  7.  
  # Expands character and entity references in +string+ and returns the result
  # as a new String. +string+ itself is never modified, so frozen input is fine.
  #
  # Processing order:
  # 1. "\r\n" and lone "\r" are converted to "\n".
  # 2. If REFERENCE matches nowhere, the text is returned at this point.
  # 3. Every match of NUMERICENTITY (decimal, or hex with an "x" prefix) is
  #    replaced by the UTF-8 character for that code point.
  # 4. Every distinct entity name captured by the first group of REFERENCE is
  #    expanded once, in order of first appearance. Names listed in +filter+
  #    and names without a known value are left untouched.
  # 5. If step 4 saw at least one name, "&amp;" is turned into "&". This runs
  #    last so that "&amp;lt;" becomes "&lt;" rather than "<".
  #
  # string  - the text to process.
  # doctype - optional object whose #entity(name) returns the replacement text
  #           for +name+, or nil/false when the name is unknown. When omitted,
  #           DocType::DEFAULT_ENTITIES[name].value is used instead.
  # filter  - optional collection responding to #include?; entity names it
  #           contains are not expanded.
  # illegal - unused; kept so existing callers keep working.
  #
  # Returns the unnormalized String.
  def unnormalize(string, doctype = nil, filter = nil, illegal = nil)
    # Non-bang gsub always yields a fresh string, unlike clone + gsub! which
    # raises on frozen input.
    text = string.gsub(/\r\n?/, "\n")
    found = text.scan(REFERENCE)
    return text if found.empty?

    text = text.gsub(NUMERICENTITY) do
      digits = Regexp.last_match(1)
      # Integer() needs the "0x" prefix to parse the value as hexadecimal.
      digits = "0#{digits}" if digits.start_with?('x')
      [Integer(digits)].pack('U*')
    end

    # Purely numeric references carry no name capture; drop those and expand
    # each remaining name only once, however often it occurs.
    names = found.map { |captures| captures[0] }.compact.uniq
    return text if names.empty?

    names.each do |name|
      next if filter && filter.include?(name)

      value =
        if doctype
          doctype.entity(name)
        else
          default = DocType::DEFAULT_ENTITIES[name]
          default && default.value
        end
      next unless value

      # A plain-string pattern keeps "." and ":" in the name literal, and the
      # block form inserts the value verbatim (no "\1"-style interpretation).
      text = text.gsub("&#{name};") { value }
    end

    text.gsub('&amp;', '&')
  end
  42.  
  # Runs unnormalize on the global $xml 100 times in a row, prints the elapsed
  # time in seconds with Kernel#p and returns that value. The monotonic clock
  # is used so that system clock adjustments cannot distort the measurement.
  def bench
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    100.times { unnormalize($xml) }
    p Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  end
  50.  
  # Eight back-to-back benchmark rounds; each round prints its own timing.
  8.times { bench }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top