Guest User

Untitled

a guest
Mar 13th, 2018
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.34 KB | None | 0 0
  # Benchmark input: the entire contents of the file "foo", loaded once.
  # File.read opens, reads and closes the file in a single call, so no File
  # object is left open as it was with open("foo").read.
  $xml = File.read("foo")
  2.  
  # Regex fragment: character class for one name character after the first
  # (hyphen, word character, digit, dot or colon).
  NAMECHAR = %q([\-\w\d\.:])
  # Regex fragment: a name, wrapped in a single capture group -- one word
  # character or colon followed by any number of NAMECHAR characters.
  NAME = '([\w:]' + NAMECHAR + '*)'
  # Matches an "&" that is NOT followed by "<name>;".
  # NOTE(review): the capture group from NAME sits inside the negative
  # lookahead, so it is always nil when this pattern matches -- confirm that
  # this is the intended pattern.
  REFERENCE = Regexp.new("&(?!#{NAME};)")
  # Matches a numeric character reference such as "&#65;" or "&#x41;".
  # Leading zeros are skipped; group 1 holds the decimal digits, or "x"
  # followed by the hexadecimal digits.
  NUMERICENTITY = %r{&#0*((?:\d+)|(?:x[a-fA-F0-9]+));}
  7.  
  # Expands character and entity references in +string+ and returns the result
  # as a new String; the argument itself is never modified.
  #
  # string::  text that may contain references such as "&#65;", "&#x41;" or
  #           "&name;"
  # doctype:: optional object responding to #entity(name). When given, it
  #           supplies the replacement text for each named entity; otherwise
  #           DocType::DEFAULT_ENTITIES[name].value is used. DocType is not
  #           defined in this file and must be provided by the caller's
  #           environment.
  # filter::  optional collection; any name for which filter.include?(name)
  #           is true is left unexpanded
  # illegal:: accepted for interface compatibility; not used by this method
  #
  # If REFERENCE does not match anywhere in the text, only line endings are
  # normalised and the text is returned without any further expansion.
  def unnormalize( string, doctype=nil, filter=nil, illegal=nil )
    rv = string.clone
    # Normalise CRLF and lone CR line endings to LF.
    rv.gsub!( /\r\n?/, "\n" )

    matches = rv.scan( REFERENCE )
    return rv if matches.empty?

    # Numeric character references: group 1 is either decimal digits or
    # "x" + hex digits. Integer() needs a "0x" prefix to parse the hex form.
    rv.gsub!( NUMERICENTITY ) {
      digits = $1
      digits = "0#{digits}" if digits.start_with?( 'x' )
      [Integer(digits)].pack('U*')
    }

    # The first capture group of each REFERENCE match is the entity name; it
    # is nil when the group did not participate. Each distinct name is
    # expanded once, since a single gsub! already replaces every occurrence.
    names = matches.map { |captures| captures[0] }.compact.uniq
    unless names.empty?
      names.each do |entity_reference|
        next if filter and filter.include?( entity_reference )

        entity_value =
          if doctype
            doctype.entity( entity_reference )
          else
            default = DocType::DEFAULT_ENTITIES[ entity_reference ]
            default && default.value
          end
        next unless entity_value

        # A literal String pattern keeps "." and other regex metacharacters in
        # the name from matching unrelated text; the block form inserts the
        # value verbatim, so backslashes in it are not read as back-references.
        rv.gsub!( "&#{entity_reference};" ) { entity_value }
      end
      # Expand &amp; only after every other named entity has been handled.
      # (The pattern here had been reduced to a bare "&", making this a no-op.)
      rv.gsub!( /&amp;/, '&' )
    end
    rv
  end
  42.  
  # Calls unnormalize on the global $xml 100 times, then prints (via p) and
  # returns the elapsed time in seconds measured with Time.now.
  def bench
    started_at = Time.now
    100.times do
      unnormalize($xml)
    end
    elapsed = Time.now - started_at
    p elapsed
  end
  50.  
  # Run the benchmark eight times in a row; each run prints its own timing.
  8.times { bench }
Add Comment
Please, Sign In to add comment