Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Benchmark input: the entire contents of the file "foo" in the current
# working directory, read once when the script is loaded.
# File.read opens, reads and closes the file in one call, so no file handle
# is left open, and the path is always treated as a plain file name.
$xml = File.read("foo")
# Character class (as regex source text) for the characters allowed after
# the first character of a name.
NAMECHAR = '[\-\w\d\.:]'.freeze

# Regex source text for a name: a word character or ':' followed by any
# number of NAMECHARs. The whole name is wrapped in one capture group.
NAME = "([\\w:]#{NAMECHAR}*)".freeze

# Matches an '&' that is NOT followed by "<name>;".
# NOTE(review): NAME's capture group sits inside a negative lookahead, so it
# can never capture anything; every scan result is [nil]. If the intent was
# to collect entity names, /&#{NAME};/ is probably what was meant - confirm.
REFERENCE = /&(?!#{NAME};)/

# Matches a numeric character reference such as "&#65;" or "&#x41;".
# Leading zeros are skipped; group 1 is the decimal digits, or "x" followed
# by the hex digits. The literal "&#0" prefix must be present, otherwise any
# run of digits followed by ';' in ordinary text would match.
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
# Returns a copy of +string+ with line endings normalized and character /
# entity references expanded. +string+ itself is never modified, and may be
# frozen.
#
# Steps, in order:
# 1. "\r\n" and lone "\r" become "\n".
# 2. If REFERENCE matches nothing, the copy is returned as is.
# 3. Every NUMERICENTITY match is replaced by the UTF-8 character for its
#    code point.
# 4. The first capture of each REFERENCE match is taken as an entity name
#    and "&name;" is replaced by that entity's value.
# 5. If at least one entity name was found, "&amp;" becomes "&".
#
# string  - the text to process.
# doctype - optional object responding to entity(name), returning the
#           replacement text or nil. When omitted,
#           DocType::DEFAULT_ENTITIES[name].value is used instead.
# filter  - optional collection (anything responding to include?) of entity
#           names that must be left unexpanded.
# illegal - unused; kept so existing callers keep working.
def unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # dup rather than clone: clone would carry over a frozen flag and the
  # in-place edits below would raise.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.empty?

  rv.gsub!( NUMERICENTITY ) do
    code = Regexp.last_match(1)
    # Hex references are captured as "x41"; Integer() needs "0x41".
    code = "0#{code}" if code.start_with?('x')
    [Integer(code)].pack('U*')
  end

  # Matches whose first capture is nil carry no entity name.
  names = matches.map { |m| m[0] }.compact
  return rv if names.empty?

  names.each do |name|
    next if filter && filter.include?(name)
    value =
      if doctype
        doctype.entity( name )
      else
        entity = DocType::DEFAULT_ENTITIES[ name ]
        entity && entity.value
      end
    next unless value
    # Escape the name so '.' in it only matches a literal dot, and use the
    # block form so backslashes in the value are inserted verbatim instead
    # of being read as back-references.
    rv.gsub!( /&#{Regexp.escape(name)};/ ) { value }
  end
  rv.gsub!( /&amp;/, '&' )
  rv
end
# Runs unnormalize on the global $xml +iterations+ times (100 by default),
# prints the elapsed time in seconds and returns it.
#
# The monotonic clock is used so that the measurement cannot be skewed by
# the system wall clock being adjusted while the loop runs.
def bench( iterations=100 )
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  iterations.times { unnormalize($xml) }
  p Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
end
# Run the benchmark eight times in a row; each run prints its own timing.
8.times { bench }
Add Comment
Please, Sign In to add comment