Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 3 from lxml import etree
- 4 import re
- 5
- 6 #f = open("test_file.xml","r")
- 7 #tree = etree.parse(f)
- 8 #f.close()
- 9
- 10 #tree = etree.fromstring('<blazon> ... </blazon>')
- 11
def handle_rules(tree):
    """Simplify a parsed blazon tree in place and print the result.

    Two rewrites are applied to ``tree`` (an lxml element), in order:

    1. Every ``optamt``, ``optcharges``, ``opttreatment`` and
       ``fulltreatment`` element is detached from its parent.
    2. Every ``grouporcharge`` element that has a ``group`` or ``charge``
       child is replaced by a freshly created ``group`` element, which
       adopts the children of those ``group``/``charge`` children.

    The modified tree is then serialized with ``pretty_print=True`` and
    printed. Nothing is returned.
    """
    # Rule 1: remove optamt, optcharges, opttreatment & fulltreatment.
    unwanted = tree.xpath('//optamt | //optcharges | //opttreatment | //fulltreatment')
    for node in unwanted:
        parent = node.getparent()
        parent.remove(node)

    # Rule 2: replace grouporcharge -> group|charge with a plain group.
    # The trailing "/.." in each path selects the grouporcharge wrapper
    # itself rather than its group/charge child.
    wrappers = tree.xpath("//grouporcharge/group/.. | //grouporcharge/charge/..")
    for wrapper in wrappers:
        replacement = etree.Element('group')
        # extend() moves the grandchildren out of the old wrapper and
        # into the new group element.
        grandchildren = wrapper.xpath('./charge/* | ./group/*')
        replacement.extend(grandchildren)
        wrapper.getparent().replace(wrapper, replacement)

    # Print out the rewritten tree.
    serialized = etree.tostring(tree, pretty_print=True)
    print(serialized)
- 31
# Driver: pull every <blazon>...</blazon> fragment out of BurkeTest.xml with a
# non-greedy, multi-line regex, report how many were found, then parse each
# fragment on its own and run the rewrite rules on it.
# The file is read inside a `with` block so the handle is closed as soon as
# its contents have been consumed (the original never closed it).
with open('BurkeTest.xml') as f:
    matches = re.findall(r'(?ms)<blazon>.*?</blazon>', f.read())
print(len(matches))
for i, match in enumerate(matches):
    # Print the fragment index before its rewritten XML so output can be
    # matched back to the source fragment.
    print(i)
    handle_rules(etree.fromstring(match))
Add Comment
Please, Sign In to add comment