Guest User

Untitled

a guest
Jun 23rd, 2018
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.28 KB | None | 0 0
  1. 3 from lxml import etree
  2. 4 import re
  3. 5
  4. 6 #f = open("test_file.xml","r")
  5. 7 #tree = etree.parse(f)
  6. 8 #f.close()
  7. 9
  8. 10 #tree = etree.fromstring('<blazon> ... </blazon>')
  9. 11
  10. 12 def handle_rules(tree):
  11. 13 """Rule 1 remove optamt, optcharges, opttreatment & fulltreatment"""
  12. 14 for result in tree.xpath('//optamt | //optcharges | //opttreatment | //fulltreatment'):
  13. 15 result.getparent().remove(result)
  14. 16
  15. 17
  16. 18 """Rule 2 replace grouporcharge->group|charge with group """
  17. 19 results= tree.xpath("//grouporcharge/group/.. | //grouporcharge/charge/..")
  18. 20 for result in results:
  19. 21 new_tree = etree.Element('group')
  20. 22 new_tree.extend( result.xpath('./charge/* | ./group/*') )
  21. 23
  22. 24 #print(etree.tostring(new_tree, pretty_print=True))
  23. 25
  24. 26 result.getparent().replace(result, new_tree)
  25. 27
  26. 28 """Print out"""
  27. 29 #f = open('test_file_out.xml','w')
  28. 30 print( etree.tostring( tree, pretty_print=True ))
  29. 31
  30. 32 f = open('BurkeTest.xml')
  31. 33 matches = re.findall(r'(?ms)<blazon>.*?</blazon>', f.read())
  32. 34 print(len(matches))
  33. 35 for i,match in enumerate(matches):
  34. 36 print(i)
  35. 37 handle_rules(etree.fromstring(match))
Add Comment
Please, Sign In to add comment