Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Number repeated sibling tags in studentinfo.xml and save the result.

Written for https://stackoverflow.com/q/74567311/6146136

Links to outputs:
    studentinfo_output.xml       at https://pastebin.com/bPFg8TBL
    studentinfo_op_unpretty.xml  at https://pastebin.com/6NgFsYU0
    studentinfo_op_regpretty.xml at https://pastebin.com/5FQxLzCC
"""
import re

# Tag names whose occurrences under the same parent get an index suffix.
enumTags = ['st:name']

# Line break (plus indentation) between the end of a text node and the tag
# that follows it.  Lookarounds keep the neighbouring characters out of the
# match, so removing the break never removes a character of the text itself.
_BREAK_BEFORE_TAG = re.compile(r'(?<=[^>\s])\s*\n\s*(?=<)')
# Line break (plus indentation) between a tag and the text node after it.
_BREAK_AFTER_TAG = re.compile(r'(?<=>)\s*\n\s*(?=[^<\s])')


def collapse_text_lines(pretty: str) -> str:
    """Return *pretty* with each text node joined onto its neighbouring tags.

    ``prettify()`` puts every text node on a line of its own.  This removes
    the line break and indentation on both sides of such text, so
    ``<b>\\n  John\\n </b>`` becomes ``<b>John</b>``.  Line breaks that sit
    between two tags are left alone, so the overall indentation survives.
    """
    joined = _BREAK_BEFORE_TAG.sub('', pretty)
    return _BREAK_AFTER_TAG.sub('', joined)


def renumber_children(soup, names) -> None:
    """Append a per-parent index to every direct child tag named in *names*.

    For each tag in *soup*, its direct children called ``name`` are renamed
    in place to ``name0``, ``name1``, ... in document order; the numbering
    starts again at 0 under every parent.
    """
    # Snapshot the tags first so the tree is not walked while being edited.
    parents = [node for node in soup.descendants if node.name]
    for parent in parents:
        for name in names:
            matches = parent.find_all(name, recursive=False)
            for index, child in enumerate(matches):
                child.name = f'{child.name}{index}'


def main() -> None:
    """Read studentinfo.xml, renumber the tags and write three output files."""
    # Imported here so the text helper above stays usable without bs4.
    from bs4 import BeautifulSoup

    # NOTE(review): the file is decoded with the locale's default encoding;
    # confirm that matches how studentinfo.xml is actually encoded.
    with open('studentinfo.xml', 'r') as f:
        xhtml = f.read()  # [I just don't like "file" as a variable name]

    soup = BeautifulSoup(xhtml, 'lxml')  # 'xml' parser ---> lose namespaces
    renumber_children(soup, enumTags)

    # To inspect the result, print(soup.prettify()) here -- prettify is a
    # method, so it needs the call parentheses, and printing inside the
    # renaming loop would print the document again after every edit.

    # To save the new xml [3 different options]
    with open('studentinfo_output.xml', 'wb') as f:
        f.write(soup.prettify('utf-8'))  # option 1: fully prettified

    with open('studentinfo_op_unpretty.xml', 'wb') as f:
        f.write(str(soup).encode('utf-8'))  # option 2: no re-formatting

    rpx = collapse_text_lines(soup.prettify())
    with open('studentinfo_op_regpretty.xml', 'wb') as f:
        f.write(rpx.encode('utf-8'))  # option 3: prettified, text kept inline


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement