# Untitled

import xml.etree.ElementTree as ET
import re

# Extract the leading text of every <s> and <q> element from newscor.xml with
# a streaming ElementTree parse. Each non-empty piece is printed and appended
# (without separators) to using_element_tree.xml.
file = "newscor.xml"
namespace = '{http://www.xml-ces.org/schema}'

# The with-statement makes sure the output is flushed and closed even if the
# parse fails part-way through.
with open('using_element_tree.xml', 'w', encoding="utf8") as newFile:
    context = ET.iterparse(file, events=("start", "end"))

    # iterparse does not guarantee that elem.text is populated when the
    # "start" event is delivered. The text is complete once the *next* event
    # (a child's start or the element's own end) arrives, so a matching
    # element is parked here and handled one event later. This keeps the
    # output in document order.
    pending = None

    for event, elem in context:
        if pending is not None:
            value = pending.text
            pending = None
            if value:
                # str.strip treats its argument as a set of characters:
                # '&', ',', ' ', '<' and '>' are trimmed from both ends.
                value = value.strip("&, <, >")
                value = value.strip()
                newFile.write(value)
                print(value)

        if event == 'start':
            tag = elem.tag
            # Drop the XCES namespace so the tag can be compared by its
            # local name.
            if tag.startswith(namespace):
                tag = tag[len(namespace):]
            if tag == 's' or tag == 'q':
                pending = elem
        else:
            # Release the finished element's content to keep memory bounded.
            # Clearing only on "end" avoids wiping text that has not been
            # read yet.
            elem.clear()

from bs4 import BeautifulSoup
from bs4 import SoupStrainer

# Output file for the BeautifulSoup-based extraction below; rebinds newFile.
newFile = open('using_bs4.xml', mode='w', encoding="utf-8")


def only_s_and_q_tags():
    """Return the tag names to keep while parsing: ``s`` and ``q``.

    The result is a list so that a name filter built from it matches either
    tag. The previous expression ``"s" or "q"`` always evaluated to ``"s"``,
    which silently dropped every ``<q>`` element.
    """
    return ["s", "q"]

# Parse only the tags selected by only_s_and_q_tags() so the rest of the
# document is never built into the tree.
s_and_q_tags = SoupStrainer(only_s_and_q_tags())

with open("newscor.xml", encoding="utf-8") as fp:
    soup = BeautifulSoup(fp, "xml", parse_only=s_and_q_tags)

# File objects are context managers: entering the already-open newFile here
# guarantees it is flushed and closed once every string has been written.
with newFile:
    for string in soup.strings:
        # Skip strings that are just a line break. The earlier literals
        # 'n' and 'rn' appear to be newline escapes that lost their
        # backslashes, so the filter never matched a line break.
        if string not in ('\n', '\r\n'):
            print(repr(string))
            newFile.write(string)