Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Collect, for every <s> element found in the XML files of a directory, the
# list of 'lemma' attribute values of the <w> elements it contains.
from xml.dom import minidom
from os import listdir
from os.path import isfile, join


def list_files(directory):
    """Return the joined paths of the regular files directly inside *directory*.

    Sub-directories are skipped; the order is whatever ``listdir`` returns.
    """
    return [
        join(directory, entry)
        for entry in listdir(directory)
        if isfile(join(directory, entry))
    ]


def sentence_lemmas(path):
    """Parse the XML file at *path* and return one lemma list per <s> element.

    Each inner list holds the value of the 'lemma' attribute of every <w>
    element found under that <s> element (an <s> without <w> children yields
    an empty list).

    Raises:
        KeyError: if a <w> element has no 'lemma' attribute.
    """
    document = minidom.parse(path)
    return [
        [word.attributes['lemma'].value for word in sentence.getElementsByTagName('w')]
        for sentence in document.getElementsByTagName('s')
    ]


mypath = './xmls/POST/'

if __name__ == '__main__':
    onlyfiles = list_files(mypath)
    mat = []
    for f in onlyfiles:
        print(f)
        mat.extend(sentence_lemmas(f))
    # NOTE(review): the nesting was reconstructed from a paste that had lost
    # its indentation; the total is assumed to be printed once, after all
    # files have been processed -- confirm against the original script.
    print(len(mat))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement