Advertisement
Guest User

Untitled

a guest
Oct 18th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.59 KB | None | 0 0
  1. from xml.dom import minidom
  2. from os import listdir
  3. from os.path import isfile, join
  4.  
  5. #item = mydoc.getElementsByTagName('s')
  6. mypath = './xmls/POST/'
  7. onlyfiles = [join(mypath, f) for f in listdir(mypath) if isfile(join(mypath, f))]
  8.  
  9. mat = []
  10.  
  11. for f in onlyfiles:
  12.     print(f)
  13.     mydoc = minidom.parse(f)
  14.     s_items = mydoc.getElementsByTagName('s')
  15.    
  16.     for s in s_items:
  17.         sent = []
  18.         w_items = s.getElementsByTagName('w')
  19.  
  20.         for w in w_items:
  21.             sent.append(w.attributes['lemma'].value)
  22.        
  23.    
  24.         mat.append(sent)
  25.    
  26. print(len(mat))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement