Guest User

Untitled

a guest
May 23rd, 2018
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.04 KB | None | 0 0
  1. import os
  2. import xml.etree.ElementTree as ET
  3. import pandas as pd
  4.  
  5. tree = ET.parse('../data/webnlg2017/challenge_data_train_dev/train/2triples/2triples_Airport_train_challenge.xml')
  6. root = tree.getroot()
  7.  
  8. all_elem = list(root.iter('entry'))
  9.  
  10. # entries
  11. entries = [{
  12. "category": elem.attrib['category'],
  13. "eid": elem.attrib['eid'],
  14. "size": elem.attrib['size']
  15. } for elem in all_elem]
  16.  
  17. entries_df = pd.DataFrame(entries)
  18.  
  19. # original tripleset
  20. otriples = [
  21. {'eid': elem.attrib['eid'],
  22. 'text': e.text} for e in elem.find('originaltripleset').findall('otriple') for elem in all_elem
  23. ]
  24. otriples_df = pd.DataFrame(otriples)
  25.  
  26. # modified tripleset
  27. mtriples = [
  28. {'eid': elem.attrib['eid'],
  29. 'text': e.text} for e in elem.find('modifiedtripleset').findall('mtriple') for elem in all_elem
  30. ]
  31.  
  32. mtriples_df = pd.DataFrame(mtriples)
  33.  
  34. # lexes
  35. lexes = [
  36. {'eid': elem.attrib['eid'],
  37. 'text': e.text,
  38. 'comment': e.attrib['comment'],
  39. 'lid': e.attrib['lid']} for e in elem.findall('lex') for elem in all_elem
  40. ]
  41.  
  42. lexes_df = pd.DataFrame(lexes)
Add Comment
Please, Sign In to add comment