Advertisement
Guest User

Untitled

a guest
Jun 19th, 2019
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.55 KB | None | 0 0
  1. import xml.etree.ElementTree as ET
  2. import requests
  3.  
  4.  
  5. arquivo = "C1_Extrato_2_Palavras.xml"
  6. tree = ET.parse(arquivo)
  7.  
  8. root = tree.getroot()
  9.  
  10. filtro = "*"
  11. for child in root.iter(filtro):
  12. print(child.tag, child.text)
  13.  
  14. print("n")
  15.  
  16. for child in root.findall("body"):
  17. for esse in child.findall("graph"):
  18. print(esse.text)
  19.  
  20. corpus
  21.  
  22.  
  23. body
  24.  
  25. s
  26.  
  27. graph
  28.  
  29. terminals
  30.  
  31. t None
  32. t None
  33. t None
  34. t None
  35. t None
  36. t None
  37. t None
  38. t None
  39. t None
  40. t None
  41. t None
  42. t None
  43. t None
  44. t None
  45. t None
  46. t None
  47. t None
  48. t None
  49. t None
  50. t None
  51. t None
  52. t None
  53. t None
  54. t None
  55. t None
  56. t None
  57. t None
  58. t None
  59. t None
  60. t None
  61. t None
  62. t None
  63. t None
  64. t None
  65. nonterminals
  66.  
  67. nt
  68.  
  69. edge None
  70. nt
  71.  
  72. edge None
  73. edge None
  74. edge None
  75. edge None
  76. nt
  77.  
  78. <?xml version="1.0" encoding="UTF-8"?>
  79. <corpus>
  80.  
  81. <body>
  82. <s id="s1" ref="1" source="Running text" forest="1" text="Um acidente aéreo na localidade de Bukavu, no leste da República Democrática do Congo, matou 17 pessoas na quinta-feira à tarde, informou hoje um porta-voz das Nações Unidas.">
  83. <graph root="s1_500">
  84. <terminals>
  85. <t id="s1_1" word="Um" lemma="um" pos="art" morph="M S" extra="* "/>
  86. <t id="s1_2" word="acidente" lemma="acidente" pos="n" morph="M S" sem="event" extra="--"/>
  87. <t id="s1_3" word="aéreo" lemma="aéreo" pos="adj" morph="M S" extra="nh np-close"/>
  88. <t id="s1_4" word="em" lemma="em" pos="prp" morph="--" extra="sam- np-long"/>
  89. <t id="s1_5" word="a" lemma="o" pos="art" morph="F S" extra="-sam "/>
  90. <t id="s1_6" word="localidade" lemma="localidade" pos="n" morph="F S" sem="Labs Lciv" extra="--"/>
  91. <t id="s1_7" word="de" lemma="de" pos="prp" morph="--" extra="np-close"/>
  92. <t id="s1_8" word="Bukavu" lemma="Bukavu" pos="prop" morph="M/F S" extra="civ * heur"/>
  93. <t id="s1_9" word="," lemma="--" pos="pu" morph="--" extra="--"/>
  94. <t id="s1_10" word="em" lemma="em" pos="prp" morph="--" extra="sam-"/>
  95. <t id="s1_11" word="o" lemma="o" pos="art" morph="M S" extra="-sam "/>
  96. <t id="s1_12" word="leste" lemma="leste" pos="n" morph="M S" sem="dir" extra="--"/>
  97. <t id="s1_13" word="de" lemma="de" pos="prp" morph="--" extra="sam- np-close"/>
  98. <t id="s1_14" word="a" lemma="o" pos="art" morph="F S" extra="-sam "/>
  99. <t id="s1_15" word="República_Democrática_do_Congo" lemma="República_Democrática_do_Congo" pos="prop" morph="F S" extra="civ *"/>
  100. <t id="s1_16" word="," lemma="--" pos="pu" morph="--" extra="--"/>
  101. <t id="s1_17" word="matou" lemma="matar" pos="v-fin" morph="PS 3S IND VFIN" extra="cjt-head cjt-head-STA fmc mv"/>
  102. <t id="s1_18" word="17" lemma="17" pos="num" morph="F P" extra="card"/>
  103. <t id="s1_19" word="pessoas" lemma="pessoa" pos="n" morph="F P" sem="H" extra="--"/>
  104. <t id="s1_20" word="em" lemma="em" pos="prp" morph="--" extra="sam-"/>
  105. <t id="s1_21" word="a" lemma="o" pos="art" morph="F S" extra="-sam "/>
  106. <t id="s1_22" word="quinta-feira" lemma="quinta-feira" pos="n" morph="F S" sem="temp" extra="--"/>
  107. <t id="s1_23" word="a" lemma="a" pos="prp" morph="--" extra="sam-"/>
  108. <t id="s1_24" word="a" lemma="o" pos="art" morph="F S" extra="-sam "/>
  109. <t id="s1_25" word="tarde" lemma="tarde" pos="n" morph="F S" sem="per" extra="--"/>
  110. <t id="s1_26" word="," lemma="--" pos="pu" morph="--" extra="--"/>
  111. <t id="s1_27" word="informou" lemma="informar" pos="v-fin" morph="PS 3S IND VFIN" extra="nosubj nosubj cjt-STA vH fmc mv"/>
  112. <t id="s1_28" word="hoje" lemma="hoje" pos="adv" morph="--" extra="--"/>
  113. <t id="s1_29" word="um" lemma="um" pos="art" morph="M S" extra="--"/>
  114. <t id="s1_30" word="porta-voz" lemma="porta-voz" pos="n" morph="M S" sem="tool Hprof" extra="--"/>
  115. <t id="s1_31" word="de" lemma="de" pos="prp" morph="--" extra="sam-"/>
  116. <t id="s1_32" word="as" lemma="o" pos="art" morph="F P" extra="-sam "/>
  117. <t id="s1_33" word="Nações_Unidas" lemma="Nações_Unidas" pos="prop" morph="F P" extra="org * newlex"/>
  118. <t id="s1_34" word="." lemma="--" pos="pu" morph="--" extra="--"/>
  119. </terminals>
  120.  
  121. <nonterminals>
  122. <nt id="s1_500" cat="s">
  123. <edge label="STA" idref="s1_501"/>
  124. </nt>
  125. <nt id="s1_501" cat="par">
  126. <edge label="CJT" idref="s1_502"/>
  127. <edge label="PU" idref="s1_26"/>
  128. <edge label="CJT" idref="s1_516"/>
  129. <edge label="PU" idref="s1_34"/>
  130. </nt>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement