Advertisement
Guest User

Untitled

a guest
Aug 19th, 2017
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.64 KB | None | 0 0
  1. package main
  2.  
  3. import (
  4. "encoding/xml"
  5. "fmt"
  6. "os"
  7. "strconv"
  8. )
  9.  
  10. type CharacterOffsetBegin struct {
  11. Text string `xml:",chardata" json:",omitempty"`
  12. }
  13.  
  14. type CharacterOffsetEnd struct {
  15. Text string `xml:",chardata" json:",omitempty"`
  16. }
  17.  
  18. type ChidleyRoot314159 struct {
  19. Root *Root `xml:" root,omitempty" json:"root,omitempty"`
  20. }
  21.  
  22. type NER struct {
  23. Text string `xml:",chardata" json:",omitempty"`
  24. }
  25.  
  26. type NormalizedNER struct {
  27. Text string `xml:",chardata" json:",omitempty"`
  28. }
  29.  
  30. type POS struct {
  31. Text string `xml:",chardata" json:",omitempty"`
  32. }
  33.  
  34. type Speaker struct {
  35. Text string `xml:",chardata" json:",omitempty"`
  36. }
  37.  
  38. type Timex struct {
  39. AttrTid string `xml:" tid,attr" json:",omitempty"`
  40. AttrType string `xml:" type,attr" json:",omitempty"`
  41. Text string `xml:",chardata" json:",omitempty"`
  42. }
  43.  
  44. type Coreference struct {
  45. Coreference *Coreference `xml:" coreference,omitempty" json:"coreference,omitempty"`
  46. Mention []*Mention `xml:" mention,omitempty" json:"mention,omitempty"`
  47. }
  48.  
  49. type Dep struct {
  50. AttrExtra string `xml:" extra,attr" json:",omitempty"`
  51. AttrType string `xml:" type,attr" json:",omitempty"`
  52. Dependent *Dependent `xml:" dependent,omitempty" json:"dependent,omitempty"`
  53. Governor *Governor `xml:" governor,omitempty" json:"governor,omitempty"`
  54. }
  55.  
  56. type Dependencies struct {
  57. AttrType string `xml:" type,attr" json:",omitempty"`
  58. Dep []*Dep `xml:" dep,omitempty" json:"dep,omitempty"`
  59. }
  60.  
  61. type Dependent struct {
  62. AttrCopy string `xml:" copy,attr" json:",omitempty"`
  63. AttrIdx string `xml:" idx,attr" json:",omitempty"`
  64. Text string `xml:",chardata" json:",omitempty"`
  65. }
  66.  
  67. type Document struct {
  68. Coreference *Coreference `xml:" coreference,omitempty" json:"coreference,omitempty"`
  69. Sentences *Sentences `xml:" sentences,omitempty" json:"sentences,omitempty"`
  70. }
  71.  
  72. type End struct {
  73. Text string `xml:",chardata" json:",omitempty"`
  74. }
  75.  
  76. type Governor struct {
  77. AttrCopy string `xml:" copy,attr" json:",omitempty"`
  78. AttrIdx string `xml:" idx,attr" json:",omitempty"`
  79. Text string `xml:",chardata" json:",omitempty"`
  80. }
  81.  
  82. type Head struct {
  83. Text string `xml:",chardata" json:",omitempty"`
  84. }
  85.  
  86. type Lemma struct {
  87. Text string `xml:",chardata" json:",omitempty"`
  88. }
  89.  
  90. type Mention struct {
  91. AttrRepresentative string `xml:" representative,attr" json:",omitempty"`
  92. End *End `xml:" end,omitempty" json:"end,omitempty"`
  93. Head *Head `xml:" head,omitempty" json:"head,omitempty"`
  94. Sentence []*Sentence `xml:" sentence,omitempty" json:"sentence,omitempty"`
  95. Start *Start `xml:" start,omitempty" json:"start,omitempty"`
  96. Text *Text `xml:" text,omitempty" json:"text,omitempty"`
  97. }
  98.  
  99. type Parse struct {
  100. Text string `xml:",chardata" json:",omitempty"`
  101. }
  102.  
  103. type Root struct {
  104. Document *Document `xml:" document,omitempty" json:"document,omitempty"`
  105. }
  106.  
  107. type Sentence struct {
  108. AttrId string `xml:" id,attr" json:",omitempty"`
  109. Dependencies []*Dependencies `xml:" dependencies,omitempty" json:"dependencies,omitempty"`
  110. Parse *Parse `xml:" parse,omitempty" json:"parse,omitempty"`
  111. Text string `xml:",chardata" json:",omitempty"`
  112. Tokens *Tokens `xml:" tokens,omitempty" json:"tokens,omitempty"`
  113. }
  114.  
  115. type Sentences struct {
  116. Sentence []*Sentence `xml:" sentence,omitempty" json:"sentence,omitempty"`
  117. }
  118.  
  119. type Start struct {
  120. Text string `xml:",chardata" json:",omitempty"`
  121. }
  122.  
  123. type Text struct {
  124. Text string `xml:",chardata" json:",omitempty"`
  125. }
  126.  
  127. type Token struct {
  128. AttrId string `xml:" id,attr" json:",omitempty"`
  129. CharacterOffsetBegin *CharacterOffsetBegin `xml:" CharacterOffsetBegin,omitempty" json:"CharacterOffsetBegin,omitempty"`
  130. CharacterOffsetEnd *CharacterOffsetEnd `xml:" CharacterOffsetEnd,omitempty" json:"CharacterOffsetEnd,omitempty"`
  131. Lemma *Lemma `xml:" lemma,omitempty" json:"lemma,omitempty"`
  132. NER *NER `xml:" NER,omitempty" json:"NER,omitempty"`
  133. NormalizedNER *NormalizedNER `xml:" NormalizedNER,omitempty" json:"NormalizedNER,omitempty"`
  134. POS *POS `xml:" POS,omitempty" json:"POS,omitempty"`
  135. Speaker *Speaker `xml:" Speaker,omitempty" json:"Speaker,omitempty"`
  136. Timex *Timex `xml:" Timex,omitempty" json:"Timex,omitempty"`
  137. Word *Word `xml:" word,omitempty" json:"word,omitempty"`
  138. }
  139.  
  140. type Tokens struct {
  141. Token []*Token `xml:" token,omitempty" json:"token,omitempty"`
  142. }
  143.  
  144. type Word struct {
  145. Text string `xml:",chardata" json:",omitempty"`
  146. }
  147.  
  148. func main() {
  149. f, err := os.Open("../data/nlp.txt.xml")
  150. defer f.Close()
  151. if err != nil {
  152. panic(err)
  153. }
  154.  
  155. r := &Root{}
  156.  
  157. dec := xml.NewDecoder(f)
  158. err = dec.Decode(r)
  159. if err != nil {
  160. panic(err)
  161. }
  162.  
  163. // replace mentions to the representative mention.
  164. rep := ""
  165. for _, m := range r.Document.Coreference.Coreference.Mention {
  166. idx, _ := strconv.Atoi(m.Sentence[0].Text)
  167. s, _ := strconv.Atoi(m.Start.Text)
  168. e, _ := strconv.Atoi(m.End.Text)
  169.  
  170. if m.AttrRepresentative == "true" {
  171. rep = ""
  172. for i := s; i < e; i++ {
  173. if i != s {
  174. rep += " "
  175. }
  176. rep += r.Document.Sentences.Sentence[idx-1].Tokens.Token[i-1].Word.Text
  177. }
  178. } else {
  179. r.Document.Sentences.Sentence[idx-1].Tokens.Token[s-1].Word.Text =
  180. "[" + rep + "] (" + r.Document.Sentences.Sentence[idx-1].Tokens.Token[s-1].Word.Text
  181. r.Document.Sentences.Sentence[idx-1].Tokens.Token[e-2].Word.Text += ")"
  182. }
  183. }
  184.  
  185. // print the result
  186. for _, s := range r.Document.Sentences.Sentence {
  187. for _, t := range s.Tokens.Token {
  188. fmt.Print(t.Word.Text + " ")
  189. }
  190. fmt.Println("")
  191. }
  192. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement