Advertisement
Guest User

Untitled

a guest
Mar 24th, 2017
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.98 KB | None | 0 0
  1. package main
  2.  
  3. import (
  4. "os"
  5. "io"
  6. "bufio"
  7. "strings"
  8. "sort"
  9. "strconv"
  10. "github.com/gonum/plot"
  11. "github.com/gonum/plot/plotter"
  12. "github.com/gonum/plot/plotutil"
  13. "github.com/gonum/plot/vg"
  14. )
  15.  
  16. type sortedMap struct {
  17. m map[int]int
  18. s []int
  19. }
  20.  
  21. func (sm *sortedMap) Len() int {
  22. return len(sm.m)
  23. }
  24. func (sm *sortedMap) Less(i, j int) bool {
  25. return sm.m[sm.s[i]] > sm.m[sm.s[j]]
  26. }
  27. func (sm *sortedMap) Swap(i, j int) {
  28. sm.s[i], sm.s[j] = sm.s[j], sm.s[i]
  29. }
  30.  
  31. // sortedKeys returns the slice []int which is sorted by the count in map.
  32. // Eventually, we should hold the sorted slice []int, it's enough to sort.
  33. // func (sm *sortedMap) sortedKeys(m map[int]int) []int {
  34. func sortedKeys(m map[int]int) []int {
  35. sm := new(sortedMap)
  36. sm.m = m
  37. sm.s = make([]int, len(m))
  38. i := 0
  39. for key, _ := range m {
  40. sm.s[i] = key
  41. i++
  42. }
  43.  
  44. sort.Sort(sm)
  45.  
  46. return sm.s
  47. }
  48.  
  49. func drawBarChart(val []float64, label []string) {
  50. group := plotter.Values(val)
  51.  
  52. p, err := plot.New()
  53. if err != nil {
  54. panic(err)
  55. }
  56. p.Title.Text = "The Relationship of Frequency and Word count"
  57. p.X.Label.Text = "Frequency"
  58. p.Y.Label.Text = "Word count"
  59.  
  60. w := vg.Points(1)
  61.  
  62. bars, err := plotter.NewBarChart(group, w)
  63. if err != nil {
  64. panic(err)
  65. }
  66.  
  67. bars.LineStyle.Width = vg.Length(0)
  68. bars.Color = plotutil.Color(0)
  69.  
  70. p.Add(bars)
  71. p.NominalX(label...) // gonum/plot does NOT support Japanse.
  72.  
  73. if err := p.Save(5*vg.Inch, 3*vg.Inch, "q38.png"); err != nil {
  74. panic(err)
  75. }
  76. }
  77.  
  78.  
  79. func main() {
  80. f, err := os.Open("../data/neko.txt.mecab")
  81. defer f.Close()
  82. if err != nil {
  83. panic(err)
  84. }
  85.  
  86. r := bufio.NewReader(f)
  87.  
  88. sents := make([][]map[string]string, 0)
  89. sent := make([]map[string]string, 0)
  90.  
  91. for {
  92. b, _, err := r.ReadLine()
  93. if err == io.EOF {
  94. break
  95. }
  96.  
  97. // store morpheme which is not "EOS" into maps
  98. if string(b) != "EOS" {
  99. // split by tab and comma
  100. tmp := strings.Split(string(b), "\t")
  101. m := append(tmp[:1], strings.Split(tmp[1], ",")...)
  102.  
  103. morpheme := make(map[string]string)
  104.  
  105. morpheme["surface"] = m[0]
  106. morpheme["base"] = m[7]
  107. morpheme["pos"] = m[1]
  108. morpheme["pos1"] = m[2]
  109.  
  110. sent = append(sent, morpheme)
  111. } else { // if we find "EOS", store sentence to sentences and initialize the sent
  112. if len(sent) > 0 { // for appearing "EOS" continuously
  113. sents = append(sents, sent)
  114. sent = make([]map[string]string, 0)
  115. }
  116. }
  117.  
  118. }
  119.  
  120. // count the number of the morpheme has same base
  121. freq := make(map[string]int)
  122. for _, sent := range sents {
  123. for _, m := range sent {
  124. freq[m["base"]]++
  125. }
  126. }
  127.  
  128. // count the number of the kind of word
  129. hist := make(map[int]int)
  130.  
  131. for _, v := range freq {
  132. hist[v]++
  133. }
  134.  
  135. // draw Hist chart
  136. res := sortedKeys(hist)
  137. var val []float64
  138. var label []string
  139. for _, v := range res {
  140. val = append(val, float64(hist[v]))
  141.  
  142. // print label of x axis only a nice round number
  143. if v == 10 || v == 30 || v == 100 {
  144. // fmt.Println(strconv.Itoa(v))
  145. label = append(label, strconv.Itoa(v))
  146. } else {
  147. label = append(label, "")
  148. }
  149. }
  150. drawBarChart(val, label)
  151.  
  152. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement