Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2019
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.75 KB | None | 0 0
  1. // Parser is a command-line tool for parsing big XML files.
  2. // Author: Ali Shanaakh <hi@shal.dev>
  3. // Usage: go run parse.go -path=./15-ufop/15.1-EX_XML_EDR_UO_03.10.2019.xml
  4. package main
  5.  
import (
	"encoding/xml"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"sort"
	"strings"
	"time"

	"golang.org/x/text/encoding/charmap"
)
  17.  
  18. type Founder struct {
  19. Founder string `xml:"FOUNDER"`
  20. }
  21.  
  22. type Record struct {
  23. EDRPOU string `xml:"EDRPOU"`
  24. KVED string `xml:"KVED"`
  25. Boss string `xml:"BOSS"`
  26. Stan string `xml:"STAN"`
  27. ShortName string `xml:"SHORT_NAME"`
  28. Name string `xml:"NAME"`
  29. Address string `xml:"ADDRESS"`
  30. FoundingDocumentNum string `xml:"FOUNDING_DOCUMENT_NUM"`
  31. Founders []Founder `xml:"FOUNDERS"`
  32. }
  33.  
  34. var (
  35. path = flag.String("path", "", "Path to XML file")
  36. )
  37.  
  38. func windows1251(charset string, input io.Reader) (io.Reader, error) {
  39. switch charset {
  40. case "windows-1251":
  41. return charmap.Windows1251.NewDecoder().Reader(input), nil
  42. default:
  43. return nil, fmt.Errorf("unknown charset: %s", charset)
  44. }
  45. }
  46.  
  47. func main() {
  48. flag.Parse()
  49.  
  50. start := time.Now()
  51.  
  52. f, err := os.Open(*path)
  53. if err != nil {
  54. log.Fatal(err)
  55. }
  56.  
  57. stats := make(map[string]int)
  58. decoder := xml.NewDecoder(f)
  59. decoder.CharsetReader = windows1251
  60.  
  61. for {
  62. // Read tokens from the XML document in a stream.
  63. t, _ := decoder.Token()
  64. if t == nil {
  65. break
  66. }
  67.  
  68. // Inspect the type of the token just read.
  69. switch token := t.(type) {
  70. case xml.StartElement:
  71. stats[token.Name.Local]++
  72. if token.Name.Local == "RECORD" {
  73. var record Record
  74. decoder.DecodeElement(&record, &token)
  75. }
  76. }
  77. }
  78.  
  79. for k, v := range stats {
  80. log.Println(k, v)
  81. }
  82.  
  83. log.Println("Time of execution", time.Since(start))
  84. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement