Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Parser is a command-line tool for parsing a big XML file.
- // Author: Ali Shanaakh <hi@shal.dev>
- // Usage: go run parse.go -path=./15-ufop/15.1-EX_XML_EDR_UO_03.10.2019.xml
- package main
- import (
- "encoding/xml"
- "flag"
- "fmt"
- "io"
- "log"
- "os"
- "time"
- "golang.org/x/text/encoding/charmap"
- )
// Founder is the decoding target for an element that contains a FOUNDER
// child element.
type Founder struct {
	// Founder receives the character data of the FOUNDER child element.
	Founder string `xml:"FOUNDER"`
}
- type Record struct {
- EDRPOU string `xml:"EDRPOU"`
- KVED string `xml:"KVED"`
- Boss string `xml:"BOSS"`
- Stan string `xml:"STAN"`
- ShortName string `xml:"SHORT_NAME"`
- Name string `xml:"NAME"`
- Address string `xml:"ADDRESS"`
- FoundingDocumentNum string `xml:"FOUNDING_DOCUMENT_NUM"`
- Founders []Founder `xml:"FOUNDERS"`
- }
// path is the value of the -path command-line flag: the location of the XML
// file to parse. It defaults to the empty string and is only meaningful
// after flag.Parse has been called.
var path = flag.String("path", "", "Path to XML file")
- func windows1251(charset string, input io.Reader) (io.Reader, error) {
- switch charset {
- case "windows-1251":
- return charmap.Windows1251.NewDecoder().Reader(input), nil
- default:
- return nil, fmt.Errorf("unknown charset: %s", charset)
- }
- }
- func main() {
- flag.Parse()
- start := time.Now()
- f, err := os.Open(*path)
- if err != nil {
- log.Fatal(err)
- }
- stats := make(map[string]int)
- decoder := xml.NewDecoder(f)
- decoder.CharsetReader = windows1251
- for {
- // Read tokens from the XML document in a stream.
- t, _ := decoder.Token()
- if t == nil {
- break
- }
- // Inspect the type of the token just read.
- switch token := t.(type) {
- case xml.StartElement:
- stats[token.Name.Local]++
- if token.Name.Local == "RECORD" {
- var record Record
- decoder.DecodeElement(&record, &token)
- }
- }
- }
- for k, v := range stats {
- log.Println(k, v)
- }
- log.Println("Time of execution", time.Since(start))
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement