Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
- import (
- "os"
- "bufio"
- "io"
- "bytes"
- "fmt"
- "errors"
- "strconv"
- "strings"
- )
- const (
- rootNodeName = "this is the root node name, which is must not be used in real data"
- wildCardName = "*"
- delim = 29
- )
- // Analyzer analysis and store the path, can return the path pattern
- type Analyzer = *Node
- // NewAnalyzer create a new Analyzer
- func NewAnalyzer(maxLeaf int) Analyzer {
- return &Node{
- name: rootNodeName,
- deep: 0,
- maxLeaf: maxLeaf,
- children: make([]*Node, 0, maxLeaf),
- }
- }
- func Load(file string, maxLeaf int) (Analyzer, error) {
- f, err := os.Open(file)
- if err != nil {
- return nil, err
- }
- defer f.Close()
- reader := bufio.NewReader(f)
- data := make([]string, 0, 10000)
- for {
- item, err := reader.ReadBytes(delim);
- if err != nil && err != io.EOF{
- return nil, err
- }
- if len(item) > 0 {
- data = append(data, string(item[:len(item)-1]))
- }
- if err == io.EOF {
- break
- }
- }
- return load(data, maxLeaf)
- }
- func (analyzer Analyzer) Apply(items []string) []string {
- node := analyzer
- for _, item := range items {
- if item == "" {
- continue
- }
- item := item
- node = node.addChildIfNotExist(node.newChild(item))
- }
- return node.path()
- }
- func (analyzer Analyzer) patternOf(items []string) []string {
- node := analyzer
- fields := make([]string, 0, len(items))
- for _, item := range items {
- item := item
- matched := false
- for _, n := range node.children {
- if n.name == item || n.name == wildCardName {
- fields = append(fields, n.name)
- matched = true
- }
- }
- if !matched {
- return items
- }
- }
- return fields
- }
- func (analyzer Analyzer) Dump(file string) ( error){
- f, err := os.Create(file)
- if err != nil {
- return err
- }
- defer f.Close()
- data := make([]string, 0, 10000)
- buf := bytes.NewBuffer(nil)
- for _, item := range analyzer.dump(data, -1) {
- if _, err := buf.WriteString(item); err != nil {
- return err
- }
- if err := buf.WriteByte(delim); err != nil {
- return err
- }
- }
- _, err = io.Copy(f, buf)
- return err
- }
- // Node represents a leaf in analyzer tree
- type Node struct {
- name string
- deep int
- maxLeaf int
- parent *Node
- children []*Node
- }
- func (node *Node) addChildIfNotExist(child *Node) *Node {
- for _, n := range node.children {
- if n.name == child.name || n.name == wildCardName {
- // 重名节点已存在, 将自己的儿子节点交给它
- for _, c := range child.children {
- c := c
- n.addChildIfNotExist(c)
- }
- return n
- }
- }
- // 超出最大节点数, 合并
- if len(node.children) >= node.maxLeaf {
- node.mergeChildren()
- return node.children[0]
- }
- node.children = append(node.children, child)
- return child
- }
- func (node *Node) newChild(name string) *Node {
- return &Node{
- name: name,
- parent: node,
- children: make([]*Node, 0, node.maxLeaf),
- maxLeaf: node.maxLeaf,
- deep: node.deep + 1,
- }
- }
- func (node *Node) mergeChildren() *Node {
- newNode := node.newChild(wildCardName)
- // 该节点的所有子节点, 都要归并成一个通配节点, 而这些子节点的子节点, 都要加到新的通配节点上.
- for _, child := range node.children {
- for _, child2 := range child.children {
- newNode.addChildIfNotExist(child2)
- }
- }
- node.children = []*Node{newNode}
- return newNode
- }
- func (node *Node) isRoot() bool {
- return node.name == rootNodeName
- }
- func (node *Node) path() []string {
- fields := make([]string, node.deep)
- i := node.deep - 1
- for node != nil && !node.isRoot() {
- fields[i] = node.name
- i--
- node = node.parent
- }
- return fields
- }
- func (node *Node) dump(data []string, parentIndex int) []string {
- data = append(data, node.name, fmt.Sprintf("%d", parentIndex))
- index := len(data) - 2
- for _, child := range node.children {
- data = child.dump(data, index)
- }
- return data
- }
- func load(data []string, maxLeaf int) (*Node, error){
- if len(data) < 2 {
- return nil, errors.New("data is too short")
- }
- nodes := make([]*Node, 0, len(data))
- for i, item := range data {
- var node *Node
- if i % 2 == 0 {
- pi, err := strconv.Atoi(data[i+1])
- if err != nil {
- return nil, fmt.Errorf("\"%s\" at index %d is not a number", data[i+1], i+1)
- }
- if pi == -1 { // root node
- node = &Node{
- name: item,
- deep: 0,
- maxLeaf: maxLeaf,
- }
- } else {
- node = &Node{
- name: item,
- parent: nodes[pi],
- maxLeaf: maxLeaf,
- }
- node.deep = node.parent.deep + 1
- node.parent.children = append(node.parent.children, node)
- }
- }
- nodes = append(nodes, node)
- }
- return nodes[0], nil
- }
- func main() {
- paths := []string{
- "a/b/c/d",
- "a/w/c/d",
- "a/l/c/d",
- "a/d/i/d",
- "a/1/i/d",
- "a/8/j/d",
- "a/9/j/d",
- "z/4/k/f",
- "",
- "/",
- "a",
- }
- analyzer := NewAnalyzer(3)
- for _, path := range paths {
- fields := strings.Split(path, "/")
- result := analyzer.Apply(fields)
- fmt.Println(path, "=>", strings.Join(result, "/"))
- }
- result := analyzer.Apply(nil)
- fmt.Println("<nil> =>", strings.Join(result, "/"))
- result = analyzer.Apply([]string{})
- fmt.Println("<empty> =>", strings.Join(result, "/"))
- if err := analyzer.Dump("/tmp/data"); err != nil {
- panic(err)
- }
- analyzer, err := Load("/tmp/data", 3)
- if err != nil {
- panic(err)
- }
- }
Add Comment
Please, Sign In to add comment