Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
- import (
- "bufio"
- "bytes"
- "compress/gzip"
- "fmt"
- "io/ioutil"
- "log"
- "os"
- "strconv"
- "strings"
- cbor "github.com/2tvenom/cbor"
- )
type (
	// Vectors maps a word to its list of float64 vector components.
	Vectors map[string][]float64

	// MapOfSimilarity maps a float64 key to a string.
	// NOTE(review): presumably a similarity score keyed to a word; it is
	// not used in the visible source, so confirm against its callers.
	MapOfSimilarity map[float64]string
)
- func LoadVectors(inputFile string) Vectors {
- vectors := Vectors{}
- // load the Glove TXT file
- file, err := os.Open(inputFile)
- if err != nil {
- log.Fatal(err)
- }
- defer file.Close()
- scanner := bufio.NewScanner(file)
- if err := scanner.Err(); err != nil {
- log.Fatal(err)
- }
- for scanner.Scan() {
- stringSlice := strings.Split(scanner.Text(), " ")
- // length is complete slice minus 1
- for i := 1; i < (len(stringSlice) - 1); i++ {
- // parse the string to a float
- float, _ := strconv.ParseFloat(stringSlice[i], 64)
- // add the float to the vector
- vectors[stringSlice[0]] = append(vectors[stringSlice[0]], float)
- //vectors[stringSlice[0]][i] = float
- }
- }
- return vectors
- }
- func main() {
- // Load vectors
- vectors := Vectors{}
- vectors = LoadVectors("glove.6B.300d.txt")
- // Create encoder and marshal
- var buffTest bytes.Buffer
- encoder := cbor.NewEncoder(&buffTest)
- ok, error := encoder.Marshal(vectors)
- //check binary string
- if !ok {
- fmt.Errorf("Error decoding %s", error)
- }
- fmt.Printf("Encoding to CBOR = done")
- // Create GZIPed version
- var b bytes.Buffer
- w := gzip.NewWriter(&b)
- w.Write(buffTest.Bytes())
- w.Close()
- // Create output file
- fmt.Println("Creating output file", "vectors.cbor.gz")
- err := ioutil.WriteFile("vectors.cbor.gz", b.Bytes(), 0644)
- if err != nil {
- panic(err)
- }
- fmt.Println("DONE")
- }
Add Comment
Please, Sign In to add comment