Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
- import (
- "encoding/json"
- "fmt"
- "log"
- "regexp"
- "strconv"
- "strings"
- "github.com/dgraph-io/badger"
- //"io/ioutil"
- //"os"
- )
// Story is the JSON record decoded from each value of the source database.
// Apart from the two boolean flags, every field is carried as a JSON string.
type Story struct {
	Title       string `json:"title"`
	Url         string `json:"url"`
	Text        string `json:"text"`
	Dead        bool   `json:"dead"`
	By          string `json:"by"`
	Score       string `json:"score"`
	Time        string `json:"time"`
	Timestamp   string `json:"timestamp"`
	Type        string `json:"type"`
	Id          string `json:"id"`
	Parent      string `json:"parent"`
	Descendants string `json:"descendants"`
	Ranking     string `json:"ranking"`
	Deleted     bool   `json:"deleted"`
}

// encodeStory serializes the story to JSON. It panics if marshalling fails.
func (s Story) encodeStory() []byte {
	encoded, marshalErr := json.Marshal(s)
	if marshalErr != nil {
		panic(marshalErr)
	}
	return encoded
}

// decodeStory parses a JSON document into a Story. The returned Story holds
// whatever was decoded before the error, if any, was encountered.
func decodeStory(data []byte) (Story, error) {
	var story Story
	if unmarshalErr := json.Unmarshal(data, &story); unmarshalErr != nil {
		return story, unmarshalErr
	}
	return story, nil
}
// Ids is the JSON value stored under an index key: a list of integer ids.
type Ids struct {
	Ids []int `json:"ids"`
}

// encodeIds serializes the id list to JSON. It panics if marshalling fails.
func (list Ids) encodeIds() []byte {
	encoded, marshalErr := json.Marshal(list)
	if marshalErr != nil {
		panic(marshalErr)
	}
	return encoded
}

// decodeIds parses a JSON document into an Ids value. The returned value holds
// whatever was decoded before the error, if any, was encountered.
func decodeIds(data []byte) (Ids, error) {
	var list Ids
	if unmarshalErr := json.Unmarshal(data, &list); unmarshalErr != nil {
		return list, unmarshalErr
	}
	return list, nil
}
- func main() {
- // Open the Badger database located in the /tmp/badger directory.
- // Will be created if it doesn't exist.
- db, err := badger.Open(badger.DefaultOptions("./data/badger"))
- if err != nil {
- log.Fatal(err)
- }
- defer db.Close()
- dbTest, err := badger.Open(badger.DefaultOptions("./data/regex2"))
- if err != nil {
- log.Fatal(err)
- }
- defer dbTest.Close()
- err = dbTest.DropAll()
- if err != nil {
- fmt.Println("Error while dropping the database")
- }
- var batch int = 500000
- var keyCount int = 0
- idMap := make(map[string]Ids)
- reg, err := regexp.Compile("[^a-zA-Z0-9]+")
- if err != nil {
- log.Println(err)
- }
- txn := db.NewTransaction(true)
- defer txn.Discard()
- err = db.View(func(txn *badger.Txn) error {
- opts := badger.DefaultIteratorOptions
- opts.PrefetchSize = 10
- it := txn.NewIterator(opts)
- defer it.Close()
- for it.Rewind(); it.Valid(); it.Next() {
- item := it.Item()
- err := item.Value(func(v []byte) error {
- var s Story
- s, err1 := decodeStory(v)
- //check for errors
- if err1 != nil {
- log.Fatal(err)
- }
- //fmt.Println(s.Title)
- word := strings.Fields(s.Title)
- //var ids []string
- for _, v := range word {
- v := reg.ReplaceAllString(v, "")
- err = dbTest.View(func(txn *badger.Txn) error {
- item, err := txn.Get([]byte("_index::title::" + strings.ToLower(v)))
- if err != nil {
- return err
- }
- //if val found retrieve val
- val, err := item.ValueCopy(nil)
- if err != nil {
- return err
- }
- ids, err1 := decodeIds(val)
- if err1 != nil {
- log.Fatal("Error decoding")
- }
- idInt, err1 := strconv.Atoi(s.Id)
- if err1 != nil {
- fmt.Println("error")
- }
- ids.Ids = append(ids.Ids, idInt)
- //make a single transaction
- // Start a writable transaction.
- txn1 := dbTest.NewTransaction(true)
- defer txn1.Discard()
- // Use the transaction...
- err = txn1.Set([]byte("_index::title::"+strings.ToLower(v)), ids.encodeIds())
- if err != nil {
- return err
- }
- // Commit the transaction and check for error.
- if err := txn1.Commit(); err != nil {
- if err != nil {
- fmt.Println(err)
- }
- }
- return nil
- })
- //if val not found set a new key
- if err != nil {
- newIDInt, err2 := strconv.Atoi(s.Id)
- if err2 != nil {
- fmt.Println(err2)
- }
- var id []int
- id = append(id, newIDInt)
- idMap["_index::title::"+strings.ToLower(v)] = Ids{
- Ids: id,
- }
- keyCount++
- if keyCount > batch {
- keyCount = 0
- //write batch
- wb := dbTest.NewWriteBatch()
- defer wb.Cancel()
- for k, v := range idMap {
- err := wb.Set([]byte(k), v.encodeIds())
- if err != nil {
- log.Fatal(err)
- }
- }
- fmt.Println("batch commit")
- err1 := wb.Flush()
- if err1 != nil {
- log.Fatal(err1)
- }
- //reinitialize idMap
- idMap = make(map[string]Ids)
- }
- }
- }
- return nil
- })
- if err != nil {
- return err
- }
- }
- //write batch for remaining words
- wb := dbTest.NewWriteBatch()
- defer wb.Cancel()
- for k, v := range idMap {
- err := wb.Set([]byte(k), v.encodeIds())
- if err != nil {
- log.Fatal(err)
- }
- }
- fmt.Println("batch commit remaining")
- err1 := wb.Flush()
- if err1 != nil {
- log.Fatal(err1)
- }
- return nil
- })
- //error handle for View
- if err != nil {
- log.Fatal(err)
- }
- wb := dbTest.NewWriteBatch()
- defer wb.Cancel()
- for k, v := range idMap {
- err := wb.Set([]byte(k), v.encodeIds())
- if err != nil {
- log.Fatal(err)
- }
- }
- /*
- err = dbTest.View(func(txn *badger.Txn) error {
- item, err := txn.Get([]byte("_index::title::nuclear"))
- if err != nil {
- return err
- }
- val, err := item.ValueCopy(nil)
- if err != nil {
- return err
- }
- fmt.Printf("%s\n", string(val))
- return nil
- })
- if err != nil {
- fmt.Println("NotFound")
- }
- */
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement