Advertisement
Guest User

rege

a guest
Oct 18th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.47 KB | None | 0 0
  1. package main
  2.  
  3. import (
  4. "encoding/json"
  5. "fmt"
  6. "log"
  7. "regexp"
  8. "strconv"
  9. "strings"
  10.  
  11. "github.com/dgraph-io/badger"
  12. //"io/ioutil"
  13. //"os"
  14. )
  15.  
  // Story is the JSON record stored under each "story:" key in the source
  // database. Apart from the two boolean flags, every field is decoded from a
  // JSON string. Field order is significant: it is the order in which
  // encodeStory emits the keys.
  type Story struct {
  	// Content.
  	Title string `json:"title"`
  	Url   string `json:"url"`
  	Text  string `json:"text"`
  	Dead  bool   `json:"dead"`

  	// Authorship, score and timing.
  	By        string `json:"by"`
  	Score     string `json:"score"`
  	Time      string `json:"time"`
  	Timestamp string `json:"timestamp"`

  	// Identity and position in the item tree.
  	Type        string `json:"type"`
  	Id          string `json:"id"`
  	Parent      string `json:"parent"`
  	Descendants string `json:"descendants"`
  	Ranking     string `json:"ranking"`
  	Deleted     bool   `json:"deleted"`
  }
  33.  
  34. func (h Story) encodeStory() []byte {
  35. data, err := json.Marshal(h)
  36. if err != nil {
  37. panic(err)
  38. }
  39.  
  40. return data
  41. }
  42.  
  43. func decodeStory(data []byte) (Story, error) {
  44. var h Story
  45. err := json.Unmarshal(data, &h)
  46. return h, err
  47. }
  48.  
  // Ids is the value stored under each "_index::title::<word>" key: the list
  // of numeric story ids recorded for that word, wrapped in an object so it
  // serialises as {"ids":[...]}.
  type Ids struct {
  	Ids []int `json:"ids"`
  }
  52.  
  53. func (h Ids) encodeIds() []byte {
  54. data, err := json.Marshal(h)
  55. if err != nil {
  56. panic(err)
  57. }
  58.  
  59. return data
  60. }
  61.  
  62. func decodeIds(data []byte) (Ids, error) {
  63. var h Ids
  64. err := json.Unmarshal(data, &h)
  65. return h, err
  66. }
  67.  
  68. func main() {
  69.  
  70. // Open the Badger database located in the /tmp/badger directory.
  71. // Will be created if it doesn't exist.
  72.  
  73. db, err := badger.Open(badger.DefaultOptions("./data/badger"))
  74. if err != nil {
  75. log.Fatal(err)
  76. }
  77. defer db.Close()
  78.  
  79. dbTest, err := badger.Open(badger.DefaultOptions("./data/regex2"))
  80. if err != nil {
  81. log.Fatal(err)
  82. }
  83. defer dbTest.Close()
  84.  
  85. err = dbTest.DropAll()
  86. if err != nil {
  87. fmt.Println("Error while dropping the database")
  88. }
  89.  
  90. var batch int = 100000
  91.  
  92. var keyCount int = 0
  93.  
  94. idMap := make(map[string]Ids)
  95.  
  96. reg, err := regexp.Compile("[^a-zA-Z0-9]+")
  97. if err != nil {
  98. log.Println(err)
  99. }
  100.  
  101. txn := db.NewTransaction(true)
  102. defer txn.Discard()
  103.  
  104. err = db.View(func(txn *badger.Txn) error {
  105.  
  106. opts := badger.DefaultIteratorOptions
  107. opts.PrefetchSize = 10
  108. opts.Prefix=[]byte("story:")
  109. it := txn.NewIterator(opts)
  110. defer it.Close()
  111. for it.Rewind(); it.Valid(); it.Next() {
  112.  
  113. item := it.Item()
  114.  
  115. err := item.Value(func(v []byte) error {
  116. var s Story
  117. s, err1 := decodeStory(v)
  118.  
  119. //check for errors
  120. if err1 != nil {
  121. log.Fatal(err)
  122. }
  123.  
  124. //fmt.Println(s.Title)
  125.  
  126. word := strings.Fields(s.Title)
  127.  
  128. //var ids []string
  129.  
  130. for _, v := range word {
  131.  
  132. v := reg.ReplaceAllString(v, "")
  133.  
  134. err = dbTest.View(func(txn *badger.Txn) error {
  135. item, err := txn.Get([]byte("_index::title::" + strings.ToLower(v)))
  136. if err != nil {
  137. return err
  138. }
  139.  
  140. //if val found retrieve val
  141.  
  142. val, err := item.ValueCopy(nil)
  143. if err != nil {
  144. return err
  145. }
  146.  
  147. ids, err1 := decodeIds(val)
  148.  
  149. if err1 != nil {
  150. log.Fatal("Error decoding")
  151. }
  152.  
  153. idInt, err1 := strconv.Atoi(s.Id)
  154.  
  155. if err1 != nil {
  156. fmt.Println("error")
  157. }
  158.  
  159. ids.Ids = append(ids.Ids, idInt)
  160.  
  161. //make a single transaction
  162.  
  163. // Start a writable transaction.
  164. txn1 := dbTest.NewTransaction(true)
  165. defer txn1.Discard()
  166.  
  167. // Use the transaction...
  168. err = txn1.Set([]byte("_index::title::"+strings.ToLower(v)), ids.encodeIds())
  169. if err != nil {
  170. return err
  171. }
  172.  
  173. // Commit the transaction and check for error.
  174. if err := txn1.Commit(); err != nil {
  175. if err != nil {
  176. fmt.Println(err)
  177. }
  178. }
  179.  
  180. return nil
  181. })
  182.  
  183. //if val not found set a new key
  184. if err != nil {
  185.  
  186. newIDInt, err2 := strconv.Atoi(s.Id)
  187.  
  188. if err2 != nil {
  189. fmt.Println(err2)
  190. }
  191.  
  192. var id []int
  193.  
  194. id = append(id, newIDInt)
  195.  
  196. idMap["_index::title::"+strings.ToLower(v)] = Ids{
  197. Ids: id,
  198. }
  199.  
  200. keyCount++
  201.  
  202. if keyCount > batch {
  203. keyCount = 0
  204. //write batch
  205. wb := dbTest.NewWriteBatch()
  206. defer wb.Cancel()
  207.  
  208. for k, v := range idMap {
  209.  
  210. err := wb.Set([]byte(k), v.encodeIds())
  211.  
  212. if err != nil {
  213. log.Fatal(err)
  214. }
  215. }
  216. fmt.Println("batch commit")
  217.  
  218. err1 := wb.Flush()
  219.  
  220. if err1 != nil {
  221. log.Fatal(err1)
  222. }
  223.  
  224. //reinitialize idMap
  225. idMap = make(map[string]Ids)
  226.  
  227. }
  228.  
  229. }
  230.  
  231. }
  232.  
  233. return nil
  234. })
  235.  
  236. if err != nil {
  237. return err
  238. }
  239.  
  240. }
  241. //write batch for remaining words
  242. wb := dbTest.NewWriteBatch()
  243. defer wb.Cancel()
  244.  
  245. for k, v := range idMap {
  246.  
  247. err := wb.Set([]byte(k), v.encodeIds())
  248.  
  249. if err != nil {
  250. log.Fatal(err)
  251. }
  252. }
  253. fmt.Println("batch commit remaining")
  254. err1 := wb.Flush()
  255.  
  256. if err1 != nil {
  257. log.Fatal(err1)
  258. }
  259.  
  260. return nil
  261. })
  262.  
  263. //error handle for View
  264. if err != nil {
  265. log.Fatal(err)
  266. }
  267.  
  268. wb := dbTest.NewWriteBatch()
  269. defer wb.Cancel()
  270.  
  271. for k, v := range idMap {
  272.  
  273. err := wb.Set([]byte(k), v.encodeIds())
  274.  
  275. if err != nil {
  276. log.Fatal(err)
  277. }
  278. }
  279.  
  280. /*
  281. err = dbTest.View(func(txn *badger.Txn) error {
  282. item, err := txn.Get([]byte("_index::title::nuclear"))
  283. if err != nil {
  284. return err
  285. }
  286. val, err := item.ValueCopy(nil)
  287. if err != nil {
  288. return err
  289. }
  290. fmt.Printf("%s\n", string(val))
  291. return nil
  292. })
  293.  
  294. if err != nil {
  295. fmt.Println("NotFound")
  296. }
  297. */
  298.  
  299. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement