SHARE
TWEET

Untitled

a guest Apr 24th, 2019 69 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. // Database splitter and flattener.
  2. // splits based on <fieldname>
  3. // flattens arrays and objects
  4.  
  5. // usage: In CLI: 'mongo <dbname> < dbflatten.js'
  6.  
  7. // Specify collection to be flattened: replace the <collectionname> placeholder below with a real collection name (the line is not valid JavaScript until you do). `col` is read as a global by createCollection and by the main loop.
  8.  
  9. col = db.<collectionname>
  10.  
  /**
   * Copies every document of `col` whose "<fieldname>" equals `x` into the
   * collection named after `x`, flattening nested objects and arrays into
   * top-level fields on the way.
   *
   * Flattened field names are built as `<x>_<key>_<subkey>...`; array elements
   * use their index as the key. Empty nested objects/arrays contribute no
   * fields at all.
   *
   * Reads the globals `col` (source collection) and `db` (current database).
   *
   * @param {*} x - Splitter value; selects the source documents and names the
   *     target collection.
   * @param {number} [batchSize=20000] - Maximum number of flattened documents
   *     held in memory and inserted per call. Probably there's an optimal
   *     number.
   * @throws {Error} If `batchSize` is not a number >= 1 (the loop below could
   *     otherwise never drain the cursor).
   */
  function createCollection(x, batchSize = 20000) {
    if (!(batchSize >= 1)) {
      throw new Error('batchSize must be a number >= 1, got ' + batchSize);
    }

    // The 'timestamp' and '_id' fields are always copied as-is, never recursed
    // into. Presumably they hold wrapper objects (ObjectId, Date, ...) that
    // pass the `instanceof Object` test below -- TODO confirm against the data.
    const isAlwaysLeaf = (name) => name === 'timestamp' || name === '_id';

    // Recursively copies the fields of `source` into `target`, prefixing each
    // key with `prefix`. Returns `target`.
    function flattenInto(target, source, prefix) {
      Object.keys(source).forEach((name) => {
        const value = source[name];
        // `+` (not a template literal) so a non-string splitter value is
        // converted exactly as in the original key-building expression.
        const flatKey = prefix + '_' + name;
        if (!isAlwaysLeaf(name) && value instanceof Object) {
          flattenInto(target, value, flatKey);
        } else {
          target[flatKey] = value;
        }
      });
      return target;
    }

    // getCollection() instead of db[name]: a splitter value that happens to
    // match a db method or property name still resolves to a collection.
    const target = db.getCollection(String(x));
    const cursor = col.find({ '<fieldname>': { $eq: x } });

    // Stream the cursor, flushing every `batchSize` documents to bound memory.
    let batch = [];
    while (cursor.hasNext()) {
      batch.push(flattenInto({}, cursor.next(), x));
      if (batch.length >= batchSize) {
        target.insertMany(batch);
        batch = [];
      }
    }

    // Flush the final, partially filled batch.
    if (batch.length > 0) {
      target.insertMany(batch);
    }
  }
  65.  
  66. // ==========================
  67. //
  68. //  Main loop
  69. //
  70.  
  // Get the distinct splitter values of "<fieldname>" from the source
  // collection `col`; one target collection is created per value.
  // `var` keeps `xs` a plain shell global, as in the original script.
  var xs = col.distinct("<fieldname>");

  xs.forEach((value) => {
    print(value);
    createCollection(value);

    // Examine a sample flat document from the collection just written.
    // getCollection() resolves the name even if it clashes with a db method.
    printjson(db.getCollection(String(value)).findOne());
  });
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top