Advertisement
Guest User

Untitled

a guest
Aug 27th, 2015
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.52 KB | None | 0 0
  1. /*
  2. docgen.js
  3.  
  4. Generate sample MongoDB documents based on a schemaTemplate.
  5.  
  6. To run: "mongo docgen.js"
  7. */
  8.  
  9. /* -- EDIT BELOW -- configuration settings */
  10.  
  // Target database and collection that receive the generated documents.
  var database = "performance";
  var collection = "blog";

  // Total number of documents to generate and insert.
  var numDocsToInsert = 1000000;

  // Template for every generated document. Each key is a field name. An object
  // value is handed to getFieldValue(), which expands the "#..." operator keys
  // it recognises; any non-object value is copied into the document unchanged.
  var schemaTemplate = {
    "headline": { "#RANDOM_STRING": 100 },
    "date": { "#RANDOM_DATE_DAYS_BACK": [0, 365] },
    "url": { "#RANDOM_STRING": 100 },
    "author": {
      "#SUBDOCUMENT": {
        "name": { "#RANDOM_STRING": 25 },
        "title": { "#RANDOM_STRING": 25 },
        "authorid": { "#OBJECTID": 1 }
      }
    },
    // NOTE(review): "APAC" is listed twice, which doubles its chance of being
    // picked by #RAND -- confirm this weighting is intended.
    "region": { "#RAND": ["NA", "SA", "AF", "APAC", "APAC", "EU", "UNKNOWN"] },
    "section": { "#RAND": ["Technology", "Business", "Finance", "Lifestyle"] },
    "tags": {
      "#ARRAY_VALUES": ["AAPL", "CUPERTINO", "GOOG", "MOUNTAINVIEW"],
      "#ARRAY_LENGTH": [1, 5]
    },
    "story_views": { "#RAND_INT": [0, 1000000] },
    "comments": {
      "#ARRAY_VALUES": [
        {
          "#SUBDOCUMENT": {
            "name": { "#RANDOM_STRING": 50 },
            "comment": { "#RANDOM_LOREM": 200 },
            "date": { "#RANDOM_DATE_DAYS_BACK": [0, 365] }
          }
        }
      ],
      "#ARRAY_LENGTH": [1, 5],
      // Only this percentage of documents receive a "comments" field.
      "#INSERT_PERCENT": 50
    }
  };
  68.  
  69.  
  70. /* -- DO NOT EDIT BELOW -- */
  71.  
  72. /*
  73.  
  74. -- The schemaTemplate above creates documents with the following form --
  75.  
  76. {
  77. "_id" : ObjectId("53459778c979defcb5db7cca"),
  78. "headline" : "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore",
  79. "date" : ISODate("2014-03-08T19:54:48.505Z"),
  80. "url" : "nODRzDCp9FNOHmwyi2DkN0xzoxoSRObLTMJZa9nxv2hQZlyrg1OgfNBVGDwyY3IpR05TNeqpy1rvn4TlakwmdXEKRa9WMVySOXFu",
  81. "author" : {
  82. "name" : "KptEeaxJ8GEGfXLZzyro7UyER",
  83. "title" : "cGZk2CpesND3tL6AI96FGJYx0",
  84. "authorid" : ObjectId("53459778c979defcb5db7cc9")
  85. },
  86. "section" : "Lifestyle",
  87. "tags" : [
  88. "AAPL",
  89. "MOUNTAINVIEW",
  90. "CUPERTINO",
  91. "AAPL"
  92. ],
  93. "story_views" : 485155,
  94. "comments" : [
  95. {
  96. "name" : "iN9OrDZpqsCxtmC9sPVjqcUO8PMnWWsusqK3umwbOIVIuieIPQ",
  97. "comment" : "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut ",
  98. "date" : ISODate("2013-08-03T18:54:48.506Z")
  99. },
  100. {
  101. "name" : "YBxrLoeJpPsbwZrbw4vOYD2PjedyoN2Myx4htw2FkpCwMrCjZZ",
  102. "comment" : "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut ",
  103. "date" : ISODate("2013-07-13T18:54:48.506Z")
  104. }
  105. ]
  106. }
  107.  
  108. -- Key/value schema example --
  109.  
  110. schemaTemplate = {
  111. "gameId" : "Motocross",
  112. "account_id" : {
  113. "#OBJECTID" : 1
  114. },
  115. "tags" : {
  116. "#ARRAY_VALUES" : [
  117. { "#SUBDOCUMENT" : {
  118. "key" : {
  119. "#RAND_INT" : [0,15]
  120. },
  121. "value" : {
  122. "#RAND_INT" : [0,1000000]
  123. },
  124. }
  125. }
  126. ],
  127. "#ARRAY_LENGTH" : [2,10],
  128. "#INSERT_PERCENT" : 100
  129. }
  130. }
  131.  
  132. output:
  133.  
  134. {
  135. "_id" : ObjectId("53446786235c5a35fa03faf7"),
  136. "gameId" : "Motocross",
  137. "account_id" : ObjectId("53446786235c5a35fa03faf6"),
  138. "tags" : [
  139. {
  140. "key" : 10,
  141. "value" : 470185
  142. },
  143. {
  144. "key" : 0,
  145. "value" : 718475
  146. },
  147. {
  148. "key" : 11,
  149. "value" : 829532
  150. }
  151. ]
  152. }
  153.  
  154.  
  155. Operators:
  156.  
  157. #OBJECTID - generate an objectid for the field, ignores value passed in
  158. { "account_id" : { "#OBJECTID" : 1 } }
  159.  
  160. #INSERT_PERCENT - percent of documents containing this field, percentage 0-100 is passed in
  161. { "address_line_2" : { "#VALUE" : "Suite 200", "#INSERT_PERCENT" : 25 }}
  162.  
  163. #RAND_INT - random integer between two values
  164. { "age" : { "#RAND_INT" : [0, 105] } }
  165.  
  166. #RAND - random value from an array
  167. { "position_title" : { "#RAND" : [0, "Editor", "NA", 34, true] } }
  168.  
  169. #SUBDOCUMENT - subdocument operator needed for nested subdocuments within other operators
  170. -- see example below
  171.  
  172. #ARRAY_LENGTH, #ARRAY_VALUES - create a randomly generated array based on array of possible values
  173. {
  174. "tags" : {
  175. "#ARRAY_VALUES" : [
  176. { "#SUBDOCUMENT" : {
  177. "key" : {
  178. "#RAND_INT" : [0,15]
  179. },
  180. "value" : {
  181. "#RAND_INT" : [0,1000000]
  182. },
  183. }
  184. }
  185. ],
  186. "#ARRAY_LENGTH" : [5,10] // random number of array elements between 5 and 10
  187. }
  188.  
  189. #RANDOM_STRING - create a random string of N characters
  190.  
  191. #RANDOM_LOREM - create a string of N characters by repeating the Lorem ipsum text (the output is deterministic, not random)
  192.  
  193. #RANDOM_DATE_DAYS_BACK - set the field to a random date within [N,M] days back
  194.  
  195. */
  196.  
  /**
   * Build one document from a schema specification.
   *
   * Every own property of schemaSpec becomes a candidate field:
   *   - a non-null object value is passed to getFieldValue(); when that returns
   *     null or undefined the field is left out of the document,
   *   - any other value (string, number, boolean, null, ...) is copied as-is.
   *
   * @param {Object} schemaSpec - field name -> literal value or operator object
   * @returns {Object} the newly generated document
   */
  function generateDocument(schemaSpec) {
    var generatedDoc = {};

    for (var key in schemaSpec) {
      if (!Object.prototype.hasOwnProperty.call(schemaSpec, key)) {
        continue;
      }

      var fieldSpec = schemaSpec[key];

      // typeof null is 'object', so null must be excluded explicitly;
      // it is treated as a literal value like any other primitive.
      if (fieldSpec === null || typeof fieldSpec !== 'object') {
        generatedDoc[key] = fieldSpec;
        continue;
      }

      // Declared locally: this function is re-entered through getFieldValue()
      // for nested sub-documents, so the value must not live in a global.
      var generatedVal = getFieldValue(fieldSpec);

      // A null/undefined result means "omit this field" (e.g. #INSERT_PERCENT).
      if (generatedVal != null) {
        generatedDoc[key] = generatedVal;
      }
    }

    return generatedDoc;
  }
  220.  
  /**
   * Return a random integer N with low <= N <= high (both bounds inclusive).
   * The bounds are swapped if they are passed in descending order.
   */
  function randomIntInclusive(low, high) {
    if (high < low) {
      var swapped = low;
      low = high;
      high = swapped;
    }
    return Math.floor(Math.random() * (high - low + 1)) + low;
  }

  // Add new operators here
  /**
   * Resolve one field specification to a concrete value.
   *
   * Operators are checked in a fixed order and the first match wins;
   * #INSERT_PERCENT is evaluated first and can suppress the field entirely.
   * An object that contains no recognised operator is returned unchanged.
   *
   * @param {Object} subdoc - object holding "#..." operator keys
   * @returns {*} the generated value, or null when #INSERT_PERCENT excludes it
   */
  function getFieldValue(subdoc) {
    var val;

    // "#INSERT_PERCENT" : 50
    if (subdoc.hasOwnProperty('#INSERT_PERCENT')) {
      val = subdoc['#INSERT_PERCENT'];
      // Roll 0..99; the field is kept only when the roll is below the percentage.
      var randomNum = Math.floor(Math.random() * 100);
      if (randomNum >= val) {
        return null;
      }
    }

    // "#VALUE" : "Test value"
    if (subdoc.hasOwnProperty('#VALUE')) {
      return subdoc['#VALUE'];
    }

    // "#RAND_INT" : [0, 100]
    if (subdoc.hasOwnProperty('#RAND_INT')) {
      val = subdoc['#RAND_INT'];
      if (val instanceof Array) {
        return randomIntInclusive(val[0], val[1]);
      }
    }

    // "#RAND" : ["M", "F", "NA", 15]
    if (subdoc.hasOwnProperty('#RAND')) {
      val = subdoc['#RAND'];
      if (val instanceof Array) {
        return val[Math.floor(Math.random() * val.length)];
      }
    }

    // "#OBJECTID" : 1
    if (subdoc.hasOwnProperty('#OBJECTID')) {
      return new ObjectId();
    }

    // "#SUBDOCUMENT" : { "title" : "editor", "name" : "jason" }
    if (subdoc.hasOwnProperty('#SUBDOCUMENT')) {
      return generateDocument(subdoc['#SUBDOCUMENT']);
    }

    // "#ARRAY_LENGTH" : [1, 5], "#ARRAY_VALUES" : ["a", "b"]
    if (subdoc.hasOwnProperty('#ARRAY_LENGTH')) {
      var arrayLength = subdoc['#ARRAY_LENGTH'];
      // A [min, max] pair selects a random length; any other value is used directly.
      if (arrayLength instanceof Array) {
        arrayLength = randomIntInclusive(arrayLength[0], arrayLength[1]);
      }

      var arrayValues = subdoc['#ARRAY_VALUES'];
      return constructArray(arrayValues, arrayLength);
    }

    // "#RANDOM_STRING" : 50
    if (subdoc.hasOwnProperty('#RANDOM_STRING')) {
      val = subdoc['#RANDOM_STRING'];
      return getRandomString(val);
    }

    // "#RANDOM_LOREM" : 100
    if (subdoc.hasOwnProperty('#RANDOM_LOREM')) {
      val = subdoc['#RANDOM_LOREM'];
      return getRandomLorem(val);
    }

    // "#RANDOM_DATE_DAYS_BACK" : [0,100]
    if (subdoc.hasOwnProperty('#RANDOM_DATE_DAYS_BACK')) {
      val = subdoc['#RANDOM_DATE_DAYS_BACK'];
      if (val instanceof Array) {
        var daysBack = randomIntInclusive(val[0], val[1]);
        var d = new Date();
        d.setDate(d.getDate() - daysBack);
        return d;
      }
    }

    // No operator matched: hand the object back untouched.
    return subdoc;
  }
  300.  
  /**
   * Build an array of numItems elements, each picked at random (with
   * repetition) from possibleArrayValuesArr.
   *
   * A picked element that is an object carrying a "#SUBDOCUMENT" key is
   * expanded through getFieldValue(); every other element is used as-is.
   *
   * @param {Array} possibleArrayValuesArr - candidate values to choose from
   * @param {number} numItems - number of elements to produce
   * @returns {Array} the generated array; empty when there are no candidates
   */
  function constructArray(possibleArrayValuesArr, numItems) {
    var returnValue = [];

    // Nothing to pick from (e.g. "#ARRAY_VALUES" missing or empty).
    if (!(possibleArrayValuesArr instanceof Array) || possibleArrayValuesArr.length === 0) {
      return returnValue;
    }

    for (var i = 0; i < numItems; i++) {
      var pickIndex = Math.floor(Math.random() * possibleArrayValuesArr.length);
      var newValue = possibleArrayValuesArr[pickIndex];

      // typeof null is 'object', so guard before calling hasOwnProperty.
      if (newValue !== null && typeof newValue === 'object' && newValue.hasOwnProperty('#SUBDOCUMENT')) {
        newValue = getFieldValue(newValue);
      }

      returnValue.push(newValue);
    }

    return returnValue;
  }
  316.  
  /**
   * Create a random alphanumeric string.
   *
   * @param {number} len - number of characters to generate
   * @returns {string} len characters drawn from 0-9, A-Z and a-z
   */
  function getRandomString(len) {
    var AB = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    var returnValue = "";

    // The counter is local so it cannot clash with loops in other functions.
    for (var i = 0; i < len; i++) {
      returnValue += AB.charAt(Math.floor(Math.random() * AB.length));
    }

    return returnValue;
  }
  326.  
  /**
   * Create a string of exactly len characters by repeating the Lorem ipsum
   * text. Despite the name, the output is deterministic: it always starts at
   * the beginning of the text and wraps around once the end is reached.
   *
   * @param {number} len - number of characters to generate
   * @returns {string} the first len characters of the endlessly repeated text
   */
  function getRandomLorem(len) {
    var lorem = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum";
    var returnValue = "";

    for (var i = 0; i < len; i++) {
      // Modulo wraps only after the final character has been emitted.
      returnValue += lorem.charAt(i % lorem.length);
    }

    return returnValue;
  }
  342.  
  // Generate and insert documents, sending them to the server in fixed-size
  // batches through the unordered bulk API.
  var dbRef = db.getSiblingDB(database);
  var targetCollection = dbRef.getCollection(collection);
  var batchSize = 1000;
  var bulk = targetCollection.initializeUnorderedBulkOp();
  var pendingDocs = 0; // documents queued in `bulk` but not yet executed

  for (var num_docs = 0; num_docs < numDocsToInsert; num_docs++) {

    bulk.insert(generateDocument(schemaTemplate));
    pendingDocs++;

    if (pendingDocs === batchSize) {
      bulk.execute();
      // num_docs is the zero-based index of the document just queued.
      print("Inserted: " + (num_docs + 1) + " documents into " + database + "." + collection);
      bulk = targetCollection.initializeUnorderedBulkOp();
      pendingDocs = 0;
    }
  }

  // Flush the final partial batch. The guard skips execute() on an empty bulk
  // operation, which the shell reports as an error.
  if (pendingDocs > 0) {
    bulk.execute();
  }
  print("Inserted: " + num_docs + " documents into " + database + "." + collection);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement