Advertisement
gisejo

Untitled

Nov 1st, 2019
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 17.89 KB | None | 0 0
  1. //
  2. // graph_generator.cpp
  3. // graph_library
  4. //
  5. // Created by Elijah Afanasiev on 17.08.17.
  6. // Copyright В© 2017 MSU. All rights reserved.
  7. //
  8. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  9.  
  10. #include <omp.h>
  11. #include <iostream>
  12. #include <string>
  13. #include <fstream>
  14. #include <algorithm>
  15.  
  16. #include <stdio.h>
  17. #include <math.h>
  18. #include <vector>
  19.  
  20. using namespace std;
  21.  
  22. #ifndef uint32_t
  23. #define uint32_t int
  24. #endif
  25.  
  26. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  27.  
  28. template <typename _T>
  29. _T rand_uniform_val(int _upper_border)
  30. {
  31. return (_T)(rand() % _upper_border);
  32. }
  33.  
  34. template <>
  35. int rand_uniform_val(int _upper_border)
  36. {
  37. return (int)(rand() % _upper_border);
  38. }
  39.  
  40. template <>
  41. float rand_uniform_val(int _upper_border)
  42. {
  43. return (float)(rand() % _upper_border) / _upper_border;
  44. }
  45.  
  46. template <>
  47. double rand_uniform_val(int _upper_border)
  48. {
  49. return (double)(rand() % _upper_border) / _upper_border;
  50. }
  51.  
  52. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  53.  
  54. bool save_to_file(int *_src_ids, int *_dst_ids, float *_weights, int _vertices_count, long long _edges_count, string _file_name, bool _weighted)
  55. {
  56. ofstream graph_file(_file_name.c_str(), ios::binary);
  57. if (!graph_file.is_open())
  58. return false;
  59.  
  60. graph_file.write(reinterpret_cast<const char*>(&_vertices_count), sizeof(int));
  61. graph_file.write(reinterpret_cast<const char*>(&_edges_count), sizeof(long long));
  62.  
  63. for (long long i = 0; i < _edges_count; i++)
  64. {
  65. graph_file.write(reinterpret_cast<const char*>(&_src_ids[i]), sizeof(int));
  66. graph_file.write(reinterpret_cast<const char*>(&_dst_ids[i]), sizeof(int));
  67.  
  68. //if(_weighted)
  69. graph_file.write(reinterpret_cast<const char*>(&_weights[i]), sizeof(float));
  70. }
  71.  
  72. graph_file.close();
  73. return true;
  74. }
  75.  
  76. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  77.  
  78. void get_cmd_params(int _argc, char **_argv, int &_scale, int &_avg_degree, string &_file_name, string &_type, bool &_directed, bool &_weighted, int &_threads)
  79. {
  80. // set deafualt params
  81. _scale = 14;
  82. _avg_degree = 32;
  83. _file_name = "graph.data";
  84. _type = "RMAT";
  85. _directed = true;
  86. _weighted = true;
  87. _threads = omp_get_max_threads();
  88.  
  89. // get params from cmd line
  90. for (int i = 1; i < _argc; i++)
  91. {
  92. string option(_argv[i]);
  93.  
  94. if (option.compare("-s") == 0)
  95. {
  96. _scale = atoi(_argv[++i]);
  97. }
  98.  
  99. if (option.compare("-e") == 0)
  100. {
  101. _avg_degree = atoi(_argv[++i]);
  102. }
  103.  
  104. if (option.compare("-file") == 0)
  105. {
  106. _file_name = _argv[++i];
  107. }
  108.  
  109. if (option.compare("-type") == 0)
  110. {
  111. _type = _argv[++i];
  112. }
  113.  
  114. if (option.compare("-undirected") == 0)
  115. {
  116. _directed = false;
  117. }
  118.  
  119. if (option.compare("-unweighted") == 0)
  120. {
  121. _weighted = false;
  122. }
  123.  
  124. if (option.compare("-t") == 0)
  125. {
  126. _threads = atoi(_argv[++i]);
  127. }
  128. }
  129.  
  130. }
  131.  
  132.  
  133. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  134.  
  135. void SSCA2(int *_src_ids, int *_dst_ids, float *_weights, int _vertices_count, long _max_clique_size, bool _directed, bool _weighted, long long &_edges_count)
  136. {
  137. uint32_t TotVertices;
  138. uint32_t* clusterSizes;
  139. uint32_t* firstVsInCluster;
  140. uint32_t estTotClusters, totClusters;
  141.  
  142. uint32_t *startVertex, *endVertex;
  143. uint32_t numEdges;
  144. uint32_t numIntraClusterEdges, numInterClusterEdges;
  145. float* weights;
  146. float MinWeight, MaxWeight;
  147. uint32_t MaxCliqueSize;
  148. uint32_t MaxParallelEdges = 1;
  149. double ProbUnidirectional = 1.0;
  150. double ProbIntercliqueEdges = 0.1;
  151. uint32_t i_cluster, currCluster;
  152. uint32_t *startV, *endV, *d;
  153. uint32_t estNumEdges, edgeNum;
  154.  
  155. uint32_t i, j, k, t, t1, t2, dsize;
  156. double p;
  157. uint32_t* permV;
  158.  
  159. // initialize RNG
  160.  
  161. MinWeight = 0.0;
  162. MaxWeight = 1.0;
  163. TotVertices = _vertices_count;
  164.  
  165. // generate clusters
  166. MaxCliqueSize = _max_clique_size;
  167. estTotClusters = 1.25 * TotVertices / (MaxCliqueSize/2);
  168. clusterSizes = (uint32_t *) malloc(estTotClusters*sizeof(uint32_t));
  169.  
  170. for(i = 0; i < estTotClusters; i++)
  171. {
  172. clusterSizes[i] = 1 + (rand_uniform_val<double>(10000.0) *MaxCliqueSize);
  173. }
  174.  
  175. totClusters = 0;
  176.  
  177. firstVsInCluster = (uint32_t *) malloc(estTotClusters*sizeof(uint32_t));
  178.  
  179. firstVsInCluster[0] = 0;
  180. for (i=1; i<estTotClusters; i++)
  181. {
  182. firstVsInCluster[i] = firstVsInCluster[i-1] + clusterSizes[i-1];
  183. if (firstVsInCluster[i] > TotVertices-1)
  184. break;
  185. }
  186.  
  187. totClusters = i;
  188.  
  189. clusterSizes[totClusters-1] = TotVertices - firstVsInCluster[totClusters-1];
  190.  
  191. // generate intra-cluster edges
  192. estNumEdges = (uint32_t) ((TotVertices * (double) MaxCliqueSize * (2-ProbUnidirectional)/2) +
  193. (TotVertices * (double) ProbIntercliqueEdges/(1-ProbIntercliqueEdges))) * (1+MaxParallelEdges/2);
  194.  
  195. if ((estNumEdges > ((1<<30) - 1)) && (sizeof(uint32_t*) < 8))
  196. {
  197. fprintf(stderr, "ERROR: long* should be 8 bytes for this problem size\n");
  198. fprintf(stderr, "\tPlease recompile the code in 64-bit mode\n");
  199. exit(-1);
  200. }
  201.  
  202. edgeNum = 0;
  203. p = ProbUnidirectional;
  204.  
  205. fprintf (stderr, "[allocating %3.3f GB memory ... ", (double) 2*estNumEdges*8/(1<<30));
  206.  
  207. startV = (uint32_t *) malloc(estNumEdges*sizeof(uint32_t));
  208. endV = (uint32_t *) malloc(estNumEdges*sizeof(uint32_t));
  209.  
  210. fprintf(stderr, "done] ");
  211.  
  212. for (i_cluster=0; i_cluster < totClusters; i_cluster++)
  213. {
  214. for (i = 0; i < clusterSizes[i_cluster]; i++)
  215. {
  216. for (j = 0; j < i; j++)
  217. {
  218. for (k = 0; k<1 + ((uint32_t)(MaxParallelEdges - 1) * rand_uniform_val<double>(10000.0)); k++)
  219. {
  220. startV[edgeNum] = j + \
  221. firstVsInCluster[i_cluster];
  222. endV[edgeNum] = i + \
  223. firstVsInCluster[i_cluster];
  224. edgeNum++;
  225. }
  226. }
  227.  
  228. }
  229. }
  230. numIntraClusterEdges = edgeNum;
  231.  
  232. //connect the clusters
  233. dsize = (uint32_t) (log((double)TotVertices)/log(2));
  234. d = (uint32_t *) malloc(dsize * sizeof(uint32_t));
  235. for (i = 0; i < dsize; i++) {
  236. d[i] = (uint32_t) pow(2, (double) i);
  237. }
  238.  
  239. currCluster = 0;
  240.  
  241. for (i = 0; i < TotVertices; i++)
  242. {
  243. p = ProbIntercliqueEdges;
  244. for (j = currCluster; j<totClusters; j++)
  245. {
  246. if ((i >= firstVsInCluster[j]) && (i < firstVsInCluster[j] + clusterSizes[j]))
  247. {
  248. currCluster = j;
  249. break;
  250. }
  251. }
  252. for (t = 1; t < dsize; t++)
  253. {
  254. j = (i + d[t] + (uint32_t)(rand_uniform_val<double>(10000.0) * (d[t] - d[t - 1]))) % TotVertices;
  255. if ((j<firstVsInCluster[currCluster]) || (j>=firstVsInCluster[currCluster] + clusterSizes[currCluster]))
  256. {
  257. for (k = 0; k<1 + ((uint32_t)(MaxParallelEdges - 1)* rand_uniform_val<double>(10000.0)); k++)
  258. {
  259. if (p > rand_uniform_val<double>(10000.0))
  260. {
  261. startV[edgeNum] = i;
  262. endV[edgeNum] = j;
  263. edgeNum++;
  264. }
  265. }
  266. }
  267. p = p/2;
  268. }
  269. }
  270.  
  271. numEdges = edgeNum;
  272. numInterClusterEdges = numEdges - numIntraClusterEdges;
  273.  
  274. free(clusterSizes);
  275. free(firstVsInCluster);
  276. free(d);
  277.  
  278. fprintf(stderr, "done\n");
  279. fprintf(stderr, "\tNo. of inter-cluster edges - %d\n", numInterClusterEdges);
  280. fprintf(stderr, "\tTotal no. of edges - %d\n", numEdges);
  281.  
  282. // shuffle vertices to remove locality
  283. fprintf(stderr, "Shuffling vertices to remove locality ... ");
  284. fprintf(stderr, "[allocating %3.3f GB memory ... ", (double)(TotVertices + 2 * numEdges) * 8 / (1 << 30));
  285.  
  286. permV = (uint32_t *)malloc(TotVertices*sizeof(uint32_t));
  287. startVertex = (uint32_t *)malloc(numEdges*sizeof(uint32_t));
  288. endVertex = (uint32_t *)malloc(numEdges*sizeof(uint32_t));
  289.  
  290. for (i = 0; i<TotVertices; i++)
  291. {
  292. permV[i] = i;
  293. }
  294.  
  295. for (i = 0; i<TotVertices; i++)
  296. {
  297. t1 = i + rand_uniform_val<double>(10000.0) * (TotVertices - i);
  298. if (t1 != i)
  299. {
  300. t2 = permV[t1];
  301. permV[t1] = permV[i];
  302. permV[i] = t2;
  303. }
  304. }
  305.  
  306. for (i = 0; i<numEdges; i++)
  307. {
  308. startVertex[i] = permV[startV[i]];
  309. endVertex[i] = permV[endV[i]];
  310. }
  311.  
  312. free(startV);
  313. free(endV);
  314. free(permV);
  315.  
  316. // generate edge weights
  317.  
  318. fprintf(stderr, "Generating edge weights ... ");
  319. weights = (float *)malloc(numEdges*sizeof(float));
  320. for (i = 0; i<numEdges; i++)
  321. {
  322. weights[i] = MinWeight + (float)(MaxWeight - MinWeight) * rand_uniform_val<float>(10000.0);
  323. }
  324.  
  325. vector<vector<uint32_t> > dests(TotVertices);
  326. vector<vector<float> > weight_vect(TotVertices);
  327.  
  328. _edges_count = numEdges;
  329.  
  330. // add edges to graph
  331. for (uint32_t cur_edge = 0; cur_edge < numEdges; cur_edge++)
  332. {
  333. int from = startVertex[cur_edge];
  334. int to = endVertex[cur_edge];
  335. float edge_weight = weights[cur_edge];
  336.  
  337. if(_directed)
  338. {
  339. _src_ids[cur_edge] = from;
  340. _dst_ids[cur_edge] = to;
  341.  
  342. if(_weighted)
  343. _weights[cur_edge] = edge_weight;
  344. else
  345. _weights[cur_edge] = 1.0;
  346. }
  347.  
  348. if(!_directed)
  349. {
  350. _src_ids[cur_edge] = min(to, from);
  351. _dst_ids[cur_edge] = max(to, from);
  352. if(_weighted)
  353. _weights[cur_edge] = edge_weight;
  354. else
  355. _weights[cur_edge] = 1.0;
  356. }
  357.  
  358. }
  359. fprintf(stderr, "done\n");
  360. }
  361.  
  362. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  363.  
  364. void R_MAT(int *src_ids, int *dst_ids, float *weights, int _vertices_count, long _edges_count, int _a_prob, int _b_prob,
  365. int _c_prob, int _d_prob, int _omp_threads, bool _directed, bool _weighted)
  366. {
  367. int n = (int)log2(_vertices_count);
  368.  
  369. cout << "using " << _omp_threads << " threads" << endl;
  370.  
  371. // generate and add edges to graph
  372. unsigned int seed = 0;
  373. #pragma omp parallel num_threads(_omp_threads) private(seed)
  374. {
  375. seed = int(time(NULL)) * omp_get_thread_num();
  376.  
  377. #pragma omp for schedule(static)
  378. for (long long cur_edge = 0; cur_edge < _edges_count; cur_edge++)
  379. {
  380. int x_middle = _vertices_count / 2, y_middle = _vertices_count / 2;
  381. for (long long i = 1; i < n; i++)
  382. {
  383. int a_beg = 0, a_end = _a_prob;
  384. int b_beg = _a_prob, b_end = b_beg + _b_prob;
  385. int c_beg = _a_prob + _b_prob, c_end = c_beg + _c_prob;
  386. int d_beg = _a_prob + _b_prob + _c_prob, d_end = d_beg + _d_prob;
  387.  
  388. int step = (int)pow(2, n - (i + 1));
  389.  
  390. int probability = rand_r(&seed) % 100;
  391. if (a_beg <= probability && probability < a_end)
  392. {
  393. x_middle -= step, y_middle -= step;
  394. }
  395. else if (b_beg <= probability && probability < b_end)
  396. {
  397. x_middle -= step, y_middle += step;
  398. }
  399. else if (c_beg <= probability && probability < c_end)
  400. {
  401. x_middle += step, y_middle -= step;
  402. }
  403. else if (d_beg <= probability && probability < d_end)
  404. {
  405. x_middle += step, y_middle += step;
  406. }
  407. }
  408. if (rand_r(&seed) % 2 == 0)
  409. x_middle--;
  410. if (rand_r(&seed) % 2 == 0)
  411. y_middle--;
  412.  
  413. int from = x_middle;
  414. int to = y_middle;
  415. float edge_weight = static_cast <float> (rand_r(&seed)) / static_cast <float> (RAND_MAX);
  416.  
  417. if(_directed)
  418. {
  419. src_ids[cur_edge] = from;
  420. dst_ids[cur_edge] = to;
  421.  
  422. if(_weighted)
  423. weights[cur_edge] = edge_weight;
  424. else
  425. weights[cur_edge] = 1.0;
  426. }
  427.  
  428. if(!_directed)
  429. {
  430. src_ids[cur_edge] = min(to, from);
  431. dst_ids[cur_edge] = max(to, from);
  432. if(_weighted)
  433. weights[cur_edge] = edge_weight;
  434. else
  435. weights[cur_edge] = 1.0;
  436. }
  437. }
  438. }
  439. }
  440.  
  441. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  442.  
  443. void uniform_random(int *src_ids, int *dst_ids, float *weights, int _vertices_count, long _edges_count, int _omp_threads, bool _directed, bool _weighted)
  444. {
  445. int n = (int)log2(_vertices_count);
  446.  
  447. cout << "using " << _omp_threads << " threads" << endl;
  448.  
  449. // generate and add edges to graph
  450. unsigned int seed = 0;
  451. #pragma omp parallel num_threads(_omp_threads) private(seed)
  452. {
  453. seed = int(time(NULL)) * omp_get_thread_num();
  454.  
  455. #pragma omp for schedule(static)
  456. for (long long cur_edge = 0; cur_edge < _edges_count; cur_edge++)
  457. {
  458. int from = rand() % _vertices_count;
  459. int to = rand() % _vertices_count;
  460. float edge_weight = static_cast <float> (rand_r(&seed)) / static_cast <float> (RAND_MAX);
  461.  
  462. if(_directed)
  463. {
  464. src_ids[cur_edge] = from;
  465. dst_ids[cur_edge] = to;
  466.  
  467. if(_weighted)
  468. weights[cur_edge] = edge_weight;
  469. else
  470. weights[cur_edge] = 1.0;
  471. }
  472.  
  473. if(!_directed)
  474. {
  475. src_ids[cur_edge] = min(to, from);
  476. dst_ids[cur_edge] = max(to, from);
  477. if(_weighted)
  478. weights[cur_edge] = edge_weight;
  479. else
  480. weights[cur_edge] = 1.0;
  481. }
  482. }
  483. }
  484. }
  485.  
  486. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  487.  
  488. int main(int argc, char **argv)
  489. {
  490. try
  491. {
  492. int scale, avg_degree;
  493. string file_name;
  494. int threads = omp_get_max_threads();
  495. bool weighted, directed;
  496. string type;
  497.  
  498. get_cmd_params(argc, argv, scale, avg_degree, file_name, type, directed, weighted, threads);
  499.  
  500. int vertices_count = pow(2.0, scale);
  501. long long edges_count = avg_degree * (long long)vertices_count;
  502.  
  503. cout << "Using " << threads << " threads" << endl << endl;
  504.  
  505. int *src_ids = new int[edges_count];
  506. int *dst_ids = new int[edges_count];
  507. float *weights;
  508.  
  509. //if(weighted)
  510. //{
  511. weights = new float[edges_count];
  512. //}
  513.  
  514. // generate edges list graph in parallel
  515. double t1 = omp_get_wtime();
  516.  
  517. if(type == "RMAT")
  518. {
  519. cout << "Generating RMAT graph" << endl;
  520. R_MAT(src_ids, dst_ids, weights, vertices_count, edges_count, 45, 20, 20, 15, threads, directed, weighted);
  521. }
  522. else if(type == "SSCA2")
  523. {
  524. cout << "Generating SSCA2 graph" << endl;
  525. SSCA2(src_ids, dst_ids, weights, vertices_count, avg_degree, directed, weighted, edges_count);
  526. }
  527. else if(type == "UR")
  528. {
  529. cout << "Generating Uniform Random graph" << endl;
  530. uniform_random(src_ids, dst_ids, weights, vertices_count, edges_count, threads, directed, weighted);
  531. }
  532. else
  533. {
  534. cout << "Error! Unknow graph type" << endl;
  535. delete[] src_ids;
  536. delete[] dst_ids;
  537.  
  538. //if(weighted)
  539. //{
  540. delete[] weights;
  541. //}
  542. return 1;
  543. }
  544.  
  545. double t2 = omp_get_wtime();
  546. cout << "Generation time: " << t2 - t1 << " sec" << endl << endl;
  547.  
  548. cout << "Saving graph to " << file_name << endl;
  549. t1 = omp_get_wtime();
  550. save_to_file(src_ids, dst_ids, weights, vertices_count, edges_count, file_name, weighted);
  551. t2 = omp_get_wtime();
  552. cout << "Save time: " << t2 - t1 << " sec" << endl << endl;
  553.  
  554. cout << "done!" << endl << endl;
  555.  
  556. delete[] src_ids;
  557. delete[] dst_ids;
  558.  
  559. //if(weighted)
  560. //{
  561. delete[] weights;
  562. //}
  563. }
  564. catch (const char *error)
  565. {
  566. cout << error << endl;
  567. getchar();
  568. return 1;
  569. }
  570. catch (...)
  571. {
  572. cout << "unknown error" << endl;
  573. }
  574.  
  575. cout << "press any key to exit..." << endl;
  576. //getchar();
  577. return 0;
  578. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement