Advertisement
gisejo

Untitled

Nov 1st, 2019
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 17.88 KB | None | 0 0
  1. //
  2. //  graph_generator.cpp
  3. //  graph_library
  4. //
  5. //  Created by Elijah Afanasiev on 17.08.17.
  6. //  Copyright В© 2017 MSU. All rights reserved.
  7. //
  8. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  9.  
  10. #include <omp.h>
  11. #include <iostream>
  12. #include <string>
  13. #include <fstream>
  14. #include <algorithm>
  15.  
  16. #include <stdio.h>
  17. #include <math.h>
  18. #include <vector>
  19.  
  20. using namespace std;
  21.  
  22. #ifndef uint32_t
  23. #define uint32_t int
  24. #endif
  25.  
  26. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  27.  
  28. template <typename _T>
  29. _T rand_uniform_val(int _upper_border)
  30. {
  31.     return (_T)(rand() % _upper_border);
  32. }
  33.  
  34. template <>
  35. int rand_uniform_val(int _upper_border)
  36. {
  37.     return (int)(rand() % _upper_border);
  38. }
  39.  
  40. template <>
  41. float rand_uniform_val(int _upper_border)
  42. {
  43.     return (float)(rand() % _upper_border) / _upper_border;
  44. }
  45.  
  46. template <>
  47. double rand_uniform_val(int _upper_border)
  48. {
  49.     return (double)(rand() % _upper_border) / _upper_border;
  50. }
  51.  
  52. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  53.  
  54. bool save_to_file(int *_src_ids, int *_dst_ids, float *_weights, int _vertices_count, long long _edges_count, string _file_name, bool _weighted)
  55. {
  56.     ofstream graph_file(_file_name.c_str(), ios::binary);
  57.     if (!graph_file.is_open())
  58.         return false;
  59.    
  60.     graph_file.write(reinterpret_cast<const char*>(&_vertices_count), sizeof(int));
  61.     graph_file.write(reinterpret_cast<const char*>(&_edges_count), sizeof(long long));
  62.    
  63.     for (long long i = 0; i < _edges_count; i++)
  64.     {
  65.         graph_file.write(reinterpret_cast<const char*>(&_src_ids[i]), sizeof(int));
  66.         graph_file.write(reinterpret_cast<const char*>(&_dst_ids[i]), sizeof(int));
  67.        
  68.         //if(_weighted)
  69.         graph_file.write(reinterpret_cast<const char*>(&_weights[i]), sizeof(float));
  70.     }
  71.    
  72.     graph_file.close();
  73.     return true;
  74. }
  75.  
  76. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  77.  
  78. void get_cmd_params(int _argc, char **_argv, int &_scale, int &_avg_degree, string &_file_name, string &_type, bool &_directed, bool &_weighted, int &_threads)
  79. {
  80.     // set deafualt params
  81.     _scale = 14;
  82.     _avg_degree = 32;
  83.     _file_name = "graph.data";
  84.     _type = "RMAT";
  85.     _directed = true;
  86.     _weighted = true;
  87.     _threads = omp_get_max_threads();
  88.    
  89.     // get params from cmd line
  90.     for (int i = 1; i < _argc; i++)
  91.     {
  92.         string option(_argv[i]);
  93.        
  94.         if (option.compare("-s") == 0)
  95.         {
  96.             _scale = atoi(_argv[++i]);
  97.         }
  98.        
  99.         if (option.compare("-e") == 0)
  100.         {
  101.             _avg_degree = atoi(_argv[++i]);
  102.         }
  103.        
  104.         if (option.compare("-file") == 0)
  105.         {
  106.             _file_name = _argv[++i];
  107.         }
  108.        
  109.         if (option.compare("-type") == 0)
  110.         {
  111.             _type = _argv[++i];
  112.         }
  113.        
  114.         if (option.compare("-undirected") == 0)
  115.         {
  116.             _directed = false;
  117.         }
  118.        
  119.         if (option.compare("-unweighted") == 0)
  120.         {
  121.             _weighted = false;
  122.         }
  123.        
  124.         if (option.compare("-t") == 0)
  125.         {
  126.             _threads = atoi(_argv[++i]);
  127.         }
  128.     }
  129.  
  130. }
  131.  
  132.  
  133. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  134.  
  135. void SSCA2(int *_src_ids, int *_dst_ids, float *_weights, int _vertices_count, long _max_clique_size, bool _directed, bool _weighted, long long &_edges_count)
  136. {
  137.     uint32_t TotVertices;
  138.     uint32_t* clusterSizes;
  139.     uint32_t* firstVsInCluster;
  140.     uint32_t estTotClusters, totClusters;
  141.    
  142.     uint32_t *startVertex, *endVertex;
  143.     uint32_t numEdges;
  144.     uint32_t numIntraClusterEdges, numInterClusterEdges;
  145.     float* weights;
  146.     float MinWeight, MaxWeight;
  147.     uint32_t MaxCliqueSize;
  148.     uint32_t MaxParallelEdges = 1;
  149.     double ProbUnidirectional = 1.0;
  150.     double ProbIntercliqueEdges = 0.1;
  151.     uint32_t i_cluster, currCluster;
  152.     uint32_t *startV, *endV, *d;
  153.     uint32_t estNumEdges, edgeNum;
  154.    
  155.     uint32_t i, j, k, t, t1, t2, dsize;
  156.     double p;
  157.     uint32_t* permV;
  158.    
  159.     // initialize RNG
  160.    
  161.     MinWeight = 0.0;
  162.     MaxWeight = 1.0;
  163.     TotVertices = _vertices_count;
  164.    
  165.     // generate clusters
  166.     MaxCliqueSize = _max_clique_size;
  167.     estTotClusters = 1.25 * TotVertices / (MaxCliqueSize/2);
  168.     clusterSizes = (uint32_t *) malloc(estTotClusters*sizeof(uint32_t));
  169.    
  170.     for(i = 0; i < estTotClusters; i++)
  171.     {
  172.         clusterSizes[i] = 1 + (rand_uniform_val<double>(10000.0) *MaxCliqueSize);
  173.     }
  174.    
  175.     totClusters = 0;
  176.    
  177.     firstVsInCluster = (uint32_t *) malloc(estTotClusters*sizeof(uint32_t));
  178.    
  179.     firstVsInCluster[0] = 0;
  180.     for (i=1; i<estTotClusters; i++)
  181.     {
  182.         firstVsInCluster[i] = firstVsInCluster[i-1] + clusterSizes[i-1];
  183.         if (firstVsInCluster[i] > TotVertices-1)
  184.             break;
  185.     }
  186.    
  187.     totClusters = i;
  188.    
  189.     clusterSizes[totClusters-1] = TotVertices - firstVsInCluster[totClusters-1];
  190.    
  191.     // generate intra-cluster edges
  192.     estNumEdges = (uint32_t) ((TotVertices * (double) MaxCliqueSize * (2-ProbUnidirectional)/2) +
  193.                               (TotVertices * (double) ProbIntercliqueEdges/(1-ProbIntercliqueEdges))) * (1+MaxParallelEdges/2);
  194.    
  195.     if ((estNumEdges > ((1<<30) - 1)) && (sizeof(uint32_t*) < 8))
  196.     {
  197.         fprintf(stderr, "ERROR: long* should be 8 bytes for this problem size\n");
  198.         fprintf(stderr, "\tPlease recompile the code in 64-bit mode\n");
  199.         exit(-1);
  200.     }
  201.    
  202.     edgeNum = 0;
  203.     p = ProbUnidirectional;
  204.    
  205.     fprintf (stderr, "[allocating %3.3f GB memory ... ", (double) 2*estNumEdges*8/(1<<30));
  206.    
  207.     startV = (uint32_t *) malloc(estNumEdges*sizeof(uint32_t));
  208.     endV = (uint32_t *) malloc(estNumEdges*sizeof(uint32_t));
  209.    
  210.     fprintf(stderr, "done] ");
  211.    
  212.     for (i_cluster=0; i_cluster < totClusters; i_cluster++)
  213.     {
  214.         for (i = 0; i < clusterSizes[i_cluster]; i++)
  215.         {
  216.             for (j = 0; j < i; j++)
  217.             {
  218.                 for (k = 0; k<1 + ((uint32_t)(MaxParallelEdges - 1) * rand_uniform_val<double>(10000.0)); k++)
  219.                 {
  220.                     startV[edgeNum] = j + \
  221.                     firstVsInCluster[i_cluster];
  222.                     endV[edgeNum] = i + \
  223.                     firstVsInCluster[i_cluster];
  224.                     edgeNum++;
  225.                 }
  226.             }
  227.            
  228.         }
  229.     }
  230.     numIntraClusterEdges = edgeNum;
  231.    
  232.     //connect the clusters
  233.     dsize = (uint32_t) (log((double)TotVertices)/log(2));
  234.     d = (uint32_t *) malloc(dsize * sizeof(uint32_t));
  235.     for (i = 0; i < dsize; i++) {
  236.         d[i] = (uint32_t) pow(2, (double) i);
  237.     }
  238.    
  239.     currCluster = 0;
  240.    
  241.     for (i = 0; i < TotVertices; i++)
  242.     {
  243.         p = ProbIntercliqueEdges;
  244.         for (j = currCluster; j<totClusters; j++)
  245.         {
  246.             if ((i >= firstVsInCluster[j]) && (i < firstVsInCluster[j] + clusterSizes[j]))
  247.             {
  248.                 currCluster = j;
  249.                 break;
  250.             }
  251.         }
  252.         for (t = 1; t < dsize; t++)
  253.         {
  254.             j = (i + d[t] + (uint32_t)(rand_uniform_val<double>(10000.0) * (d[t] - d[t - 1]))) % TotVertices;
  255.             if ((j<firstVsInCluster[currCluster]) || (j>=firstVsInCluster[currCluster] + clusterSizes[currCluster]))
  256.             {
  257.                 for (k = 0; k<1 + ((uint32_t)(MaxParallelEdges - 1)* rand_uniform_val<double>(10000.0)); k++)
  258.                 {
  259.                     if (p >  rand_uniform_val<double>(10000.0))
  260.                     {
  261.                         startV[edgeNum] = i;
  262.                         endV[edgeNum] = j;
  263.                         edgeNum++;
  264.                     }
  265.                 }
  266.             }
  267.             p = p/2;
  268.         }
  269.     }
  270.    
  271.     numEdges = edgeNum;
  272.     numInterClusterEdges = numEdges - numIntraClusterEdges;
  273.    
  274.     free(clusterSizes);
  275.     free(firstVsInCluster);
  276.     free(d);
  277.    
  278.     fprintf(stderr, "done\n");
  279.     fprintf(stderr, "\tNo. of inter-cluster edges - %d\n", numInterClusterEdges);
  280.     fprintf(stderr, "\tTotal no. of edges - %d\n", numEdges);
  281.    
  282.     // shuffle vertices to remove locality
  283.     fprintf(stderr, "Shuffling vertices to remove locality ... ");
  284.     fprintf(stderr, "[allocating %3.3f GB memory ... ", (double)(TotVertices + 2 * numEdges) * 8 / (1 << 30));
  285.    
  286.     permV = (uint32_t *)malloc(TotVertices*sizeof(uint32_t));
  287.     startVertex = (uint32_t *)malloc(numEdges*sizeof(uint32_t));
  288.     endVertex = (uint32_t *)malloc(numEdges*sizeof(uint32_t));
  289.    
  290.     for (i = 0; i<TotVertices; i++)
  291.     {
  292.         permV[i] = i;
  293.     }
  294.    
  295.     for (i = 0; i<TotVertices; i++)
  296.     {
  297.         t1 = i + rand_uniform_val<double>(10000.0) * (TotVertices - i);
  298.         if (t1 != i)
  299.         {
  300.             t2 = permV[t1];
  301.             permV[t1] = permV[i];
  302.             permV[i] = t2;
  303.         }
  304.     }
  305.    
  306.     for (i = 0; i<numEdges; i++)
  307.     {
  308.         startVertex[i] = permV[startV[i]];
  309.         endVertex[i] = permV[endV[i]];
  310.     }
  311.    
  312.     free(startV);
  313.     free(endV);
  314.     free(permV);
  315.    
  316.     // generate edge weights
  317.    
  318.     fprintf(stderr, "Generating edge weights ... ");
  319.     weights = (float *)malloc(numEdges*sizeof(float));
  320.     for (i = 0; i<numEdges; i++)
  321.     {
  322.         weights[i] = MinWeight + (float)(MaxWeight - MinWeight) * rand_uniform_val<float>(10000.0);
  323.     }
  324.    
  325.     vector<vector<uint32_t> > dests(TotVertices);
  326.     vector<vector<float> > weight_vect(TotVertices);
  327.    
  328.     _edges_count = numEdges;
  329.    
  330.     // add edges to graph
  331.     for (uint32_t cur_edge = 0; cur_edge < numEdges; cur_edge++)
  332.     {
  333.         int from = startVertex[cur_edge];
  334.         int to = endVertex[cur_edge];
  335.         float edge_weight = weights[cur_edge];
  336.        
  337.         if(_directed)
  338.         {
  339.             _src_ids[cur_edge] = from;
  340.             _dst_ids[cur_edge] = to;
  341.            
  342.             if(_weighted)
  343.                 _weights[cur_edge] = edge_weight;
  344.             else
  345.                 _weights[cur_edge] = 1.0;
  346.         }
  347.        
  348.         if(!_directed)
  349.         {
  350.             _src_ids[cur_edge] = min(to, from);
  351.             _dst_ids[cur_edge] = max(to, from);
  352.             if(_weighted)
  353.                 _weights[cur_edge] = edge_weight;
  354.             else
  355.                 _weights[cur_edge] = 1.0;
  356.         }
  357.  
  358.     }
  359.     fprintf(stderr, "done\n");
  360. }
  361.  
  362. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  363.  
  364. void R_MAT(int *src_ids, int *dst_ids, float *weights, int _vertices_count, long _edges_count, int _a_prob,  int _b_prob,
  365.            int _c_prob, int _d_prob, int _omp_threads,  bool _directed, bool _weighted)
  366. {
  367.     int n = (int)log2(_vertices_count);
  368.    
  369.     cout << "using " << _omp_threads << " threads" << endl;
  370.    
  371.     // generate and add edges to graph
  372.     unsigned int seed = 0;
  373.     #pragma omp parallel num_threads(_omp_threads) private(seed)
  374.     {
  375.         seed = int(time(NULL)) * omp_get_thread_num();
  376.        
  377.         #pragma omp for schedule(static)
  378.         for (long long cur_edge = 0; cur_edge < _edges_count; cur_edge++)
  379.         {
  380.             int x_middle = _vertices_count / 2, y_middle = _vertices_count / 2;
  381.             for (long long i = 1; i < n; i++)
  382.             {
  383.                 int a_beg = 0, a_end = _a_prob;
  384.                 int b_beg = _a_prob, b_end = b_beg + _b_prob;
  385.                 int c_beg = _a_prob + _b_prob, c_end = c_beg + _c_prob;
  386.                 int d_beg = _a_prob + _b_prob + _c_prob, d_end = d_beg + _d_prob;
  387.                
  388.                 int step = (int)pow(2, n - (i + 1));
  389.                
  390.                 int probability = rand_r(&seed) % 100;
  391.                 if (a_beg <= probability && probability < a_end)
  392.                 {
  393.                     x_middle -= step, y_middle -= step;
  394.                 }
  395.                 else if (b_beg <= probability && probability < b_end)
  396.                 {
  397.                     x_middle -= step, y_middle += step;
  398.                 }
  399.                 else if (c_beg <= probability && probability < c_end)
  400.                 {
  401.                     x_middle += step, y_middle -= step;
  402.                 }
  403.                 else if (d_beg <= probability && probability < d_end)
  404.                 {
  405.                     x_middle += step, y_middle += step;
  406.                 }
  407.             }
  408.             if (rand_r(&seed) % 2 == 0)
  409.                 x_middle--;
  410.             if (rand_r(&seed) % 2 == 0)
  411.                 y_middle--;
  412.            
  413.             int from = x_middle;
  414.             int to = y_middle;
  415.             float edge_weight = static_cast <float> (rand_r(&seed)) / static_cast <float> (RAND_MAX);
  416.            
  417.             if(_directed)
  418.             {
  419.                 src_ids[cur_edge] = from;
  420.                 dst_ids[cur_edge] = to;
  421.                
  422.                 if(_weighted)
  423.                     weights[cur_edge] = edge_weight;
  424.                 else
  425.                     weights[cur_edge] = 1.0;
  426.             }
  427.            
  428.             if(!_directed)
  429.             {
  430.                 src_ids[cur_edge] = min(to, from);
  431.                 dst_ids[cur_edge] = max(to, from);
  432.                 if(_weighted)
  433.                     weights[cur_edge] = edge_weight;
  434.                 else
  435.                     weights[cur_edge] = 1.0;
  436.             }
  437.         }
  438.     }
  439. }
  440.  
  441. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  442.  
  443. void uniform_random(int *src_ids, int *dst_ids, float *weights, int _vertices_count, long _edges_count, int _omp_threads,  bool _directed, bool _weighted)
  444. {
  445.     int n = (int)log2(_vertices_count);
  446.    
  447.     cout << "using " << _omp_threads << " threads" << endl;
  448.    
  449.     // generate and add edges to graph
  450.     unsigned int seed = 0;
  451.     #pragma omp parallel num_threads(_omp_threads) private(seed)
  452.     {
  453.         seed = int(time(NULL)) * omp_get_thread_num();
  454.        
  455.         #pragma omp for schedule(static)
  456.         for (long long cur_edge = 0; cur_edge < _edges_count; cur_edge++)
  457.         {
  458.             int from = rand() % _vertices_count;
  459.             int to = rand() % _vertices_count;
  460.             float edge_weight = static_cast <float> (rand_r(&seed)) / static_cast <float> (RAND_MAX);
  461.            
  462.             if(_directed)
  463.             {
  464.                 src_ids[cur_edge] = from;
  465.                 dst_ids[cur_edge] = to;
  466.                
  467.                 if(_weighted)
  468.                     weights[cur_edge] = edge_weight;
  469.                 else
  470.                     weights[cur_edge] = 1.0;
  471.             }
  472.            
  473.             if(!_directed)
  474.             {
  475.                 src_ids[cur_edge] = min(to, from);
  476.                 dst_ids[cur_edge] = max(to, from);
  477.                 if(_weighted)
  478.                     weights[cur_edge] = edge_weight;
  479.                 else
  480.                     weights[cur_edge] = 1.0;
  481.             }
  482.         }
  483.     }
  484. }
  485.  
  486. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  487.  
  488. int main(int argc, char **argv)
  489. {
  490.     try
  491.     {
  492.         int scale, avg_degree;
  493.         string file_name;
  494.         int threads = omp_get_max_threads();
  495.         bool weighted, directed;
  496.         string type;
  497.        
  498.         get_cmd_params(argc, argv, scale, avg_degree, file_name, type, directed, weighted, threads);
  499.        
  500.         int vertices_count = pow(2.0, scale);
  501.         long long edges_count = avg_degree * (long long)vertices_count;
  502.        
  503.         cout << "Using " << threads << " threads" << endl << endl;
  504.        
  505.         int *src_ids = new int[edges_count];
  506.         int *dst_ids = new int[edges_count];
  507.         float *weights;
  508.        
  509.         if(weighted)
  510.         {
  511.            weights = new float[edges_count];
  512.         }
  513.        
  514.         // generate edges list graph in parallel
  515.         double t1 = omp_get_wtime();
  516.        
  517.         if(type == "RMAT")
  518.         {
  519.             cout << "Generating RMAT graph" << endl;
  520.             R_MAT(src_ids, dst_ids, weights, vertices_count, edges_count, 45, 20, 20, 15, threads, directed, weighted);
  521.         }
  522.         else if(type == "SSCA2")
  523.         {
  524.             cout << "Generating SSCA2 graph" << endl;
  525.             SSCA2(src_ids, dst_ids, weights, vertices_count, avg_degree, directed, weighted, edges_count);
  526.         }
  527.         else if(type == "UR")
  528.         {
  529.             cout << "Generating Uniform Random graph" << endl;
  530.             uniform_random(src_ids, dst_ids, weights, vertices_count, edges_count, threads, directed, weighted);
  531.         }
  532.         else
  533.         {
  534.             cout << "Error! Unknow graph type" << endl;
  535.             delete[] src_ids;
  536.             delete[] dst_ids;
  537.            
  538.             if(weighted)
  539.             {
  540.                 delete[] weights;
  541.             }
  542.             return 1;
  543.         }
  544.        
  545.         double t2 = omp_get_wtime();
  546.         cout << "Generation time: " << t2 - t1 << " sec" << endl << endl;
  547.        
  548.         cout << "Saving graph to " << file_name << endl;
  549.         t1 = omp_get_wtime();
  550.         save_to_file(src_ids, dst_ids, weights, vertices_count, edges_count, file_name, weighted);
  551.         t2 = omp_get_wtime();
  552.         cout << "Save time: " << t2 - t1 << " sec" << endl << endl;
  553.        
  554.         cout << "done!" << endl << endl;
  555.        
  556.         delete[] src_ids;
  557.         delete[] dst_ids;
  558.        
  559.         if(weighted)
  560.         {
  561.             delete[] weights;
  562.         }
  563.     }
  564.     catch (const char *error)
  565.     {
  566.         cout << error << endl;
  567.         getchar();
  568.         return 1;
  569.     }
  570.     catch (...)
  571.     {
  572.         cout << "unknown error" << endl;
  573.     }
  574.    
  575.     cout << "press any key to exit..." << endl;
  576.     //getchar();
  577.     return 0;
  578. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement