Advertisement
Approachable

Compress Numba

May 21st, 2019
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.83 KB | None | 0 0
import bz2
import math
import os
import pickle
import time

import numpy as np
import pandas as pd
import scipy as sp
import scipy.sparse as ss
from numba import cuda
from numba import guvectorize
from numba import vectorize

import timing
  13.  
  14. os.environ['NUMBAPRO_NVVM'] = r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\nvvm\bin\nvvm64_33_0.dll'
  15. os.environ['NUMBAPRO_LIBDEVICE'] = r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\nvvm\libdevice'
  16.  
  17. # Timers used for debug note time.clock() performs better than time.time()
  18. start = time.time()
  19. # Name of the file to be compressed
  20. filename = r'C:\Users\gashw\OneDrive\Desktop\Project Work\implement\Samples\Data\HR_edges.csv'
  21. # Name of the compressed file
  22. filename2 = r'C:\Users\gashw\OneDrive\Desktop\Project Work\implement\Samples\Compressed\HR_edges_op.bin'
  23. # Read data from edge list into a pandas dataframe
  24. data = pd.read_csv(filename, sep=' ', header=None, dtype=np.int64)
  25. end = time.time()
  26. print("Time to read edgelist :")
  27. print(end - start)
  28. start = time.time()
  29. # References not copies
  30. rows = data[0]
  31. cols = data[1]
  32. ones = np.ones(len(rows), np.uint32)
  33. # Load the data drame into a sparse coo matrix and convert it into an array
  34. matrix = ss.coo_matrix((ones, (rows, cols)), dtype=np.int8).toarray()
  35. end = time.time()
  36. print("Time to generate sparse matrix :")
  37. print(end - start)
  38. # Creates a list to store the final output values before writing into a file
  39. list1 = []
  40.  
  41. # Finds the parent of current node
  42. @cuda.jit(device=True)
  43. def parent(index):
  44.     return int((index-1) / 2)
  45.  
  46. # Finds the sibling of current node left or right sibling based on index
  47. @cuda.jit(device=True)
  48. def sibling(index):
  49.     if(index % 2 == 1):
  50.         return index+1
  51.     else:
  52.         return index-1
  53.  
  54.  
  55. start = time.time()
  56. len_row = matrix.shape[0]
  57. # Find the height of the binary tree and use it to find n i.e the no.of elements in the array
  58. height = int(math.log2(len_row)) + 1
  59. n = (2 ** height) - 1
  60. start_index = n-len(matrix[0])
  61. temp_array = np.full(n, -1, dtype=np.int8)
  62.  
  63.  
  64. @vectorize(['int32(int32,int32,int32,int32,int32)'], target='cuda')
  65. def compress(input_array, temp_array, n, start_index, len_row):
  66.     for i in range(len_row):
  67.         if(input_array[i] == 1):
  68.             current_index = start_index+i
  69.             dcn_reached = False
  70.             while dcn_reached == False:
  71.                 temp_array[current_index] = 1
  72.                 if(temp_array[parent(current_index)] != 1):
  73.                     temp_array[parent(current_index)] = 1
  74.                     temp_array[sibling(current_index)] = 0
  75.                     current_index = parent(current_index)
  76.                 else:
  77.                     dcn_reached = True
  78.     return temp_array
  79.  
  80.  
  81. list1.append(compress(matrix, temp_array, n, start_index, len_row))
  82. print(list1)
  83. end = time.time()
  84. print("Time to compress:")
  85. print(end - start)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement