Advertisement
Approachable

Compress

May 21st, 2019
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.75 KB | None | 0 0
  1. import math
  2. import numpy as np
  3. import scipy as sp
  4. import scipy.sparse as ss
  5. import bz2
  6. import pickle
  7. import time
  8. import timing
  9. import os
  10. import pandas as pd
  11. from pathlib import Path
  12.  
  13. # Timers used for debug note time.clock() performs better than time.time()
  14. start = time.time()
  15. # Name of the file to be compressed
  16. filename = Path(
  17.     r'C:\Users\gashw\OneDrive\Desktop\Project Work\implement\Samples\Data\HR_edges.csv')
  18. # Name of the compressed file
  19. filename2 = Path(
  20.     r'C:\Users\gashw\OneDrive\Desktop\Project Work\implement\Samples\Compressed\HR_edges_op.bin')
  21. # Read data from edge list into a pandas dataframe
  22. data = pd.read_csv(filename, sep=' ', header=None, dtype=np.int64)
  23. end = time.time()
  24. print("Time to read edgelist :")
  25. print(end - start)
  26. start = time.time()
  27. # References not copies
  28. rows = data[0]
  29. cols = data[1]
  30. ones = np.ones(len(rows), np.uint32)
  31. # Load the data drame into a sparse coo matrix and convert it into an array
  32. matrix = ss.coo_matrix((ones, (rows, cols)), dtype=np.int8).toarray()
  33. end = time.time()
  34. print("Time to generate sparse matrix :")
  35. print(end - start)
  36. # Creates a list to store the final output values before writing into a file
  37. list1 = []
  38.  
  39. # Finds the parent of current node
  40.  
  41.  
  42. def parent(index):
  43.     return int((index-1) / 2)
  44.  
  45. # Finds the sibling of current node left or right sibling based on index
  46.  
  47.  
  48. def sibling(index):
  49.     if(index % 2 == 1):
  50.         return index+1
  51.     else:
  52.         return index-1
  53.  
  54.  
  55. start = time.time()
  56. len_row = matrix.shape[0]
  57. # Find the height of the binary tree and use it to find n i.e the no.of elements in the array
  58. height = int(math.log2(len_row)) + 1
  59. n = (2 ** height) - 1
  60.  
  61. # Loop for lal the elements in row i of the matrix to generate compressed format of that row
  62. for i in range(len_row):
  63.     print("Element "+str(i))
  64.     # Input_array = i'th row of matrix array
  65.     input_array = matrix[i]
  66.     # Initilaize temp array with -1 values
  67.     temp_array = np.full(n, -1, dtype=np.int8)
  68.     start_index = n-len(input_array)
  69.     for i in range(len_row):
  70.         if(input_array[i] == 1):
  71.             current_index = start_index+i
  72.             dcn_reached = False
  73.             while dcn_reached == False:
  74.                 temp_array[current_index] = 1
  75.                 if(temp_array[parent(current_index)] != 1):
  76.                     temp_array[parent(current_index)] = 1
  77.                     temp_array[sibling(current_index)] = 0
  78.                     current_index = parent(current_index)
  79.                 else:
  80.                     dcn_reached = True
  81.     i = np.where(temp_array != -1)
  82.     output = temp_array[i]
  83.     list1.append(output)
  84. bf = bz2.BZ2File(filename2, "wb")
  85. pickle.dump(list1, bf, 2)
  86. bf.close()
  87. end = time.time()
  88. print("Time to compress:")
  89. print(end - start)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement