Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import math
- import numpy as np
- import scipy as sp
- import scipy.sparse as ss
- import bz2
- import pickle
- import time
- import timing
- import os
- import pandas as pd
- from pathlib import Path
# Timers used for debugging.  time.perf_counter() is the clock intended for
# measuring elapsed intervals (time.clock() was removed in Python 3.8).
start = time.perf_counter()
# Name of the file to be compressed
filename = Path(
    r'C:\Users\gashw\OneDrive\Desktop\Project Work\implement\Samples\Data\HR_edges.csv')
# Name of the compressed file
filename2 = Path(
    r'C:\Users\gashw\OneDrive\Desktop\Project Work\implement\Samples\Compressed\HR_edges_op.bin')
# Read the edge list into a pandas dataframe: two integer columns per line,
# separated by a single space, with no header row.
# NOTE(review): the file has a .csv extension but is parsed as
# space-separated -- confirm this matches the actual file format.
data = pd.read_csv(filename, sep=' ', header=None, dtype=np.int64)
end = time.perf_counter()
print("Time to read edgelist :")
print(end - start)
start = time.perf_counter()
# References not copies
rows = data[0]
cols = data[1]
ones = np.ones(len(rows), np.uint32)
# Force a square matrix.  Without an explicit shape the COO constructor sizes
# each axis from its own largest index, so the matrix is non-square whenever
# the largest source id differs from the largest target id.
size = int(max(rows.max(), cols.max())) + 1 if len(rows) else 0
# Load the data frame into a sparse matrix.  Converting COO to CSR sums
# duplicate (row, col) entries, so an edge listed more than once would end up
# with a value other than 1; resetting every stored entry to 1 keeps the
# adjacency matrix strictly binary.
adjacency = ss.coo_matrix(
    (ones, (rows, cols)), shape=(size, size), dtype=np.int8).tocsr()
adjacency.data[:] = 1
# Dense size x size int8 array: memory use grows quadratically with the
# number of nodes.
matrix = adjacency.toarray()
end = time.perf_counter()
print("Time to generate sparse matrix :")
print(end - start)
# Creates a list to store the final output values before writing into a file
list1 = []
# Finds the parent of current node
def parent(index):
    """Return the array index of the parent of node ``index``.

    The tree is stored in an array where the children of node ``p`` sit at
    ``2 * p + 1`` and ``2 * p + 2``.  The root (index 0) has no parent and is
    reported as its own parent, so callers walking upwards can stop when the
    returned index equals the current one.

    Pure integer arithmetic is used instead of float division so the result
    stays exact for indices too large to be represented as a float.

    Args:
        index: Non-negative array index of a tree node.

    Returns:
        The parent's array index as a Python ``int`` (0 for the root).
    """
    if index <= 0:
        return 0
    return int((index - 1) // 2)
# Finds the sibling of current node left or right sibling based on index
def sibling(index):
    """Return the array index of the node sharing a parent with ``index``.

    An odd index is paired with the next slot (``index + 1``); any other
    index is paired with the previous slot (``index - 1``).
    """
    offset = 1 if index % 2 == 1 else -1
    return index + offset
start = time.perf_counter()
len_row = matrix.shape[0]
# Every column of a row gets its own leaf, so the tree is sized from the
# column count.
num_leaves = matrix.shape[1]
# Find the height of the binary tree and use it to find n i.e. the no. of
# elements in the array.  The bottom level must hold at least num_leaves
# slots, so the height is ceil(log2(num_leaves)) + 1.  Truncating the
# logarithm instead undersizes the tree whenever num_leaves is not a power of
# two, which makes the leaf slots overlap internal nodes.  bit_length() gives
# the exact integer ceiling without floating point.
height = (max(num_leaves, 1) - 1).bit_length() + 1
n = (2 ** height) - 1
# Loop over every row of the matrix to generate the compressed form of that row
for row_number, input_array in enumerate(matrix):
    print("Element "+str(row_number))
    # Node states: -1 = never visited, 1 = a set leaf lies at or below this
    # node, 0 = sibling subtree known to hold no set leaf seen so far.
    temp_array = np.full(n, -1, dtype=np.int8)
    # The leaves occupy the last len(input_array) slots of the array
    start_index = n - len(input_array)
    # Visit only the columns that are set, in increasing column order
    for column in np.flatnonzero(input_array == 1):
        current_index = start_index + int(column)
        # Climb towards the root, marking the path, until reaching a node
        # that is already marked (or the root itself).
        while True:
            temp_array[current_index] = 1
            if current_index == 0:
                break
            parent_index = parent(current_index)
            if temp_array[parent_index] == 1:
                break
            temp_array[parent_index] = 1
            temp_array[sibling(current_index)] = 0
            current_index = parent_index
    # Keep only the visited nodes, in array order
    list1.append(temp_array[temp_array != -1])
# The context manager closes the file even if pickling fails
with bz2.BZ2File(filename2, "wb") as bf:
    pickle.dump(list1, bf, 2)
end = time.perf_counter()
print("Time to compress:")
print(end - start)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement