# Compress Numba

import bz2
import math
import os
import pickle
import time

import numpy as np
import pandas as pd
import scipy as sp
import scipy.sparse as ss
from numba import cuda
from numba import guvectorize
from numba import vectorize

import timing

# Publish the CUDA 10.1 NVVM library and libdevice directory through the
# NUMBAPRO_* environment variables (presumably consulted by Numba's CUDA
# backend when it first loads NVVM - confirm for the installed Numba version)
os.environ.update({
    'NUMBAPRO_NVVM': r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\nvvm\bin\nvvm64_33_0.dll',
    'NUMBAPRO_LIBDEVICE': r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\nvvm\libdevice',
})

# Timers used for debug output. time.clock() was removed in Python 3.8;
# time.time() is used so every stage of this script is measured the same way
start = time.time()
# Name of the file to be compressed
filename = r'C:\Users\gashw\OneDrive\Desktop\Project Work\implement\Samples\Data\HR_edges.csv'
# Name of the compressed file
filename2 = r'C:\Users\gashw\OneDrive\Desktop\Project Work\implement\Samples\Compressed\HR_edges_op.bin'
# Read the edge list into a pandas dataframe: one edge per line, two
# space-separated integer node ids, no header row
data = pd.read_csv(filename, sep=' ', header=None, dtype=np.int64)
end = time.time()
print("Time to read edgelist :")
print(end - start)
start = time.time()
# First and second endpoint of every edge
rows = data[0]
cols = data[1]
# One int8 entry per edge; int8 is the matrix dtype, so no wider temporary
# array has to be allocated and cast down
ones = np.ones(len(rows), np.int8)
# Size the matrix by the largest node id found in either column so it is
# always square. Letting coo_matrix infer the shape yields
# (max(rows) + 1, max(cols) + 1), which is rectangular whenever the two
# columns do not share the same maximum id
node_count = int(max(rows.max(), cols.max())) + 1
adjacency = ss.coo_matrix(
    (ones, (rows, cols)),
    shape=(node_count, node_count),
    dtype=np.int8,
)
# An edge listed more than once would be summed into a value greater than 1
# when the matrix is densified; merge repeats and reset every stored entry
# to 1 so each cell is a plain 0/1 flag
adjacency.sum_duplicates()
adjacency.data[:] = 1
# Convert the sparse coo matrix into a dense array
matrix = adjacency.toarray()
end = time.time()
print("Time to generate sparse matrix :")
print(end - start)
# Creates a list to store the final output values before writing into a file
list1 = []

# Maps a node's position in the array-backed binary tree to its parent's position
@cuda.jit(device=True)
def parent(index):
    """Return the array position of the parent of the node at ``index``.

    The quotient is truncated toward zero rather than floored, so for
    ``index == 0`` the result is 0 (not -1).
    """
    halved = (index - 1) / 2
    return int(halved)

# Maps a node's position to the position of the other child of the same parent
@cuda.jit(device=True)
def sibling(index):
    """Return the array position of the sibling of the node at ``index``.

    An odd position pairs with the next position; an even position pairs
    with the previous one.
    """
    return index + 1 if index % 2 == 1 else index - 1


start = time.time()
# Length of one matrix row, i.e. the number of leaves the tree has to hold
len_row = matrix.shape[1]
if len_row < 1:
    raise ValueError("cannot build a binary tree for an empty matrix row")
# Height of the smallest complete binary tree whose bottom level has at least
# len_row slots: ceil(log2(len_row)) levels above the leaves, plus the leaf
# level itself. Rounding log2 down instead would, for a row length that is
# not a power of two, make the leaf range below overlap internal nodes
# (e.g. length 5 -> n = 7, start_index = 2, and slot 2 is the parent of
# slots 5 and 6)
height = (len_row - 1).bit_length() + 1
# Total number of nodes in the complete tree, i.e. the size of the tree array
n = (2 ** height) - 1
# The row occupies the last len_row slots of the array, all on the leaf level
start_index = n - len_row
# Tree array with every node initialised to -1
temp_array = np.full(n, -1, dtype=np.int8)


# Array-in / array-out GPU routine: a generalized ufunc is required because the
# body indexes whole rows and tree arrays, which a scalar @vectorize signature
# cannot express. Two signatures are registered so the plain Python integers
# passed for n, start_index and len_row match whether NumPy maps them to
# 32-bit or 64-bit integers on the current platform
@guvectorize(
    [
        'void(int8[:], int8[:], int32, int32, int32, int8[:])',
        'void(int8[:], int8[:], int64, int64, int64, int8[:])',
    ],
    '(m),(t),(),(),()->(t)',
    target='cuda',
)
def compress(input_array, temp_array, n, start_index, len_row, out):
    """Mark, in a binary-tree array, the path from every set entry to the root.

    Callers pass the first five arguments; ``out`` is supplied by the gufunc
    machinery and is the value the call returns. Leading dimensions of
    ``input_array`` are looped over, so a 2-D matrix yields one tree array
    per row.

    Parameters
    ----------
    input_array : 1-D int8 array
        One row of flags; only entries equal to 1 are processed.
    temp_array : 1-D int8 array
        Initial state of the tree array. It is copied into ``out`` and is
        not modified.
    n : int
        Number of tree nodes. Kept for interface compatibility; the size
        actually used is the length of ``temp_array``.
    start_index : int
        Tree position that corresponds to ``input_array[0]``.
    len_row : int
        Number of leading entries of ``input_array`` to scan.
    out : 1-D int8 array
        Receives the resulting tree array.
    """
    # Start from a private copy of the initial tree state
    for k in range(temp_array.shape[0]):
        out[k] = temp_array[k]
    for i in range(len_row):
        if input_array[i] == 1:
            current_index = start_index + i
            # Climb toward the root, stopping at the first ancestor that is
            # already marked 1. The root is its own parent according to
            # parent(), so the walk always terminates there
            while True:
                out[current_index] = 1
                parent_index = parent(current_index)
                if out[parent_index] == 1:
                    break
                out[parent_index] = 1
                # The parent was not marked yet, so the sibling subtree has
                # not been visited: record it as 0
                out[sibling(current_index)] = 0
                current_index = parent_index


# Run the compression and store its result in the output list
list1.append(
    compress(
        matrix,
        temp_array,
        n,
        start_index,
        len_row,
    )
)
print(list1)
# Report the time elapsed since `start`; the print of list1 above is included
end = time.time()
print("Time to compress:")
print(end - start)