Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def remove_min_cols(dataframe, thresold=0.85):
    """Drop highly correlated columns from *dataframe* in place.

    A graph is built whose nodes are the columns of ``dataframe.corr()`` and
    whose edges join every pair of columns with a correlation strictly
    greater than ``thresold``. Nodes are then visited from the fewest
    edges to the most (ties broken by column position); each node that is
    still unmarked is kept and all of its unmarked neighbours are scheduled
    for removal.

    Args:
        dataframe: The frame to prune. It is modified in place.
        thresold: Correlation cut-off (the spelling is kept because callers
            may pass it by keyword). The comparison is signed, so strongly
            negative correlations are NOT treated as redundant.

    Returns:
        None. The selected columns are removed from ``dataframe`` itself.
    """
    matrix = dataframe.corr()
    cols = list(matrix.columns)
    n_cols = len(cols)

    # Positional access to the (symmetric) correlation matrix: avoids chained
    # label indexing inside the double loop.
    corr = matrix.values

    # Adjacency list: graph[i] holds the positions of the columns whose
    # correlation with column i exceeds the threshold.
    graph = [[] for _ in cols]
    for i in range(n_cols):
        for j in range(i + 1, n_cols):
            # NaN correlations compare False and therefore add no edge.
            if corr[i, j] > thresold:
                graph[i].append(j)
                graph[j].append(i)

    # Visit nodes in order of increasing degree, then by column position.
    order = sorted(range(n_cols), key=lambda node: (len(graph[node]), node))

    visited = [False] * n_cols
    del_cols = []
    for node in order:
        if visited[node]:
            continue
        # Keep `node`; every neighbour not yet decided is dropped.
        for neighbour in graph[node]:
            if not visited[neighbour]:
                visited[neighbour] = True
                del_cols.append(cols[neighbour])
        visited[node] = True

    dataframe.drop(columns=del_cols, inplace=True)
Add Comment
Please, Sign In to add comment