Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- human_129.sort_index(axis=1, inplace=True)
- human_129.sort_index(inplace=True)
- human_152.sort_index(axis=1, inplace=True)
- human_152.sort_index(inplace=True)
- intersection_cols = np.intersect1d(human_129.columns, human_152.columns)
- intersection_rows = np.intersect1d(human_129.index, human_152.index)
- print(len(intersection_rows))
- print(len(intersection_cols))
- human_129 = human_129.reindex(index=intersection_rows, columns=intersection_cols)
- human_152 = human_152.reindex(index=intersection_rows, columns=intersection_cols)
- human_129.sort_index(axis=1, inplace=True)
- human_129.sort_index(inplace=True)
- human_152.sort_index(axis=1, inplace=True)
- human_152.sort_index(inplace=True)
- print(human_129.shape)
- print(human_152.shape)
- # Load the GO hierarchy from a Matrix Market file (`io` is presumably scipy.io -- confirm against the imports).
- sparsematrix = io.mmread('my_gene_ontology_edit.obo.2014-01-01.hierarchy_annotations.mtx')
- # Convert to dictionary-of-keys (DOK) format.
- go_hierarchy = sparsematrix.todok()
- # Read the term names as strings. The same array is bound to both col_names and row_names,
- # so the matrix is presumably square (terms x terms) -- TODO confirm.
- col_names = row_names = np.genfromtxt('my_gene_ontology_edit.obo.2014-01-01.hierarchy_annotations.mtx.rownames.tsv', dtype=str)
- # Keep the names on the matrix object as an ad-hoc `axes` attribute for the lookups below.
- go_hierarchy.axes = col_names
- #necessario rinomiare per fare combaciare con termini go
- human_129.columns = human_129.columns.str.replace(':', '_')
- human_152.columns = human_152.columns.str.replace(':', '_')
- #determinare termini go comuni tra organismo e matrice gerarchica
- intersection = np.sort(np.intersect1d(go_hierarchy.axes, human_129.columns))
- human_129 = human_129.reindex(columns=intersection)
- human_152 = human_152.reindex(columns=intersection)
- human_129.sort_index(axis=1, inplace=True)
- human_129.sort_index(inplace=True)
- human_152.sort_index(axis=1, inplace=True)
- human_152.sort_index(inplace=True)
- # Determine the indices of the usable elements of the hierarchy matrix.
- import numpy_indexed as npi
- # NOTE: `%time` is an IPython magic, so the lines below only run inside IPython/Jupyter.
- # idx presumably holds, for each term in `intersection`, its position in go_hierarchy.axes
- # -- confirm against the numpy_indexed documentation.
- %time idx = npi.indices(go_hierarchy.axes, intersection)
- # Select the columns: beware, this takes 4-5 minutes with 4 cores.
- %time go__ = go_hierarchy[:,idx]
- # Select the rows with the same indices (first axis), so both axes of `go` cover the terms in idx.
- %time go = go__[idx,:]
- human_129 = csr_matrix(human_129)
- human_152 = csr_matrix(human_152)
- human_129_tp = (human_129.dot(go) > 0).astype('int8')
- human_129_tp = human_129_tp = human_129
- human_152_tp = (human_152.dot(go) > 0).astype('int8')
- human_152_tp = human_152_tp + human_152
- human_129 = pd.DataFrame(human_129_tp.todense(), index=intersection_rows, columns=intersection)
- human_152 = pd.DataFrame(human_152_tp.todense(), index=intersection_rows, columns=intersection)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement