Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 12 18:16:24 2018
@author: Fuzzy

Associate each gene ID with the other gene IDs reachable through its motifs.

When run as a script, two input files are read:

* ``tfbsDb_plus_and_minus_5000_entrez.json`` -- a JSON object whose keys are
  matched against motif names and whose values are sequences of gene IDs.
* ``id_conversion/humanTFs_All.CSV`` -- a comma-separated table with one
  header row; column 0 is used as the motif name, column 2 as the gene ID.

The resulting ``gene2GeneDB`` dictionary is printed to stdout.
"""
import itertools
import json

# The script only processes the first few gene IDs of the table (the loop was
# originally hard-coded to three). Set to None to process every gene ID.
GENE_LIMIT = 3


def parse_tf_table(lines):
    """Build the motif <-> gene-ID lookup tables from comma-separated lines.

    Args:
        lines: Iterable of text lines (an open file object works). The first
            line is treated as a header and discarded.

    Returns:
        A ``(motif2id, id2motif)`` tuple of dictionaries. ``motif2id`` maps
        column 0 to column 2; if a motif occurs on several rows the last row
        wins. ``id2motif`` maps column 2 to the list of every column-0 value
        seen with it, in input order.
    """
    motif2id = {}
    id2motif = {}
    rows = iter(lines)
    next(rows, None)  # discard the header row (no-op on empty input)
    for line in rows:
        fields = line.strip().split(',')
        if len(fields) < 3:
            # Blank or truncated line: there is no ID column to read, so
            # skip it instead of failing with IndexError.
            continue
        motif, gene_id = fields[0], fields[2]
        motif2id[motif] = gene_id
        # One gene ID can own many motifs, so collect them in a list.
        id2motif.setdefault(gene_id, []).append(motif)
    return motif2id, id2motif


def build_gene2gene(id2motif, motif2genes, limit=None):
    """Link gene IDs to the known gene IDs found under their motifs.

    For every gene ID in ``id2motif`` (at most ``limit`` of them, taken in the
    dictionary's iteration order), look up each of its motifs in
    ``motif2genes`` and keep those listed genes that are themselves keys of
    ``id2motif``.

    Args:
        id2motif: Dict mapping a gene ID (string) to its list of motif names.
        motif2genes: Dict mapping a motif name to a sequence of gene IDs.
            Motifs missing from this dict contribute nothing.
        limit: Maximum number of gene IDs to process, or None for all of
            them. Having fewer gene IDs than ``limit`` is not an error.

    Returns:
        Dict mapping ``str(gene_id)`` to a list of linked gene IDs as
        strings, ordered by motif and then by position in that motif's gene
        list. Duplicates are kept when a gene is listed under several motifs.
    """
    gene2gene = {}
    for gene_in in itertools.islice(id2motif, limit):
        linked = []
        # The motif list is read directly from the table; comparing the
        # string key against float(key) never matched anything.
        for motif in id2motif[gene_in]:
            for gene in motif2genes.get(motif, ()):
                # NOTE(review): IDs are compared as text because the table
                # keys are strings; this assumes the JSON gene values are
                # meant to match them textually -- confirm against the data.
                gene = str(gene)
                if gene in id2motif:
                    linked.append(gene)
        gene2gene[str(gene_in)] = linked
    return gene2gene


if __name__ == "__main__":
    with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
        data = json.load(f)

    with open('id_conversion/humanTFs_All.CSV', 'r') as inFile:
        motif2Id, id2Motif = parse_tf_table(inFile)

    gene2GeneDB = build_gene2gene(id2Motif, data, limit=GENE_LIMIT)
    print(gene2GeneDB)
Add Comment
Please, Sign In to add comment