# Untitled

# -*- coding: utf-8 -*-
"""
Created on Mon Mar 12 18:16:24 2018

@author: Fuzzy
"""

import json
# Parse the TFBS database JSON into `data`; later code looks entries up by
# motif name and treats each value as a list of gene IDs.
with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as tfbsFile:
    data = json.load(tfbsFile)

# Lookup tables built from the humanTFs_All CSV:
#   motif2Id: column 0 (motif name) -> column 2 (gene ID); if a motif appears
#             on several rows, the last row wins.
#   id2Motif: column 2 (gene ID) -> list of every column-0 motif found on a
#             row carrying that ID (one ID can map to many motifs).
motif2Id = {}
id2Motif = {}

with open('id_conversion/humanTFs_All.CSV', 'r') as inFile:
    # Consume the header row so it is not treated as data.
    header = inFile.readline().strip().split(',')
    for inLine in inFile:
        fields = inLine.strip().split(',')
        # Blank or truncated rows have no column 2; skip them instead of
        # crashing with IndexError.
        if len(fields) < 3:
            continue
        motif, geneId = fields[0], fields[2]
        motif2Id[motif] = geneId
        # setdefault creates the list the first time an ID is seen.
        id2Motif.setdefault(geneId, []).append(motif)

# Associate a gene ID with other gene IDs through shared motifs:
#   gene ID -> its motifs (id2Motif) -> genes listed under those motifs in the
#   JSON (data) -> keep only the genes that are themselves keys of id2Motif.
gene2GeneDB = {}
geneLimit = 3  # only the first few gene IDs are processed

# Slicing a list of the keys works on both Python 2 and 3 and, unlike
# indexing keys()[i], cannot raise IndexError when fewer IDs were loaded.
for geneIn in list(id2Motif)[:geneLimit]:
    # geneIn is already a key of id2Motif, so look it up directly. The former
    # scan compared the string key to float(key), which is never equal (and
    # raises ValueError for non-numeric IDs), so no motifs were ever found.
    motifList = id2Motif[geneIn]

    # Gather every gene listed in the JSON under any of this gene's motifs.
    geneList = []
    for motif in motifList:
        if motif in data:
            geneList += data[motif]

    # Keep only genes that are known IDs; repeated hits are kept, one entry
    # per occurrence in geneList.
    # NOTE(review): assumes the JSON stores gene IDs as strings matching the
    # CSV column; numeric JSON values would never match -- confirm.
    gene2GeneDB[str(geneIn)] = [
        str(gene) for gene in geneList if gene in id2Motif
    ]


# Parenthesised single-argument form prints identically on Python 2 and 3.
print(gene2GeneDB)
#print (gene2GeneDB[gene2GeneDB.keys()[0:5]])