Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json
import urllib.parse

import pandas as pd
import requests
from lxml import etree
from lxml import html
# Build a table (q2) with one row per gene symbol, filled from the HGNC REST
# service at rest.genenames.org: a symbol search yields the HGNC id, and a
# fetch by that id yields the record whose fields populate the row.

# Column names are kept verbatim (including the "EentrezGeneID" spelling)
# because code outside this script may select on them.
coluns = ["geneSymbol", "uniprotAC", "uniprotID", "proteinName",
          "EentrezGeneID", "desc", "notes"]
q2 = pd.DataFrame(columns=coluns)

genes = ["BAT1", "CCL2", "CCR5", "CxCL10", "CXCL9", "HLA-DPB1", "HLA-DRB1",
         "IFNG", "IL10", "IL12B", "IL1B", "IL1RN", "IL4", "IL4R", "IL6",
         "LTA", "TGFB", "TNF", "TNFA", "TNFB"]
# "HLA-DQB1",

url1 = "http://rest.genenames.org/search/symbol:"
url3 = "+AND+status:Approved"
url4 = "http://rest.genenames.org/fetch/hgnc_id/"
# Not used by the lookup below; kept because other code may reference it.
url6 = "https://www.ebi.ac.uk/proteins/api/proteins/"

# Seconds to wait for a response before giving up on a request.
REQUEST_TIMEOUT = 30
# Stand-in for the two columns this script does not look up yet.
PLACEHOLDER = "x"
# Written to the "notes" column when the search returns no record.
NOT_FOUND_NOTE = "not found"


def _first(values, default=None):
    """Return the first item of *values*, or *default* when it is empty."""
    return values[0] if values else default


def _get_tree(url):
    """GET *url* and return the response body parsed by ``lxml.html``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within REQUEST_TIMEOUT.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return html.fromstring(response.content)


def _gene_row(symbol):
    """Return the list of q2 column values for one gene *symbol*.

    Fields missing from the HGNC record are returned as ``None``. When the
    search finds no approved record at all, the row carries only the queried
    symbol and NOT_FOUND_NOTE.
    """
    # Plain concatenation on purpose: urljoin() would replace the trailing
    # "symbol:" path segment with the gene and treat its third positional
    # argument as the allow_fragments flag, dropping the status filter.
    search_url = url1 + urllib.parse.quote(symbol) + url3
    hits = _get_tree(search_url)
    hgnc_id = _first(hits.xpath('//str[@name="hgnc_id"]/text()'))
    if hgnc_id is None:
        return [symbol, PLACEHOLDER, None, None, None, PLACEHOLDER,
                NOT_FOUND_NOTE]

    # The id arrives as "HGNC:<number>"; the fetch URL takes the number.
    record = _get_tree(url4 + hgnc_id.split(":", 1)[-1])
    return [
        _first(record.xpath('//str[@name="symbol"]/text()')),
        PLACEHOLDER,  # uniprotAC: no lookup implemented
        _first(record.xpath('//arr[@name="uniprot_ids"]/str/text()')),
        _first(record.xpath('//str[@name="name"]/text()')),
        _first(record.xpath('//str[@name="entrez_id"]/text()')),
        PLACEHOLDER,  # desc: no lookup implemented
        _first(record.xpath('//str[@name="status"]/text()')),
    ]


# Row index i matches the gene's position in `genes`.
for i, gene in enumerate(genes):
    q2.loc[i] = _gene_row(gene)

q2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement