Advertisement
Guest User

Untitled

a guest
Nov 23rd, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.38 KB | None | 0 0
import json
import urllib.parse

import pandas as pd
import requests
from lxml import etree
from lxml import html
  6.  
  7. coluns = ["geneSymbol", "uniprotAC", "uniprotID", "proteinName","EentrezGeneID", "desc", "notes"]
  8. q2 = pd.DataFrame(columns=coluns)
  9. genes = ["BAT1","CCL2","CCR5","CxCL10","CXCL9","HLA-DPB1", "HLA-DRB1","IFNG","IL10","IL12B",
  10. "IL1B","IL1RN","IL4","IL4R","IL6","LTA","TGFB","TNF","TNFA","TNFB"]
  11. #"HLA-DQB1",
  12. url1 = "http://rest.genenames.org/search/symbol:"
  13. url3 = "+AND+status:Approved"
  14. url4 = "http://rest.genenames.org/fetch/hgnc_id/"
  15. url6 = "https://www.ebi.ac.uk/proteins/api/proteins/"
  16. ic = 0
  17. for i in range(0,len(genes)):
  18. url2 = genes[i]
  19. r = requests.get(urllib.parse.urljoin(url1, url2,url3))
  20. rHTML = html.fromstring(r.content)
  21. resp = rHTML.xpath('//str/text()')
  22. url5 = resp[0][5:]
  23.  
  24. r1 = requests.get(urllib.parse.urljoin(url4, url5))
  25. rHTML1 = html.fromstring(r1.content)
  26. d1 = rHTML1.xpath('//str[@name="symbol"]/text()')
  27. d2 = "x"#rHTML1.xpath('//str[@name="uniprotAC"]/text()')
  28. d3 = rHTML1.xpath('//arr[@name="uniprot_ids"]/str/text()')
  29. d4 = rHTML1.xpath('//str[@name="name"]/text()')
  30. d5 = rHTML1.xpath('//str[@name="entrez_id"]/text()')
  31. d6 = "x"#rHTML1.xpath('//str[@name="desc"]/text()')
  32. d7 = rHTML1.xpath('//str[@name="status"]/text()')
  33.  
  34. q2.loc[i] = [d1[0],d2[0],d3[0],d4[0],d5[0],d6,d7[0]]
  35. q2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement