Advertisement
Guest User

Untitled

a guest
Aug 2nd, 2015
186
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.17 KB | None | 0 0
  1. # Source: parse_drugbank.py
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import xml.sax
  5.  
  6. DATA_DIR = "./data/"
  7.  
  class DrugXmlContentHandler(xml.sax.ContentHandler):
      """SAX handler that collects generic drug names from DrugBank XML.

      While parsing, the handler tracks the stack of open element names in
      ``self.tags``. The text of every element whose path from the root is
      ``drugbank/drug/name`` is appended to ``self.generic_names``.

      ``self.brand_names`` is kept for callers that reference it, but it is
      never populated: brand-name collection (path
      ``drugbank/drug/products/product/name``) is currently disabled.
      """

      # Element path, joined with "/", whose text is a generic drug name.
      GENERIC_NAME_PATH = "drugbank/drug/name"

      def __init__(self):
          xml.sax.ContentHandler.__init__(self)
          self.tags = []
          self.generic_names = []
          self.brand_names = []
          # Text chunks of the generic-name element currently being read,
          # or None when the parser is not inside such an element.
          self._name_parts = None

      def _at_generic_name(self):
          """Return True if the innermost open element is a generic name."""
          return "/".join(self.tags) == self.GENERIC_NAME_PATH

      def startElement(self, name, attrs):
          """Push *name* on the tag stack; start buffering at a generic name."""
          self.tags.append(name)
          if self._at_generic_name():
              self._name_parts = []

      def endElement(self, name):
          """Emit the buffered generic name, if any, then pop the tag stack."""
          if self._name_parts is not None and self._at_generic_name():
              text = "".join(self._name_parts)
              # An element with no character data contributes no entry.
              if text:
                  self.generic_names.append(text)
              self._name_parts = None
          self.tags.pop()

      def characters(self, content):
          """Buffer *content* when it belongs directly to a generic name.

          SAX parsers may report one run of character data in several
          chunks, so the pieces are joined in ``endElement`` instead of
          being stored as separate names.
          """
          if self._name_parts is not None and self._at_generic_name():
              self._name_parts.append(content)
  28.  
  def write_list_to_file(lst, filename):
      """Write each item of *lst* to ``DATA_DIR/filename``, one per line.

      Items are encoded as UTF-8 and terminated with a newline byte. The
      file is opened in binary mode and truncated if it already exists.

      :param lst: iterable of text strings to write.
      :param filename: name of the output file, relative to ``DATA_DIR``.
      """
      # Bytes are concatenated directly rather than %-formatted into a str,
      # so the write is valid on both Python 2 and Python 3. The ``with``
      # block closes the file even if a write fails.
      with open(os.path.join(DATA_DIR, filename), 'wb') as fout:
          for e in lst:
              fout.write(e.encode("utf-8") + b"\n")
  34.  
  35.  
  # Parse the DrugBank dump and collect the generic drug names.
  handler = DrugXmlContentHandler()
  # The ``with`` block closes the input file even if parsing raises.
  with open(os.path.join(DATA_DIR, "drugbank.xml"), 'rb') as source:
      xml.sax.parse(source, handler)

  # Write the collected names, one per line, to the data directory.
  write_list_to_file(handler.generic_names, "generic_names.txt")
  #write_list_to_file(handler.brand_names, "brand_names.txt")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement