Advertisement
Guest User

Untitled

a guest
Apr 19th, 2019
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.29 KB | None | 0 0
  1. #!/usr/local/bin/python3
  2. # Copyright(c) 2019 Robert T. Miller. All rights reserved.
  3.  
  4. # -*- coding: latin-1 -*-
  5. """Interconvert PDB internal and external coordinates.
  6.  
  7. Interconvert PDB Structure data between external (X, Y, Z cartesian)
  8. coordinates and internal (bond length, angle and dihedral angle) measurements.
  9. """
  10.  
  11. #
  12. # replicate buildprot with biopython
  13. #
  14.  
  15. import argparse
  16. import os
  17. import sys
  18. import re
  19. from io import StringIO
  20. # import itertools
  21. # print(sys.path)
  22.  
  23. import gzip
  24. import warnings
  25.  
  26. from Bio.PDB.PDBParser import PDBParser
  27.  
  28. from Bio.PDB.pic import read_PIC, report_PIC, structure_rebuild_test
  29. from Bio.PDB.pic import internal_to_atom_coordinates, write_PIC
  30. from Bio.PDB.pic import atom_to_internal_coordinates, PIC_Residue
  31. from Bio.PDB.pic import write_PDB, write_SCAD, PIC_Chain, AtomKey
  32.  
  33. PDB_repository_base = None
  34.  
  35. if os.path.isdir('/media/data/pdb'):
  36. PDB_repository_base = '/media/data/pdb/'
  37. elif os.path.isdir('/Volumes/data/pdb'):
  38. PDB_repository_base = '/Volumes/data/pdb/'
  39.  
  40. print(sys.version)
  41.  
  42. scale_val = 2
  43.  
  44. arg_parser = argparse.ArgumentParser(
  45. description='Interconvert .pic (pprotein internal coordinates) and '
  46. '.pdb (protein data bank) files.')
  47. arg_parser.add_argument(
  48. 'file', nargs='*',
  49. help='a .pdb or .pic path/filename to read (first model, first chain), '
  50. 'or a PDB idCode with optional chain ID to read from {0} as .ent.gz'
  51. .format((PDB_repository_base or '[PDB resource not defined - '
  52. 'please configure before use]')))
  53.  
  54. arg_parser.add_argument(
  55. '-f', dest='filelist',
  56. help='a Dunbrack cullPDB pdb ID list to read from {0} as .ent.gz'
  57. .format((PDB_repository_base or '[PDB resource not defined - '
  58. 'please configure before use]')))
  59. arg_parser.add_argument('-skip', dest='skip_count',
  60. help='count of pdb ID list entries to skip')
  61. arg_parser.add_argument('-limit', dest='limit_count',
  62. help='stop after this many pdb ID list entries')
  63. arg_parser.add_argument('-wp', help='write pdb file with .PyPDB extension',
  64. action="store_true")
  65. arg_parser.add_argument('-wi', help='write pic file with .PyPIC extension',
  66. action="store_true")
  67. arg_parser.add_argument('-ws', help='write OpenSCAD file with .scad extension',
  68. action="store_true")
  69. arg_parser.add_argument('-scale', dest='scale',
  70. help='OpenSCAD output: units (usually mm) per '
  71. 'angstrom, default ' + str(scale_val))
  72. arg_parser.add_argument('-maxp', dest='maxp',
  73. help='max N-C peptide bond length for chain breaks,'
  74. 'default ' + str(PIC_Chain.MaxPeptideBond))
  75. arg_parser.add_argument('-backbone',
  76. help='OpenSCAD output: skip sidechains',
  77. action="store_true")
  78. arg_parser.add_argument('-t', help='test conversion pdb/pic to pic/pdb',
  79. action="store_true")
  80. arg_parser.add_argument('-tv', help='verbose test conversion pdb<>pic',
  81. action="store_true")
  82. arg_parser.add_argument('-nh', help='ignore hydrogens on PDB read',
  83. action="store_true")
  84. arg_parser.add_argument('-d2h', help='swap D (deuterium) for H on PDB read',
  85. action="store_true")
  86. arg_parser.add_argument('-amide',
  87. help='only amide proton, skip other Hs on PDB read',
  88. action="store_true")
  89. arg_parser.add_argument('-gcb',
  90. help='generate GLY C-beta atoms',
  91. action="store_true")
  92. arg_parser.add_argument('-rama',
  93. help='print psi, phi, omega values',
  94. action="store_true")
  95.  
  96. args = arg_parser.parse_args()
  97.  
  98. # print(args)
  99.  
  100. if args.nh:
  101. PIC_Residue.accept_atoms = PIC_Residue.accept_backbone
  102. if args.amide:
  103. PIC_Residue.accept_atoms = PIC_Residue.accept_backbone + ('H',)
  104. if args.d2h:
  105. PIC_Residue.accept_atoms += PIC_Residue.accept_deuteriums
  106. AtomKey.d2h = True
  107. if args.gcb:
  108. PIC_Residue.gly_Cbeta = True
  109. if args.maxp:
  110. PIC_Chain.MaxPeptideBond = float(args.maxp)
  111. if args.scale:
  112. scale_val = args.scale
  113. if args.skip_count:
  114. args.skip_count = int(args.skip_count)
  115. if args.limit_count:
  116. args.limit_count = int(args.limit_count)
  117.  
  118. toProcess = args.file
  119. pdbidre = re.compile(r'(^\d(\w\w)\w)(\w)?$')
  120.  
  121. if args.filelist:
  122. flist = open(args.filelist, 'r')
  123. for aline in flist:
  124. fields = aline.split()
  125. pdbidMatch = pdbidre.match(fields[0])
  126. if pdbidMatch:
  127. # print(m.group(1) + ' ' + m.group(2))
  128. # toProcess.append(PDB_repository_base + m.group(2)
  129. # + '/pdb' + m.group(1) + '.ent.gz' )
  130. toProcess.append(pdbidMatch.group(0))
  131.  
  132.  
  133. if len(toProcess):
  134. print(len(toProcess), 'entries to process')
  135. else:
  136. print("no files to process. use '-h' for help")
  137. sys.exit(0)
  138.  
  139. PDB_parser = PDBParser(PERMISSIVE=True, QUIET=True)
  140.  
  141. fileNo = 1
  142.  
  143. for target in toProcess:
  144. if args.skip_count and fileNo <= args.skip_count:
  145. fileNo += 1
  146. continue
  147. if args.limit_count is not None:
  148. if args.limit_count <= 0:
  149. sys.exit(0)
  150. args.limit_count -= 1
  151. pdb_input = False
  152. pic_input = False
  153. pdb_structure = None
  154. # pdb_chain = None
  155. prot_id = ''
  156. outfile = os.path.basename(target)
  157.  
  158. pdbidMatch = pdbidre.match(target)
  159. if pdbidMatch is not None:
  160. assert PDB_repository_base, 'PDB repository base directory missing, '
  161. 'please configure for this host'
  162. pdb_input = True
  163. filename = (PDB_repository_base + pdbidMatch.group(2).lower() + '/pdb'
  164. + pdbidMatch.group(1).lower() + '.ent.gz')
  165. prot_id = pdbidMatch.group(1)
  166. else:
  167. filename = target
  168. prot_id = target
  169. if '.' in prot_id:
  170. prot_id = prot_id[0:prot_id.find('.')]
  171. if '/' in prot_id:
  172. prot_id = prot_id[prot_id.rfind('/')+1:]
  173. if 'pdb' in prot_id:
  174. prot_id = prot_id[prot_id.rfind('pdb') + 3:]
  175.  
  176. if not pdb_input:
  177. pdb_structure = read_PIC(
  178. gzip.open(filename, mode='rt')
  179. if filename.endswith('.gz') else filename)
  180. if pdb_structure is not None:
  181. pic_input = True
  182.  
  183. if pdb_structure is None:
  184. pdb_input = True
  185. try:
  186. pdb_structure = PDB_parser.get_structure(
  187. prot_id,
  188. gzip.open(filename, mode='rt')
  189. if filename.endswith('.gz') else filename)
  190. except FileNotFoundError:
  191. print(filename, "not found")
  192. continue
  193.  
  194. # get specified chain if given, else just pick first for now
  195. # count atoms to detect pic file if don't know already
  196.  
  197. rCount = 0
  198. aCount = 0
  199.  
  200. if pdbidMatch is not None and pdbidMatch.group(3) is not None:
  201. # have chain specifier for PDBid
  202. if pdb_structure[0][pdbidMatch.group(3)] is not None:
  203. pdb_chain = pdb_structure[0][pdbidMatch.group(3)]
  204. rCount = len(pdb_chain)
  205. else:
  206. print('chain ' + pdbidMatch.group(3) + ' not found in ' + filename)
  207. continue
  208.  
  209. if pdb_input:
  210. if not args.t:
  211. print(fileNo, 'parsed pdb input', prot_id, filename)
  212. # print('header:', pdb_structure.header.get('head', 'NONE'))
  213. # print('idcode:', pdb_structure.header.get('idcode', 'NONE'))
  214. # print('deposition date:', pdb_structure.header.get(
  215. # 'deposition_date', 'NONE'))
  216. # for res in pdb_chain.get_residues(): # pdb_structure.get_residues():
  217. # print(res.get_full_id(), res.resname,
  218. # 'disordered' if res.disordered else '')
  219. else:
  220. print(fileNo, 'parsed pic input ', filename)
  221. report_PIC(pdb_structure, verbose=True)
  222.  
  223. # print(pdb_structure.header['idcode'], pdb_chain.id, ':',
  224. # pdb_structure.header['head'])
  225.  
  226. if args.wp:
  227. if pic_input:
  228. internal_to_atom_coordinates(pdb_structure)
  229. write_PDB(pdb_structure, outfile + '.PyPDB')
  230. print('wrote pdb output for', outfile)
  231.  
  232. if args.wi:
  233. if pdb_input:
  234. # add_PIC(pdb_structure)
  235. atom_to_internal_coordinates(pdb_structure)
  236. write_PIC(pdb_structure, outfile + '.PyPIC')
  237. print('wrote pic output for', outfile)
  238.  
  239. if args.t or args.tv:
  240. sp = StringIO()
  241. if pdb_input:
  242. with warnings.catch_warnings(record=True) as w:
  243. # warnings.simplefilter("error")
  244. try:
  245. r = structure_rebuild_test(pdb_structure, args.tv)
  246. warns = (len(w) > 0)
  247. if args.tv and warns:
  248. for wrn in w:
  249. print(wrn.message)
  250. print(prot_id, fileNo, r['report'],
  251. ('WARNINGS' if warns else ''))
  252. except Exception as e:
  253. print(prot_id, fileNo, 'ERROR FAIL:', type(e), e)
  254.  
  255. elif pic_input:
  256. internal_to_atom_coordinates(pdb_structure)
  257. write_PDB(pdb_structure, sp)
  258. sp.seek(0)
  259. pdb2 = PDB_parser.get_structure(prot_id, sp)
  260. atom_to_internal_coordinates(pdb2)
  261. sp2 = StringIO()
  262. write_PIC(pdb2, sp2)
  263. sp2.seek(0)
  264. inf = open(filename, 'r')
  265. lineCount = 0
  266. matchCount = 0
  267. diffCount = 0
  268. # for line1, line2 in itertools.zip_longest(inf, sp2):
  269. for line1, line2 in zip(inf, sp2):
  270. lineCount += 1
  271. if line1 == line2:
  272. matchCount += 1
  273. else:
  274. diffCount += 1
  275. if args.tv:
  276. print(line1, '!=', line2)
  277. print(lineCount, matchCount, diffCount)
  278. if args.rama:
  279. if pdb_input:
  280. atom_to_internal_coordinates(pdb_structure)
  281. for r in pdb_structure.get_residues():
  282. # print(r.pic.get_dihedral('N:CA:C:O'))
  283. print(r, r.pic.get_angle('psi'), r.pic.get_angle('phi'),
  284. r.pic.get_angle('omg'), r.pic.get_angle('chi2'),
  285. r.pic.get_length('0C:1N'))
  286.  
  287. if args.ws:
  288. write_SCAD(pdb_structure, outfile + '.scad', scale_val, pdbid=prot_id,
  289. backboneOnly=args.backbone)
  290.  
  291. fileNo += 1
  292.  
  293. print('normal termination')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement