Advertisement
reeps

parseval

Apr 25th, 2018
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.66 KB | None | 0 0
  1. ##https://github.com/nickpettican/PDB-parser/blob/master/PDBparser.py
  2.  
  3. import gzip
  4. import os
  5. import re
  6. import sys
  7. from operator import itemgetter
  8. from itertools import groupby
  9. #import chimera
  10. #from chimera import runCommand
  11. #import Midas
  12. #import MatchMaker
  13.  
  14. def openPDBFile(protein_name):
  15.     f = gzip.open(pathFinder(protein_name), 'rb')
  16.     file = f.read()
  17.     return file
  18. def  PDBFileOnEnt(protein_name):
  19.     return gzip.open(pathFinder(protein_name), 'rb')
  20. num_of_str_with = []
  21. #find strings with this protein name
  22. def findSims(protein_name):
  23.     f = open('95sim', 'r');
  24.     strings = f.read();
  25.     str_array = strings.splitlines()
  26.     for m in range(len(str_array)):
  27.         if (str_array[m].find(protein_name) <> -1):
  28.             num_of_str_with.append(m)  # count from 0th string
  29.  
  30. def pathFinder(protein_name):
  31.     path = '/home/dima/pdb/' + protein_name[1].lower() + protein_name[2].lower() + '/pdb' + protein_name.lower() + '.ent.gz'
  32.     return path
  33. #save file from tar gz to pdb file
  34. def saveInFile(protein_name):
  35.     f = open('/home/dima/artur/forpdb/' + protein_name + '.pdb', 'w')
  36.     f.write(openPDBFile(protein_name))
  37. #save and return the path to reserved file
  38. def pathFinderPDB(protein_name):
  39.     saveInFile(protein_name)
  40.     return '/home/dima/artur/forpdb/' + protein_name + '.pdb'
  41. #pars of PDB file
  42. def parsPDB(protein_name):
  43.     data = [line.strip() for line in open(pathFinderPDB(protein_name), 'r')]
  44.     return data
  45. #print and find all differents elemenents in line[0]
  46. def lineHeads(protein_name):
  47.     data = [line.strip().split for line in open(pathFinderPDB(protein_name), 'r')]
  48.     print "The file has %s lines \n" % len(data)
  49.     line_head_set = set()
  50.     for line in data:
  51.         if line[0] != "END":
  52.             line_head_set.add(line[0])
  53.     for element in set(line_head_set):
  54.         x = [line[0].count(element) for line in data]  
  55.         print "{x} of which are {y} elements\n".format(x=x.count(1), y=element)
  56.  
  57. def findElement(pdb, busca):
  58.     return sum( 1 for l in pdb if re.search(busca, l))
  59.  
  60. def findRange(pdb,busca):
  61.     # finds the range/s where the query "busca" is
  62.     range_busca = [i for i, line in enumerate(pdb) if re.search(busca, line)]
  63.     real_ranges = [map(itemgetter(1), value)
  64.                   for i, value in groupby(enumerate(range_busca),
  65.                   lambda (i, x): i-x)]
  66.     if len(real_ranges) == 1:
  67.         return [[real_ranges[0][0], real_ranges[0][-1] + 1]]
  68.     elif len(real_ranges) >= 2:
  69.         return [[real_ranges[i][0], real_ranges[i][-1] + 1]
  70.                 for i, line in enumerate(real_ranges)]
  71. def returnThePart(pdb, busca):
  72.     range = findRange(pdb, busca)
  73.  
  74.  
  75.  
  76. protein_name = sys.argv[1]
  77. lineHeads(protein_name)
  78. data = parsPDB(protein_name)
  79. print findRange(data, 'HETATM')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement