Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ##https://github.com/nickpettican/PDB-parser/blob/master/PDBparser.py
- import gzip
- import os
- import re
- import sys
- from operator import itemgetter
- from itertools import groupby
- #import chimera
- #from chimera import runCommand
- #import Midas
- #import MatchMaker
def openPDBFile(protein_name):
    """Return the decompressed contents of the gzipped PDB entry.

    protein_name -- entry identifier; the archive path is resolved by
                    pathFinder().

    Returns whatever gzip's read() yields for a file opened in 'rb' mode.
    Errors raised while opening or reading the archive propagate unchanged.
    """
    # The context manager closes the archive as soon as the data has been
    # read instead of leaving the handle open until garbage collection.
    with gzip.open(pathFinder(protein_name), 'rb') as archive:
        return archive.read()
def PDBFileOnEnt(protein_name):
    """Open the gzipped PDB entry for protein_name and return the handle.

    The archive path comes from pathFinder(); the file is opened in binary
    read mode and is returned still open.
    """
    archive_path = pathFinder(protein_name)
    handle = gzip.open(archive_path, 'rb')
    return handle
# 0-based indices of the lines in '95sim' that contain a queried protein name.
# findSims() appends to this list on every call and never clears it.
num_of_str_with = []


def findSims(protein_name):
    """Record which lines of the '95sim' file contain protein_name.

    Reads the file named '95sim' (relative to the current working directory)
    and appends the 0-based index of every line that contains protein_name
    as a plain substring to the module-level list num_of_str_with.

    Returns None; results accumulate in num_of_str_with across calls.
    """
    # Close the file as soon as it has been read.
    with open('95sim', 'r') as sim_file:
        sim_lines = sim_file.read().splitlines()

    for index, sim_line in enumerate(sim_lines):
        # `in` is the substring test the original spelled as
        # find(...) <> -1; the `<>` operator no longer exists in Python 3.
        if protein_name in sim_line:
            num_of_str_with.append(index)  # count from 0th string
def pathFinder(protein_name, pdb_root='/home/dima/pdb/'):
    """Return the path of the gzipped PDB entry for protein_name.

    The result has the form <pdb_root>/<xy>/pdb<name>.ent.gz, where <name>
    is protein_name lower-cased and <xy> are its second and third
    characters, lower-cased.

    protein_name -- entry identifier; needs at least three characters,
                    otherwise IndexError is raised.
    pdb_root     -- directory that holds the two-letter sub-directories.
                    Defaults to the location that was previously hard-coded,
                    so existing one-argument calls are unaffected.
    """
    subdir = protein_name[1].lower() + protein_name[2].lower()
    file_name = 'pdb' + protein_name.lower() + '.ent.gz'
    return os.path.join(pdb_root, subdir, file_name)
- #save the decompressed .ent.gz entry to a .pdb file
def saveInFile(protein_name):
    """Write the decompressed PDB entry for protein_name to a .pdb file.

    The data comes from openPDBFile() and is written to
    /home/dima/artur/forpdb/<protein_name>.pdb, replacing any existing file.
    Returns None.
    """
    # Read the archive first so that a missing or corrupt archive does not
    # leave a freshly truncated, empty .pdb file behind.
    contents = openPDBFile(protein_name)
    target = '/home/dima/artur/forpdb/' + protein_name + '.pdb'
    # openPDBFile() reads the archive in binary mode, so the bytes are
    # written back unchanged. The context manager flushes and closes the
    # file before this function returns.
    with open(target, 'wb') as out_file:
        out_file.write(contents)
- #save the entry and return the path to the saved file
def pathFinderPDB(protein_name):
    """Extract the entry for protein_name and return the .pdb file's path.

    saveInFile() is called first, so the file is (re)written on every call
    before its path is handed back.
    """
    saveInFile(protein_name)
    pdb_path = ''.join(['/home/dima/artur/forpdb/', protein_name, '.pdb'])
    return pdb_path
- #parse the PDB file into a list of stripped lines
def parsPDB(protein_name):
    """Return the lines of the extracted PDB file for protein_name.

    The file path is obtained from pathFinderPDB(). Every line is returned
    with leading and trailing whitespace (including the newline) stripped.
    """
    # Read inside a context manager so the handle is closed deterministically
    # rather than leaked by an anonymous open() inside the comprehension.
    with open(pathFinderPDB(protein_name), 'r') as pdb_file:
        return [line.strip() for line in pdb_file]
- #find and print all the different elements that appear in line[0]
def lineHeads(protein_name):
    """Print how many lines of the PDB file start with each leading token.

    The file path is obtained from pathFinderPDB(). The total line count is
    printed first, then one message per distinct first whitespace-separated
    token, in sorted order. Lines whose first token is "END" are not
    reported, and blank lines count towards the total only.

    Returns None; all output goes to standard output.
    """
    from collections import Counter  # local import keeps the block self-contained

    with open(pathFinderPDB(protein_name), 'r') as pdb_file:
        # split() has to be *called*: storing the bound method made every
        # later line[0] lookup fail with a TypeError.
        data = [line.split() for line in pdb_file]

    print("The file has %s lines \n" % len(data))

    # Tally first tokens in one pass. Comparing whole tokens avoids the
    # substring matches of str.count() (e.g. "TER" inside "MASTER"), and
    # skipping empty field lists avoids an IndexError on blank lines.
    head_counts = Counter(fields[0] for fields in data if fields)
    head_counts.pop("END", None)

    for element in sorted(head_counts):
        message = "{x} of which are {y} elements\n".format(
            x=head_counts[element], y=element)
        print(message)
def findElement(pdb, busca):
    """Count the entries of pdb in which the regular expression busca matches.

    pdb   -- iterable of strings (for example the list from parsPDB()).
    busca -- pattern handed to re.search() for every entry.

    Returns the number of matching entries as an int.
    """
    hits = 0
    for entry in pdb:
        if re.search(busca, entry):
            hits += 1
    return hits
def findRange(pdb, busca):
    """Find the runs of consecutive lines of pdb that match busca.

    pdb   -- sequence of strings.
    busca -- regular expression handed to re.search() for every line.

    Returns a list of [start, stop] pairs, one per run of adjacent matching
    lines, where start is the index of the first line of the run and stop is
    one past the index of its last line (so pdb[start:stop] is the run).
    Returns None when no line matches.
    """
    matching = [index for index, line in enumerate(pdb)
                if re.search(busca, line)]
    if not matching:
        # Kept explicit: callers have always received None for "no match".
        return None

    runs = []
    # Within a run of consecutive indices, (position - index) is constant,
    # so grouping on that difference splits the matches into runs. A plain
    # one-argument lambda replaces the tuple-unpacking form that Python 3
    # no longer accepts.
    for _, group in groupby(enumerate(matching),
                            lambda pair: pair[0] - pair[1]):
        members = [index for _, index in group]
        runs.append([members[0], members[-1] + 1])
    return runs
def returnThePart(pdb, busca):
    """Return the runs of consecutive lines of pdb that match busca.

    pdb   -- sequence of strings.
    busca -- regular expression forwarded to findRange().

    Returns a list with one sub-list of lines per [start, stop] pair
    reported by findRange(), or an empty list when nothing matches.

    NOTE(review): this function previously computed the ranges, stored them
    in a local that shadowed the builtin `range`, and returned None.
    Returning the matching slices is the presumed intent -- confirm.
    """
    spans = findRange(pdb, busca)
    if not spans:
        # findRange() yields None when no line matches.
        return []
    return [pdb[start:stop] for start, stop in spans]
# Script entry point: summarise the line heads of the requested entry, then
# print the ranges of its HETATM lines. Guarded so that importing this module
# does not read sys.argv or touch the file system.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Fail with a usage message instead of an IndexError traceback.
        sys.exit('usage: %s PROTEIN_NAME' % sys.argv[0])
    protein_name = sys.argv[1]
    lineHeads(protein_name)
    data = parsPDB(protein_name)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(findRange(data, 'HETATM'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement