Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import pandas as pd
import os
from util import *
from Bio.PDB import PDBList, PDBParser, PPBuilder
def is_protein_monomer(pdb_id):
"""
Check if the given PDB ID corresponds to a protein monomer.
Parameters:
pdb_id (str): The 4-character PDB ID code.
Returns:
bool: True if the structure is a single protein chain, False otherwise.
"""
# Fetch PDB file
pdbl = PDBList()
pdb_file = pdbl.retrieve_pdb_file(pdb_id, pdir='.', file_format='pdb')
# Parse the structure
parser = PDBParser(QUIET=True)
structure = parser.get_structure(pdb_id, pdb_file)
# Extract polypeptide chains
ppb = PPBuilder()
polypeptides = []
for model in structure:
for chain in model:
# Get polypeptide segments in the chain
pps = ppb.build_peptides(chain)
if pps:
polypeptides.append(chain)
os.remove(pdb_file)
# Check if we have exactly one polypeptide chain
if len(polypeptides) == 1:
return True
else:
return False
print("Reading data from CSPRANK.csv")
csv_file_path = '/home/tiburon/Desktop/ROT4/CSP_Rank/CSPRANK.csv'
data = pd.read_csv(csv_file_path)
NMR_apo_pdb_dir = './NMR_apo_pdb_files/'
print(f"Creating directory {NMR_apo_pdb_dir} if it does not exist")
if not os.path.exists(NMR_apo_pdb_dir):
os.makedirs(NMR_apo_pdb_dir)
manual_apo_bmrbs = []
visited_bmb_ids = []
print("Processing apo_bmrb IDs")
for bmrb_id in data['apo_bmrb']:
bmrb_id = str(bmrb_id)
if bmrb_id in visited_bmb_ids:
continue
if bmrb_id in manual_apo_bmrbs:
continue
visited_bmb_ids.append(bmrb_id)
pdb_id = get_pdb_from_bmrb(bmrb_id)
# raise
if pdb_id is None:
manual_apo_bmrbs.append(bmrb_id)
continue
if not is_protein_monomer(pdb_id):
print(f"Skipping PDB ID {pdb_id} for BMRB ID {bmrb_id} as it is not a protein monomer")
manual_apo_bmrbs.append(bmrb_id)
continue
print(f"Downloading PDB file for BMRB ID {bmrb_id} with PDB ID {pdb_id}")
get_pdb_file(pdb_id, output_dir=NMR_apo_pdb_dir)
old_file_path = f'{NMR_apo_pdb_dir}{pdb_id}.pdb'
new_file_path = f'{NMR_apo_pdb_dir}{bmrb_id}_{pdb_id}.pdb'
if os.path.exists(old_file_path):
os.rename(old_file_path, new_file_path)
print("Processing holo_pdb IDs")
for i, pdb_id in enumerate(data['holo_pdb']):
if str(data['apo_bmrb'][i]) not in manual_apo_bmrbs:
continue
bmrb_id = str(data['apo_bmrb'][i])
print(f"Downloading PDB file for holo PDB ID {pdb_id}")
get_pdb_file(pdb_id, output_dir=NMR_apo_pdb_dir)
old_file_path = f'{NMR_apo_pdb_dir}{pdb_id}.pdb'
new_file_path = f'{NMR_apo_pdb_dir}{bmrb_id}_{pdb_id}.pdb'
if os.path.exists(old_file_path):
os.rename(old_file_path, new_file_path)
print("Listing files in NMR_apo_pdb_dir")
files = [f for f in os.listdir(NMR_apo_pdb_dir)]
NMR_apo_pdb_split_dir = './NMR_apo_pdb_split_files/'
print(f"Creating directory {NMR_apo_pdb_split_dir} if it does not exist")
if not os.path.exists(NMR_apo_pdb_split_dir):
os.makedirs(NMR_apo_pdb_split_dir)
new_files = []
print("Expanding PDB files")
for file in files:
basename = os.path.basename(file)
stri = f"python3 expand.py {NMR_apo_pdb_dir}{file} {NMR_apo_pdb_split_dir}"
print(stri)
os.system(stri)
new_files = [NMR_apo_pdb_split_dir + f for f in os.listdir(NMR_apo_pdb_split_dir) if f.endswith('.pdb')]
def remove_chain_from_file(file, chain):
with open(file, 'r') as f:
lines = f.readlines()
stri = ""
for line in lines:
if line.startswith('ATOM') and line[21] != chain:
continue
stri += line
with open(file, 'w') as f:
f.write(stri)
print("Removing chains from new files")
for file in new_files:
longest_chain = get_longest_chain(file)
remove_chain_from_file(file, longest_chain)
output_dir = f'NMR_apo_shift_predictions/'
outdirect = 'NMR_apo_for_CSpred/'
print(f"Creating directory {outdirect} if it does not exist")
if not(exists(outdirect)):
os.system('mkdir ' + outdirect)
print("Making files for CSpred with single chain.")
for f in tqdm(new_files):
last_residue_id = 0
current_residue_id = None
outs = ""
basename = f[f.rfind('/')+1:]
with open(f, 'r') as infi:
for line in infi:
if line.startswith('ATOM') or line.startswith('HETATM'):
residue_id = int(line[22:26].strip())
if residue_id != current_residue_id:
current_residue_id = residue_id
last_residue_id += 1
new_residue_id_str = str(last_residue_id).rjust(4, ' ')
updated_line = line[:21] + "A" + new_residue_id_str + line[26:]
outs += updated_line
elif not line.startswith('TER'):
outs += line
outfile = outdirect + basename
with open(outfile, 'w') as outf:
outf.write(outs)
def get_ph(pdb):
ph = -1
with open('./db_apo_cond.txt', 'r') as inf:
for l in inf:
this_pdb = l.split(' ')[0].split('/')[-1].split('.')[0].split('_')[0]
if this_pdb == pdb:
ph = float(l.split(' ')[1].strip())
if ph != -1:
return ph
if pdb == '1dmo':
return 7.5
print(pdb + " not found in db_apo_cond.txt")
raise
print("Generating NMR_apo_cond.txt")
string = ''
for f in tqdm(new_files):
pdb = f[f.rfind('/')+1:].split('.')[0].split('_')[0].split('-')[0]
ph = get_ph(pdb.lower())
basename = f[f.rfind('/')+1:]
string += outdirect + basename + ' ' + str(ph) + '\n'
print(string)
with open('NMR_apo_cond.txt', 'w') as fout:
fout.write(string)
print("CONTINUE IF YOU WANT TO CONVERT TO IUPAC")
if continue_prompt():
strings = []
for i, inf in tqdm(enumerate(listdir(outdirect))):
string = ""
string += 'load coo pdb ' + outdirect + inf + '\n'
string += 'to iupac\n'
string += 'write coo pdb ' + outdirect + inf + "\n"
with open('cmd.txt', 'w') as outf:
outf.write(string)
os.system('pdbstat -s < cmd.txt')