Skip to content
Permalink
b9fe25bf2d
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
114 lines (92 sloc) 4.31 KB
import pandas as pd
import os
from util import *
from Bio.PDB import PDBList, PDBParser, PPBuilder
print("Reading data from CSPRANK.csv")
csv_file_path = '/home/tiburon/Desktop/ROT4/CSP_Rank/CSPRANK.csv'
data = pd.read_csv(csv_file_path)
NMR_holo_structure_dir = '/home/tiburon/Desktop/ROT4/CSP_Rank/NMR_holo_structure_files/'
print(f"Creating directory {NMR_holo_structure_dir} if it does not exist")
if not(exists(NMR_holo_structure_dir)):
os.system('mkdir ' + NMR_holo_structure_dir)
print("Processing holo_pdb IDs")
for i, pdb_id in enumerate(data['holo_pdb']):
print(f"Downloading PDB file for holo PDB ID {pdb_id}")
get_pdb_file(pdb_id, output_dir=NMR_holo_structure_dir)
old_file_path = f'{NMR_holo_structure_dir}{pdb_id}.pdb'
print("Listing files in NMR_holo_structure_dir")
files = [f for f in os.listdir(NMR_holo_structure_dir)]
NMR_holo_pdb_split_dir = './NMR_holo_pdb_split_files/'
print(f"Creating directory {NMR_holo_pdb_split_dir} if it does not exist")
if not os.path.exists(NMR_holo_pdb_split_dir):
os.makedirs(NMR_holo_pdb_split_dir)
new_files = []
print("Expanding PDB files")
for file in files:
basename = os.path.basename(file)
stri = f"python3 expand.py {NMR_holo_structure_dir}{file} {NMR_holo_pdb_split_dir}"
print(stri)
os.system(stri)
new_files = [NMR_holo_pdb_split_dir + f for f in os.listdir(NMR_holo_pdb_split_dir) if f.endswith('.pdb')]
outdirect = NMR_holo_pdb_split_dir
print(f"Creating directory {outdirect} if it does not exist")
if not(exists(outdirect)):
os.system('mkdir ' + outdirect)
def renumber_pdb_residues(input_pdb, output_pdb):
"""
Renumber residues for all chains in a PDB file so that each chain starts at residue 1
and increments sequentially.
Parameters
----------
input_pdb : str
Path to the input PDB file.
output_pdb : str
Path to the output PDB file.
"""
# Data structures to keep track of renumbering
current_chain = None
current_res_label = None # A tuple (chain, old_resnum, iCode)
chain_res_map = {} # Maps (chain, old_resnum, iCode) to new resnum
chain_current_res_count = {}
with open(input_pdb, 'r') as infile, open(output_pdb, 'w') as outfile:
for line in infile:
record_type = line[0:6].strip()
# We will focus on ATOM/HETATM lines for renumbering
if record_type in ('ATOM', 'HETATM'):
chain_id = line[21].strip() # Chain identifier
old_resnum = line[22:26].strip() # Original residue number
iCode = line[26].strip() # Insertion code if present
# Check if chain changed or first time seeing this chain
if chain_id not in chain_current_res_count:
chain_current_res_count[chain_id] = 0
# Create a residue label tuple
res_label = (chain_id, old_resnum, iCode)
# If we encounter a new residue within the chain, increment the count
if res_label not in chain_res_map:
chain_current_res_count[chain_id] += 1
chain_res_map[res_label] = chain_current_res_count[chain_id]
# Get the new residue number
new_resnum = chain_res_map[res_label]
# Construct the new line with updated residue number
# Residue number occupies columns 23–26 in PDB format (1-based indexing)
# We must ensure correct formatting: right-justified in a width of 4
new_line = line[:22] + f"{new_resnum:>4}" + line[26:]
outfile.write(new_line)
else:
# Non-ATOM/HETATM lines are written unchanged
outfile.write(line)
for file in tqdm(new_files):
new_file = file[:file.rfind('.')] + '_renumbered.pdb'
renumber_pdb_residues(file, new_file)
os.system('mv ' + new_file + ' ' + file)
print("CONTINUE IF YOU WANT TO CONVERT TO IUPAC")
if continue_prompt():
strings = []
for i, inf in tqdm(enumerate(listdir(outdirect))):
string = ""
string += 'load coo pdb ' + outdirect + inf + '\n'
string += 'to iupac\n'
string += 'write coo pdb ' + outdirect + inf + "\n"
with open('cmd.txt', 'w') as outf:
outf.write(string)
os.system('pdbstat -s < cmd.txt')