Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
CSP_Rank/get_NMR_holo_pdb_files.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
114 lines (92 sloc)
4.31 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import os | |
from util import * | |
from Bio.PDB import PDBList, PDBParser, PPBuilder | |
print("Reading data from CSPRANK.csv") | |
csv_file_path = '/home/tiburon/Desktop/ROT4/CSP_Rank/CSPRANK.csv' | |
data = pd.read_csv(csv_file_path) | |
NMR_holo_structure_dir = '/home/tiburon/Desktop/ROT4/CSP_Rank/NMR_holo_structure_files/' | |
print(f"Creating directory {NMR_holo_structure_dir} if it does not exist") | |
if not(exists(NMR_holo_structure_dir)): | |
os.system('mkdir ' + NMR_holo_structure_dir) | |
print("Processing holo_pdb IDs") | |
for i, pdb_id in enumerate(data['holo_pdb']): | |
print(f"Downloading PDB file for holo PDB ID {pdb_id}") | |
get_pdb_file(pdb_id, output_dir=NMR_holo_structure_dir) | |
old_file_path = f'{NMR_holo_structure_dir}{pdb_id}.pdb' | |
print("Listing files in NMR_holo_structure_dir") | |
files = [f for f in os.listdir(NMR_holo_structure_dir)] | |
NMR_holo_pdb_split_dir = './NMR_holo_pdb_split_files/' | |
print(f"Creating directory {NMR_holo_pdb_split_dir} if it does not exist") | |
if not os.path.exists(NMR_holo_pdb_split_dir): | |
os.makedirs(NMR_holo_pdb_split_dir) | |
new_files = [] | |
print("Expanding PDB files") | |
for file in files: | |
basename = os.path.basename(file) | |
stri = f"python3 expand.py {NMR_holo_structure_dir}{file} {NMR_holo_pdb_split_dir}" | |
print(stri) | |
os.system(stri) | |
new_files = [NMR_holo_pdb_split_dir + f for f in os.listdir(NMR_holo_pdb_split_dir) if f.endswith('.pdb')] | |
outdirect = NMR_holo_pdb_split_dir | |
print(f"Creating directory {outdirect} if it does not exist") | |
if not(exists(outdirect)): | |
os.system('mkdir ' + outdirect) | |
def renumber_pdb_residues(input_pdb, output_pdb): | |
""" | |
Renumber residues for all chains in a PDB file so that each chain starts at residue 1 | |
and increments sequentially. | |
Parameters | |
---------- | |
input_pdb : str | |
Path to the input PDB file. | |
output_pdb : str | |
Path to the output PDB file. | |
""" | |
# Data structures to keep track of renumbering | |
current_chain = None | |
current_res_label = None # A tuple (chain, old_resnum, iCode) | |
chain_res_map = {} # Maps (chain, old_resnum, iCode) to new resnum | |
chain_current_res_count = {} | |
with open(input_pdb, 'r') as infile, open(output_pdb, 'w') as outfile: | |
for line in infile: | |
record_type = line[0:6].strip() | |
# We will focus on ATOM/HETATM lines for renumbering | |
if record_type in ('ATOM', 'HETATM'): | |
chain_id = line[21].strip() # Chain identifier | |
old_resnum = line[22:26].strip() # Original residue number | |
iCode = line[26].strip() # Insertion code if present | |
# Check if chain changed or first time seeing this chain | |
if chain_id not in chain_current_res_count: | |
chain_current_res_count[chain_id] = 0 | |
# Create a residue label tuple | |
res_label = (chain_id, old_resnum, iCode) | |
# If we encounter a new residue within the chain, increment the count | |
if res_label not in chain_res_map: | |
chain_current_res_count[chain_id] += 1 | |
chain_res_map[res_label] = chain_current_res_count[chain_id] | |
# Get the new residue number | |
new_resnum = chain_res_map[res_label] | |
# Construct the new line with updated residue number | |
# Residue number occupies columns 23–26 in PDB format (1-based indexing) | |
# We must ensure correct formatting: right-justified in a width of 4 | |
new_line = line[:22] + f"{new_resnum:>4}" + line[26:] | |
outfile.write(new_line) | |
else: | |
# Non-ATOM/HETATM lines are written unchanged | |
outfile.write(line) | |
for file in tqdm(new_files): | |
new_file = file[:file.rfind('.')] + '_renumbered.pdb' | |
renumber_pdb_residues(file, new_file) | |
os.system('mv ' + new_file + ' ' + file) | |
print("CONTINUE IF YOU WANT TO CONVERT TO IUPAC") | |
if continue_prompt(): | |
strings = [] | |
for i, inf in tqdm(enumerate(listdir(outdirect))): | |
string = "" | |
string += 'load coo pdb ' + outdirect + inf + '\n' | |
string += 'to iupac\n' | |
string += 'write coo pdb ' + outdirect + inf + "\n" | |
with open('cmd.txt', 'w') as outf: | |
outf.write(string) | |
os.system('pdbstat -s < cmd.txt') |