Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
CSP_Rank/prep_for_CSpred_AF2_holo.py
Go to file. This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
110 lines (94 sloc)
4.03 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from os import listdir | |
from tqdm import tqdm | |
import sys | |
from os.path import exists, isdir | |
from paths import * | |
from util import * | |
# 9/1/24 remove _aligned suffix, possibility of running CS predictions using models with missing hydrogens | |
# Collect every entry found in the AF2 holo structure directories.
directs = [AF2_holo_structure_dir]
files = []
for direct in directs:
    if isdir(direct):
        # os.path.join gives the same path as plain concatenation when
        # `direct` ends with a separator, and a valid one when it does not.
        files.extend(os.path.join(direct, name) for name in listdir(direct))

# Directory where the CS prediction outputs are stored.
output_dir = 'AF2_holo_shift_predictions/'

# When the output directory exists, drop inputs whose basename already appears
# in it and keep only the dropout / exp / comp models.
# NOTE(review): as in the original script, no filtering at all happens when
# the output directory is missing -- confirm that this is intended.
if os.path.isdir(output_dir):
    # os.listdir already returns bare names, so no basename() call is needed.
    output_basenames = set(os.listdir(output_dir))

    # IT IS POSSIBLE TO FILTER WHICH FILES GO INTO THE DIRECTORY BOUND FOR NMRBOX. YOU CAN DO SO HERE:
    wanted_tags = ('dropout', 'exp', 'comp')
    # The tag test is grouped explicitly: written as `A and B or C or D`,
    # Python parses `(A and B) or C or D`, which let 'exp' / 'comp' files
    # through even when they were already present in the output directory.
    files = [
        path for path in files
        if os.path.basename(path) not in output_basenames
        and any(tag in path for tag in wanted_tags)
    ]

print("number of files = " + str(len(files)) + ".")
if not continue_prompt():
    # A bare `raise` with no active exception only aborts by accident (it
    # raises RuntimeError); exit explicitly instead, still with status 1.
    sys.exit("Aborted: continue_prompt() was declined.")
def _renumber_as_single_chain(pdb_lines):
    """Return the given PDB lines rewritten as one continuously numbered chain A.

    ATOM / HETATM records get chain ID "A" (column 22) and a new residue
    number (columns 23-26) that counts up from 1.  Whenever the original
    residue number changes to anything other than "previous + 1", one number
    is skipped so the break stays visible in the new numbering.  TER records
    are dropped; every other line is passed through unchanged.

    Args:
        pdb_lines: iterable of PDB text lines (line endings included).

    Returns:
        list of str: the rewritten lines, in input order.

    Raises:
        ValueError: if columns 23-26 of an ATOM / HETATM record are not an
            integer.
    """
    rewritten = []
    new_id = 0
    previous_id = None
    for line in pdb_lines:
        if line.startswith(('ATOM', 'HETATM')):
            # int() tolerates the padding spaces of the fixed-width field.
            residue_id = int(line[22:26])
            if residue_id != previous_id:
                if previous_id is not None and residue_id != previous_id + 1:
                    new_id += 1  # leave a one-number gap at a numbering break
                previous_id = residue_id
                new_id += 1
            # Keep everything else on the line as it was, including whatever
            # follows the residue number from column 27 onwards.
            rewritten.append(line[:21] + "A" + str(new_id).rjust(4) + line[26:])
        elif not line.startswith('TER'):
            rewritten.append(line)
    return rewritten


outdirect = PDB_FILES + 'AF2_holo_for_CSpred/'
outdirect_for_cond_file = 'AF2_holo_for_CSpred/'
# makedirs also creates missing parent directories and, unlike shelling out
# to `mkdir`, raises if the directory cannot be created.
os.makedirs(outdirect, exist_ok=True)

print("Making files for CSpred with single chain.")

for f in tqdm(files):
    basename = f[f.rfind('/') + 1:]
    with open(f, 'r') as infi:
        renumbered_lines = _renumber_as_single_chain(infi)

    # IT IS POSSIBLE TO FILTER WHICH FILES GO INTO THE DIRECTORY BOUND FOR NMRBOX. YOU CAN DO SO HERE:
    with open(outdirect + basename, 'w') as outf:
        outf.writelines(renumbered_lines)
def get_ph(pdb):
    """Return the pH listed for `pdb` in ./db_holo_cond.txt.

    Every usable line of the file holds a structure path followed by a pH
    value, separated by whitespace.  The ID of a line is the file-name part
    of the path up to the first '.' or '_'.  Lines with fewer than two fields
    (for example blank lines) are skipped.  If several lines match, the last
    one wins.

    Args:
        pdb: ID to look up; it is compared verbatim against the IDs parsed
            from the file.

    Returns:
        float: the pH from the file, or 7.5 for '1dmo' when it is not listed.

    Raises:
        RuntimeError: if `pdb` is not listed and has no built-in fallback.
        ValueError: if the pH field of a matching line is not a number.
    """
    ph = None
    with open('./db_holo_cond.txt', 'r') as inf:
        for l in inf:
            # split() with no argument copes with repeated spaces and tabs.
            fields = l.split()
            if len(fields) < 2:
                continue
            this_pdb = fields[0].split('/')[-1].split('.')[0].split('_')[0]
            if this_pdb == pdb:
                ph = float(fields[1])
    if ph is not None:
        return ph
    if pdb == '1dmo':
        return 7.5
    # Same exception type the former bare `raise` produced, but now with a
    # message that names the file that was actually searched.
    raise RuntimeError(pdb + " not found in db_holo_cond.txt")
# Build AF2_holo_cond.txt: one "<relative model path> <pH>" line per input
# file, where the pH is looked up by the lower-cased ID taken from the file
# name (the part before the first '.' or '_').
cond_lines = []
for f in tqdm(files):
    basename = f.rsplit('/', 1)[-1]
    pdb = basename.split('.')[0].split('_')[0]
    ph = get_ph(pdb.lower())
    if ph is None:
        continue
    cond_lines.append(outdirect_for_cond_file + basename + ' ' + str(ph) + '\n')
string = ''.join(cond_lines)

with open('AF2_holo_cond.txt', 'w') as fout:
    fout.write(string)
print("CONTINUE IF YOU WANT TO CONVERT TO IUPAC")
if continue_prompt():
    # Iterate over the listing itself: wrapping it in enumerate() hid its
    # length from tqdm (so no total was shown) and the index was never used.
    for inf in tqdm(listdir(outdirect)):
        model_path = outdirect + inf
        # Command script fed to pdbstat: load the file, convert to IUPAC,
        # then write the result back over the same file.
        string = (
            f'load coo pdb {model_path}\n'
            'to iupac\n'
            f'write coo pdb {model_path}\n'
        )
        with open('cmd.txt', 'w') as outf:
            outf.write(string)
        status = os.system('pdbstat -s < cmd.txt')
        if status != 0:
            # Report the failure but keep converting the remaining files.
            print("pdbstat exited with status " + str(status) + " for " + inf)