Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import os
from os import listdir
from tqdm import tqdm
import sys
from os.path import exists, isdir
from paths import *
from util import *
# The PDB identifier is the single required command-line argument.
if len(sys.argv) < 2:
    print('usage: prep_for_CSpred.py <pdb_id>')
    #print('usage: prep_for_CSpred.py <directory_to_prep> <new_directory>')
    # A bare `raise` with no active exception only yields a confusing
    # "RuntimeError: No active exception to re-raise" traceback; exit with a
    # non-zero status instead so the usage message is the last thing shown.
    sys.exit(1)
pdb = sys.argv[1]
# Gather every structure file that belongs to this PDB id into `files`.
# 9/1/24: the "_aligned" suffix was removed so that CS predictions can also be
# run on models with missing hydrogens.
#directs = [PDB_FILES + pdb.upper() + '_aligned/', PDB_FILES + pdb.upper() + '_alt_aligned/']
pdb_upper = pdb.upper()
pdb_lower = pdb.lower()
directs = [PDB_FILES + pdb_upper + '/', PDB_FILES + pdb_upper + '_alt/']

# Either of these two directories may be absent; every entry of the ones
# that exist is taken, whatever its extension.
files = []
for direct in directs:
    if not isdir(direct):
        continue
    files.extend(direct + f for f in listdir(direct))

# Per-PDB directory under `experimental_structures`: only '.pdb' entries.
experimental_direct = experimental_structures + pdb_lower + '/'
files.extend(
    experimental_direct + f
    for f in listdir(experimental_direct)
    if f.endswith('.pdb')
)

# Optional single file 'exp_<pdb>.pdb' placed directly in `experimental_structures`.
single_experimental = experimental_structures + 'exp_' + pdb_lower + '.pdb'
if exists(single_experimental):
    files.append(single_experimental)

# Entries of `computational_structures` whose name contains '<pdb>.pdb' and
# ends with '.pdb'.
computational_direct = computational_structures
pdb_file_tag = pdb_lower + '.pdb'
files.extend(
    computational_direct + f
    for f in listdir(computational_direct)
    if pdb_file_tag in f and f.endswith('.pdb')
)
# Drop structures that already have prediction output.  Existing output is
# looked up in f'{CS_Predictions}{pdb.lower()}_AFS_shift_predictions/' and any
# entry of `files` whose basename already appears there is removed.
output_dir = f'{CS_Predictions}{pdb.lower()}_AFS_shift_predictions/'
if os.path.isdir(output_dir):
    # os.listdir() returns bare entry names, so they can be compared with the
    # basenames of `files` directly.
    output_files = os.listdir(output_dir)
    output_basenames = set(output_files)
    # IT IS POSSIBLE TO FILTER WHICH FILES GO INTO THE DIRECTORY BOUND FOR NMRBOX. YOU CAN DO SO HERE::
    # Keep a path only if it has no existing output AND contains one of the
    # substrings below.  The alternatives are grouped explicitly: written as
    # `a and b or c or d`, `and` binds tighter than `or`, which kept every
    # 'exp'/'comp' path even when its output already existed.
    # NOTE(review): the substring filter is only applied when the output
    # directory exists, and it matches anywhere in the full path (directory
    # names included) -- confirm both are intended.
    wanted_tags = ('dropout', 'exp', 'comp')
    files = [
        file
        for file in files
        if os.path.basename(file) not in output_basenames
        and any(tag in file for tag in wanted_tags)
    ]
print("number of files = " + str(len(files)) + ".")
if not continue_prompt():
    # The user declined: stop with a non-zero status rather than a bare
    # `raise`, which would only produce an unrelated RuntimeError traceback.
    sys.exit(1)
outdirect = PDB_FILES + pdb.lower() + '_for_CSpred/'
outdirect_for_cond_file = pdb.lower() + '_for_CSpred/'
# Create the directory in-process rather than through a shell command built
# from a command-line argument; exist_ok makes re-runs a no-op.
os.makedirs(outdirect, exist_ok=True)


def _renumber_as_single_chain(lines):
    """Return PDB text with every atom on chain "A" and residues numbered from 1.

    Args:
        lines: iterable of PDB-format text lines (for example an open file).

    Returns:
        The rewritten text as one string.  ATOM/HETATM records get chain ID
        "A" and a residue number that increases by one each time the residue
        changes; TER records are dropped; all other lines are kept unchanged.
    """
    out_lines = []
    new_residue_id = 0
    current_residue_key = None
    for line in lines:
        if line.startswith(('ATOM', 'HETATM')):
            # Columns 22-27 hold chain ID, residue number and insertion code.
            # Comparing all three (not only the number) keeps residues such
            # as 52 and 52A, or equal numbers on adjacent chains, distinct.
            residue_key = line[21:27]
            if residue_key != current_residue_key:
                current_residue_key = residue_key
                new_residue_id += 1
            # Replace only the chain ID and residue number fields; everything
            # else on the line is kept as-is.
            out_lines.append(
                line[:21] + 'A' + str(new_residue_id).rjust(4) + line[26:]
            )
        elif not line.startswith('TER'):
            out_lines.append(line)
    return ''.join(out_lines)


print("Making files for CSpred with single chain.")
for f in tqdm(files):
    basename = os.path.basename(f)
    with open(f, 'r') as infi:
        outs = _renumber_as_single_chain(infi)
    # IT IS POSSIBLE TO FILTER WHICH FILES GO INTO THE DIRECTORY BOUND FOR NMRBOX. YOU CAN DO SO HERE::
    outfile = outdirect + basename
    with open(outfile, 'w') as outf:
        outf.write(outs)
def get_ph(pdb):
    """Look up the pH recorded for ``pdb`` in ``./db_holo_cond.txt``.

    Each line of the file is read as whitespace-separated fields: a structure
    path followed by a pH value.  The path is reduced to its file stem
    (directories and extension removed) and compared with ``pdb``.  When
    several lines match, the last one wins.

    Args:
        pdb: identifier to look for; must equal the file stem exactly.

    Returns:
        The pH of the matching entry as a float.

    Raises:
        RuntimeError: if no line of the file matches ``pdb``.
        ValueError: if the pH field of a matching line is not a number.
    """
    ph = None  # None (not -1) marks "not found" so any numeric value is valid
    with open('./db_holo_cond.txt', 'r') as inf:
        for l in inf:
            # split() tolerates tabs and repeated spaces, where split(' ')
            # would yield empty fields.
            fields = l.split()
            if len(fields) < 2:
                continue  # blank or incomplete line
            # splitext/basename handle paths without an extension, where
            # slicing up to rfind('.') would cut off the last character.
            this_pdb = os.path.splitext(os.path.basename(fields[0]))[0]
            if this_pdb == pdb:
                ph = float(fields[1])
    if ph is None:
        # Same exception type the former bare `raise` produced, now with a
        # message that says what is missing.
        raise RuntimeError('no pH entry for ' + repr(pdb) + ' in ./db_holo_cond.txt')
    return ph
# Write '<pdb>_cond.txt': one "<path> <pH>" line for every entry of the
# prepared directory, each path prefixed with `outdirect_for_cond_file`.
ph = get_ph(pdb.lower())
string = ''.join(
    f'{outdirect_for_cond_file}{f} {ph}\n'
    for f in tqdm(listdir(outdirect))
)
with open(pdb + '_cond.txt', 'w') as fout:
    fout.write(string)
# Optional last step: rewrite every prepared file in place through the
# external `pdbstat` program using its 'to iupac' command.
print("CONTINUE IF YOU WANT TO CONVERT TO IUPAC")
if continue_prompt():
    # tqdm is given the list itself (not an enumerate iterator) so that it
    # knows the total and can show real progress.
    for inf in tqdm(listdir(outdirect)):
        target = outdirect + inf
        # pdbstat reads its commands from stdin: load the file, convert it,
        # and write it back to the same path.
        string = f'load coo pdb {target}\nto iupac\nwrite coo pdb {target}\n'
        with open('cmd.txt', 'w') as outf:
            outf.write(string)
        status = os.system('pdbstat -s < cmd.txt')
        if status != 0:
            # Report the failure but keep going so one bad file does not
            # stop the remaining conversions.
            print('warning: pdbstat exited with status ' + str(status) + ' for ' + target)