Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import os
from os import listdir
from tqdm import tqdm
import sys
from os.path import exists, isdir
# Command-line handling: the script needs the directory of PDB files to
# prepare and the directory that will receive the prepared copies.
if len(sys.argv) < 3:
    print('usage: python3 prep_for_CSpred.py <directory_to_prep> <new_directory>')
    # A bare `raise` with no active exception only produces a confusing
    # RuntimeError; exit with a non-zero status instead.
    sys.exit(1)

input_dir = sys.argv[1]
output_dir = sys.argv[2]
# Collect the path of every entry in the input directory.  os.path.join is
# used so the paths are correct whether or not <directory_to_prep> was given
# with a trailing slash.  If input_dir is not a directory, `files` stays empty.
files = []
if isdir(input_dir):
    files = [os.path.join(input_dir, entry) for entry in listdir(input_dir)]

# Check if the output directory exists
if os.path.isdir(output_dir):
    # Names already present in the output directory (listdir returns bare
    # names, so no basename extraction is needed).
    # IT IS POSSIBLE TO FILTER WHICH FILES GO INTO THE DIRECTORY BOUND FOR NMRBOX. YOU CAN DO SO HERE::
    output_files = os.listdir(output_dir)
    output_basenames = set(output_files)
    # Skip inputs that already have a same-named file in the output directory
    files = [file for file in files if os.path.basename(file) not in output_basenames]
else:
    print("Output directory does not exist. Creating it.")
    os.makedirs(output_dir)

print("number of files = " + str(len(files)) + ".")
# `outdirect` is where the prepared PDB files are written;
# `outdirect_for_cond_file` is the directory prefix recorded for each file in
# CSpred_cond.txt.  Both currently point at the same output directory.
outdirect = output_dir
outdirect_for_cond_file = output_dir
if not exists(outdirect):
    # Create the directory from Python rather than via a shell `mkdir`
    # command string, which breaks on paths containing spaces or shell
    # metacharacters and cannot create missing parent directories.
    os.makedirs(outdirect)
def _renumber_as_single_chain(pdb_lines):
    """Return PDB text with every atom on chain A and residues numbered 1..N.

    ATOM/HETATM records get chain ID "A" and a continuous residue number that
    increases each time the original (chain ID, residue number) pair changes.
    TER records are dropped; every other line is passed through unchanged.

    Args:
        pdb_lines: iterable of lines (with line endings) from a PDB file.

    Returns:
        The rewritten file contents as a single string.

    Raises:
        ValueError: if an ATOM/HETATM record has a non-integer residue field.
    """
    out_lines = []
    last_residue_id = 0
    current_residue_key = None
    for line in pdb_lines:
        if line.startswith(('ATOM', 'HETATM')):
            # Include the original chain ID in the key so that two adjacent
            # residues from different chains that happen to share a residue
            # number are not merged into one residue.
            residue_key = (line[21:22], int(line[22:26]))
            if residue_key != current_residue_key:
                current_residue_key = residue_key
                last_residue_id += 1  # Increment our continuous residue ID counter
            # Reconstruct the line with the updated chain and residue ID,
            # keeping everything else the same
            new_residue_id_str = str(last_residue_id).rjust(4)
            out_lines.append(line[:21] + "A" + new_residue_id_str + line[26:])
        elif not line.startswith('TER'):
            out_lines.append(line)
    return "".join(out_lines)


print("Making files for CSpred with single chain.")
for f in tqdm(files):
    if not f.endswith('.pdb'):
        continue
    basename = os.path.basename(f)
    with open(f, 'r') as infi:
        outs = _renumber_as_single_chain(infi)
    # IT IS POSSIBLE TO FILTER WHICH FILES GO INTO THE DIRECTORY BOUND FOR NMRBOX. YOU CAN DO SO HERE::
    # os.path.join keeps the path valid when the output directory was given
    # without a trailing slash.
    outfile = os.path.join(outdirect, basename)
    with open(outfile, 'w') as outf:
        outf.write(outs)
# Ask for the solution pH until the user types something float() can parse.
ph = -1
while True:
    try:
        ph = float(input("Enter pH of solution (e.g. 7.0): "))
        break
    except ValueError:
        # Only a non-numeric answer triggers a re-prompt.  A bare `except:`
        # would also swallow KeyboardInterrupt and EOFError, making the loop
        # impossible to leave with Ctrl-C or when stdin is closed.
        print("Please enter a number.")
# Write the CSpred batch file: one line per file in the output directory,
# in the form "<path> <pH>".
cond_lines = [
    # os.path.join inserts the separator that plain concatenation omits when
    # the directory was given without a trailing slash.
    os.path.join(outdirect_for_cond_file, name) + ' ' + str(ph) + '\n'
    for name in tqdm(listdir(outdirect))
]
string = ''.join(cond_lines)
with open('CSpred_cond.txt', 'w') as fout:
    fout.write(string)
# Run pdbstat on the files: each file in the output directory is loaded,
# converted with `to iupac`, and written back to the same path.
# Iterating the list directly (no enumerate) lets tqdm see its length and
# show real progress.
for inf in tqdm(listdir(outdirect)):
    # os.path.join keeps the path valid when the output directory was given
    # without a trailing slash.
    pdb_path = os.path.join(outdirect, inf)
    string = ""
    string += 'load coo pdb ' + pdb_path + '\n'
    string += 'to iupac\n'
    string += 'write coo pdb ' + pdb_path + "\n"
    # pdbstat reads its commands from stdin, so they are staged in cmd.txt.
    with open('cmd.txt', 'w') as outf:
        outf.write(string)
    status = os.system('pdbstat -s < cmd.txt')
    if status != 0:
        # Report the failure rather than ignoring it; keep processing the
        # remaining files.
        print('warning: pdbstat exited with status ' + str(status) + ' for ' + pdb_path,
              file=sys.stderr)
# Now, move the output directory and batch script to NMRBOX e.g.:
# scp CSpred_cond.txt tbenavides@phosphorus.nmrbox.org:~/CSP/
# scp -r ./output_dir/ tbenavides@phosphorus.nmrbox.org:~/CSP/
# Then, run the following command on NMRBOX:
# tbenavides@phosphorus.nmrbox.org:~/CSP/ $ mkdir shift_predictions
# tbenavides@phosphorus.nmrbox.org:~/CSP/ $ CSpred -b CSpred_cond.txt -o ./shift_predictions/