Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
CSP_Rank/open_ES_ensemble.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
323 lines (265 sloc)
12.9 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# open_comp_exp_csp.py | |
from Bio.PDB import PDBParser, PDBIO, Select | |
import pymol | |
from pymol import cmd, stored | |
from os.path import exists | |
import os | |
from os import listdir | |
from os.path import isdir | |
import csv | |
from tqdm import tqdm | |
import os | |
from os import listdir | |
from os.path import exists, isdir | |
import mdtraj as md | |
import numpy as np | |
from util import * | |
import glob | |
from paths import * | |
def split_pdb_chains(pdb_filepath): | |
# Parse the input PDB file | |
parser = PDBParser() | |
structure = parser.get_structure('input_structure', pdb_filepath) | |
new_files = [] | |
# Iterate through chains and write each chain to a separate PDB file | |
io = PDBIO() | |
for model in structure: | |
for chain in model: | |
chain_id = chain.get_id() | |
output_filename = f'{os.path.splitext(pdb_filepath)[0]}_{chain_id}.pdb' | |
new_files.append(output_filename) | |
io.set_structure(chain) | |
io.save(output_filename, select=Select()) | |
return new_files | |
def open_structures(bound_file, apo, label, CSPs, bound_seq): | |
bound = bound_file[bound_file.rfind('/')+1:bound_file.rfind('/')+5].lower() | |
new_exp_file = bound_file[:bound_file.rfind('.')]+'_'+apo+'.pdb' | |
update_b_factors_longest_chain(bound_file, CSPs, bound_seq, new_exp_file) | |
pdb = bound | |
bound_path = new_exp_file | |
bound = pdb | |
pymol.finish_launching() | |
new_files = split_pdb_chains(bound_path) | |
file_sizes = [os.path.getsize(file_path) for file_path in new_files] | |
# load in the larger of the two files: | |
protein_file = new_files[file_sizes.index(max(file_sizes))] | |
peptide_file = new_files[file_sizes.index(min(file_sizes))] | |
#pymol.finish_launching() | |
cmd.load(bound_path, label) | |
if file_sizes.index(max(file_sizes)) == 0: | |
pymol.cmd.alter(label + " and chain A", "chain='prot"+label+"'") | |
pymol.cmd.alter(label + " and chain B", "chain='peptide"+label+"'") | |
else: | |
pymol.cmd.alter(label + " and chain B", "chain='prot"+label+"'") | |
pymol.cmd.alter(label + " and chain A", "chain='peptide"+label+"'") | |
# Zoom to chain A | |
cmd.orient() | |
cmd.viewport(800, 800) | |
return new_files | |
def hide_structures(label, chain): | |
selection_string = f"{label} and chain {chain}" | |
pymol.cmd.hide('everything', selection_string) | |
def show_structures(label, chain): | |
selection_string = f"{label} and chain {chain}" | |
pymol.cmd.show('cartoon', selection_string) | |
def hide_CSP_surface(label, chain): | |
selection_string = f"{label} and chain {chain}" | |
pymol.cmd.hide('surface', selection_string) | |
cmd.set('transparency', 0, selection_string) | |
def show_CSP_surface(label, chain, cutoffs): | |
selection_string = f"{label} and chain {chain}" | |
pymol.cmd.show('surface', selection_string) | |
cmd.set('transparency', 0.5, selection_string) | |
pymol.cmd.color('blue', selection_string) | |
# Define the colors for each range | |
colors = ['blue', 'cyan', 'white', 'yellow', 'orange', 'red'] | |
# Assuming cutoffs are sorted in increasing order and have length 4 | |
if len(cutoffs) != 5: | |
raise ValueError("Cutoffs list must contain exactly four values.") | |
for i, cutoff in enumerate(cutoffs): | |
lower_bound = cutoffs[i - 1] if i > 0 else 0 | |
upper_bound = cutoff | |
color_range_selection = f"({selection_string}) and b > {lower_bound} and b < {upper_bound}" | |
pymol.cmd.color(colors[i], color_range_selection) | |
# Color residues above the last cutoff | |
last_cutoff_selection = f"({selection_string}) and b > {cutoffs[-1]}" | |
pymol.cmd.color(colors[-1], last_cutoff_selection) | |
def show_CSP_ribbon(label, chain, cutoffs): | |
selection_string = f"{label} and chain {chain}" | |
# Set initial color for the ribbon | |
pymol.cmd.color('white', selection_string) | |
# Hide the surface and show the ribbon | |
pymol.cmd.hide('surface', selection_string) | |
#pymol.cmd.show('ribbon', selection_string) | |
# Define the colors for each range | |
colors = ['gray50', 'darksalmon', 'deepsalmon', 'firebrick'] | |
print(cutoffs) | |
# Check if the cutoffs list contains exactly five values | |
if len(cutoffs) != 3: | |
raise ValueError("Cutoffs list must contain exactly five values.") | |
color_range_selection = f"({selection_string}) and b < 0" | |
pymol.cmd.color('white', color_range_selection) | |
for i, cutoff in enumerate(cutoffs): | |
lower_bound = cutoffs[i - 1] if i > 0 else 0 | |
upper_bound = cutoff | |
color_range_selection = f"({selection_string}) and b > {lower_bound} and b < {upper_bound}" | |
pymol.cmd.color(colors[i], color_range_selection) | |
# Color residues above the last cutoff and show them as sticks | |
last_cutoff_selection = f"({selection_string}) and b > {cutoffs[-1]}" | |
pymol.cmd.color(colors[-1], last_cutoff_selection) | |
pymol.cmd.show('sticks', last_cutoff_selection) | |
def show_atoms_hide_ribbon(label, chain): | |
selection_string = f"{label} and chain {chain}" | |
pymol.cmd.hide('everything', selection_string) | |
#pymol.cmd.show('spheres', selection_string) | |
pymol.cmd.show('sticks', selection_string) | |
def save_view_as_png(png_file_path, ray=1, dpi=300): | |
# Enable ray tracing for higher quality | |
if ray: | |
pymol.cmd.ray() | |
# Set transparent background | |
pymol.cmd.set('ray_opaque_background', 0) | |
# Save PNG with specified dpi | |
def process_png_files(bound, images_directory="images"): | |
png_files = glob.glob(f"{images_directory}/{bound}*.png") | |
for png in png_files: | |
command = [ | |
"convert", png, | |
"-background", "white", | |
"-alpha", "remove", | |
"-alpha", "off", | |
png | |
] | |
subprocess.run(command) | |
def process_structure(file_pref): | |
pymol.cmd.color("red", file_pref + " and chain prot" + file_pref) | |
if file_pref.find('NMR') != -1: | |
pymol.cmd.color("yellow", file_pref + " and chain peptide" + file_pref) | |
elif file_pref == "AF2": | |
pymol.cmd.color("magenta", file_pref + " and chain peptide" + file_pref) | |
elif file_pref == 'AF3': | |
pymol.cmd.color("cyan", file_pref + " and chain peptide" + file_pref) | |
elif file_pref.find('ES') != -1: | |
pymol.cmd.color("blue", file_pref + " and chain peptide" + file_pref) | |
pymol.cmd.color("green", file_pref + " and chain prot" + file_pref) | |
pymol.cmd.orient() | |
pymol.cmd.show('sticks', file_pref + ' and chain peptide' + file_pref) | |
pymol.cmd.hide('cartoon', file_pref + ' and chain peptide' + file_pref) | |
def hide_residues(file_pref, well_defined_residues): | |
pymol.cmd.hide('everything', file_pref + ' and chain prot' + file_pref + ' and resi -'+str(well_defined_res[0])) | |
pymol.cmd.hide('everything', file_pref + ' and chain prot' + file_pref + ' and resi '+str(well_defined_res[1])+'-') | |
method = "MONTE" | |
CSmethod = "UCBShift" | |
data_source_file = './CSPRANK.csv' | |
images_directory = './images/' | |
parsed_data = pd.read_csv(data_source_file) | |
apos = [str(data) for data in parsed_data['apo_bmrb']] | |
apo_pdbs = [str(data) for data in parsed_data['apo_pdb']] | |
holos = [data.lower() for data in parsed_data['holo_pdb']] | |
well_defined_residues = [data for data in parsed_data['Well_Defined_Residues']] | |
match_sequences = [data for data in parsed_data['match_seq']] | |
pdbs = input('Provide bound pdb ( e.g. "7jq8" ) ') | |
pdb = pdbs.strip() | |
holo = pdb.lower() | |
apo = str(apos[holos.index(holo.lower())]).lower() | |
structures = ['AF2', 'ES', 'NMR'] | |
z_scores = [0, 1, 3] | |
match_seq = match_sequences[apos.index(apo)] | |
well_defined_res = well_defined_residues[apos.index(apo)] | |
well_defined_res = [int(resi.strip())-1 for resi in well_defined_res.split(':')[1].split('..')] | |
new_files = [] | |
ES_prefs = [] | |
for structure in structures: | |
CSPs = [] | |
CSP_cutoff = -1 | |
bound_file = "" | |
bound_seq = "" | |
file_pref = structure | |
print(structure) | |
def pymol_open(CSPs, CSP_cutoff, file_pref, bound_file): | |
cutoffs = [] | |
CSP_below_thresh = [ C for C in CSPs if C < CSP_cutoff and C > 0 ] | |
for z_score in z_scores: | |
cutoffs.append(calculate_z_score_threshold(CSP_below_thresh, z_score)) | |
genfiles = open_structures(bound_file, apo, file_pref, CSPs, bound_seq) | |
for f in genfiles: | |
new_files.append(f) | |
process_structure(file_pref) | |
show_CSP_ribbon(file_pref, 'prot' + file_pref, cutoffs) | |
# if file_pref != 'ES': | |
# hide_residues(file_pref, well_defined_res) | |
if structure == 'NMR': | |
# load NMR w/ real shifts | |
structure = 'NMR_real' | |
file_pref = structure | |
bound_file = experimental_structures + 'exp_' + holo + '.pdb' | |
CSPs, CSP_cutoff, bound_seq = calc_CSP_wrapper(apo, holo, well_defined_res, method=method, CSmethod='REAL', structure_source=structure, match_seq=match_seq) | |
pymol_open(CSPs, CSP_cutoff, file_pref, bound_file) | |
# load NMR w/ pred shifts | |
structure = 'NMR_pred' | |
file_pref = structure | |
bound_file = experimental_structures + 'exp_' + holo + '.pdb' | |
CSPs, CSP_cutoff, bound_seq = calc_CSP_wrapper(apo, holo, well_defined_res, method=method, CSmethod=CSmethod, structure_source=structure, match_seq=match_seq) | |
pymol_open(CSPs, CSP_cutoff, file_pref, bound_file) | |
elif structure == 'AF2': | |
bound_file = computational_structures + 'comp_' + holo + '.pdb' | |
CSPs, CSP_cutoff, bound_seq = calc_CSP_wrapper(apo, holo, well_defined_res, method=method, CSmethod=CSmethod, structure_source=structure, match_seq=match_seq) | |
pymol_open(CSPs, CSP_cutoff, file_pref, bound_file) | |
elif structure == 'AF3': | |
bound_file = NMR_holo_structure_dir + holo + '.pdb' | |
CSPs, CSP_cutoff, bound_seq = calc_CSP_wrapper(apo, holo, well_defined_res, method=method, CSmethod=CSmethod, structure_source=structure, match_seq=match_seq) | |
pymol_open(CSPs, CSP_cutoff, file_pref, bound_file) | |
elif structure == "ES": | |
possible_bound_dirs = [PDB_FILES + dire + '/' for dire in listdir(PDB_FILES) if dire.find(holo.lower()) != -1 and isdir(PDB_FILES + dire)] | |
print("Select a directory from the following list:") | |
for i, dire in enumerate(possible_bound_dirs): | |
print(f"{i}: {dire}") | |
selected_index = int(input("Enter the number corresponding to your choice: ")) | |
bound_dir = possible_bound_dirs[selected_index] | |
pdb_files = [ bound_dir + f for f in listdir(bound_dir) if f.endswith('af2.pdb')] | |
print(pdb_files) | |
for i, pdb_file in enumerate(pdb_files): | |
bound_file = pdb_file | |
file_pref = 'ES' + str(i) | |
ES_prefs.append(file_pref) | |
# bound_file = find_medoid_structure(pdb_files) # get medoid model of ES ensemble | |
bound_file_basename = bound_file[bound_file.rfind('/')+1:bound_file.rfind('.')] # return basename so we know which CSpredictions to load | |
CSPs, CSP_cutoff, bound_seq = calc_CSP_wrapper(apo, holo, well_defined_res, method=method, CSmethod=CSmethod, structure_source=structure, match_seq=match_seq, basename=bound_file_basename) | |
pymol_open(CSPs, CSP_cutoff, file_pref, bound_file) | |
else: | |
print("received malformed structure selection: " + structure) | |
continue | |
for file in new_files: | |
os.system('rm ' + file) | |
if 'AF2' in structures: | |
pymol.cmd.align('AF2 and chain protAF2', 'chain protNMR_real') | |
if 'AF3' in structures: | |
pymol.cmd.align('AF3 and chain protAF3', 'chain protNMR_real') | |
if 'ES' in structures: | |
for pref in ES_prefs: | |
pymol.cmd.align(pref + ' and chain prot' + pref, 'chain protNMR_real') | |
pymol.cmd.orient() | |
pymol.cmd.hide('everything', 'hydro') | |
if False: | |
print("continue if you want to create a movie from this pymol session.") | |
continue_prompt() | |
# Define the path and filename for the output movie | |
output_movie = images_directory + holo | |
print("generating movie for " + holo + " AFS prediction") | |
# Set the view and orientation of the structure | |
pymol.cmd.bg_color("white") | |
pymol.cmd.set('ray_trace_frames', 1) # Ensure frames are ray-traced | |
#pymol.cmd.set('ray_trace_mode', 3) # Improve ray-tracing quality | |
pymol.cmd.viewport(1920, 1080) # Set the viewport size to 1920x1080 for HD frames | |
# Create a movie rolling around the y-axis over 8 seconds | |
pymol.cmd.mset("1 x100") # Set the number of frames and frame rate | |
cmd.movie.add_roll(8.0,axis='y',start=1) | |
#cmd.movie.produce(output_movie, quality=90) | |
cmd.mpng(output_movie) | |
process_png_files(holo, images_directory="images") | |
print("using mencoder mpng --> avi") | |
os.system("mencoder -mc 0 -noskip -skiplimit 0 -ovc x264 -x264encopts preset=slow:crf=20 'mf://images/"+holo+"*.png' -mf type=png:fps=18 -o '"+images_directory+holo+"_pred.avi'") | |
# IF CONVERT DOESNT WORK THEN USE FFMPEG: | |
print("Using ffmpeg to convert avi --> gif") | |
os.system('ffmpeg -i \''+images_directory+holo+'_pred.avi\' -vf "fps=10,scale=960:-540:flags=lanczos,palettegen" -y palette.png') | |
os.system("ffmpeg -i '"+images_directory+holo+"_pred.avi' -i palette.png -filter_complex \"fps=10,scale=960:-540:flags=lanczos[x];[x][1:v]paletteuse=dither=none\" './images/"+holo+"_pred.gif'") |