Skip to content
Permalink
b9fe25bf2d
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
174 lines (148 sloc) 6.35 KB
import sys
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import os
from os import listdir
from os.path import isfile, join
def determine_special_case(holo_model_path, pdb_id):
"""Determines the color and label for each data point based on model type"""
if 'exp_' + pdb_id.lower() in holo_model_path:
return 'green'
elif 'comp_' + pdb_id.lower() in holo_model_path:
return 'cyan'
elif 'v3_' in holo_model_path and 'dropout' in holo_model_path:
return 'blue'
elif 'v2_' in holo_model_path and 'dropout' in holo_model_path:
return 'pink'
elif 'dropout' in holo_model_path:
return 'red'
elif 'v2_' in holo_model_path:
return 'purple'
elif 'v3_' in holo_model_path:
return 'purple'
elif 'notemplate' in holo_model_path:
return 'orange'
elif 'multimer' in holo_model_path:
return 'yellow'
else:
return 'gray'
# Define colors and their labels
colors = {
'green': 'NMR',
'cyan': 'Baseline AF2',
'blue': 'AFS v3',
'pink': 'AFS v2',
'red': 'AFS v1',
'purple': 'AFS2 v2',
'orange': 'AF ALT',
'yellow': 'AFS2 v1/3',
'gray': 'NA'
}
# List of PDB IDs to process
pdb_ids = ['2jw1', '2lgk', '2lsk', '2law', '2kwv', '2mnu', '2mps', '5tp6', '5urn', '7ovc', '7jyn', '7jq8', '6h8c']
for pdb_id in pdb_ids:
print(f"Processing {pdb_id}...")
# Get CSP rank score file path
csp_rank_score_file = f'./CSP_Rank_Scores/CSP_{pdb_id.lower()}_CSpred.csv'
# Read the CSP rank scores file
try:
df = pd.read_csv(csp_rank_score_file)
except Exception as e:
print(f"Error reading CSP rank scores file for {pdb_id}: {e}")
continue
# Add special cases column
df['special_cases'] = df['holo_model_path'].apply(lambda x: determine_special_case(x, pdb_id))
# Create figure with 6 subplots in 3 rows, 2 columns
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3, 2, figsize=(16, 24))
# Add title with PDB ID
fig.suptitle(f'Bayesian Analysis for {pdb_id.upper()}\n', fontsize=24, y=0.95, weight='bold')
# Add Bayes equation on left margin
fig.text(0.5, 0.92, 'P(model|data) = P(model) * P(data|model)',
fontsize=18, ha='center', weight='bold')
# Top row: Posterior plots
# Plot 1: Bayes Score vs TM Score (left)
for case, label in colors.items():
subset = df[df['special_cases'] == case]
ax1.scatter(subset['tm_score'], subset['consensus'] * subset['Confidence'],
alpha=0.5, label=label, color=case)
ax1.set_ylabel('P(model|data)\nBayes Score', fontsize=14, weight='bold')
ax1.set_xlabel('TM Score', fontsize=14, weight='bold')
ax1.set_title('Posterior (TM)', fontsize=16, weight='bold')
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.set_xlim(0, 1)
ax1.set_ylim(0, 1)
ax1.tick_params(labelsize=12)
# Plot 2: Bayes Score vs DockQ Score (right)
for case, label in colors.items():
subset = df[df['special_cases'] == case]
ax2.scatter(subset['dockq_score'], subset['consensus'] * subset['Confidence'],
alpha=0.5, label=None, color=case)
ax2.set_ylabel('P(model|data)\nBayes Score', fontsize=14, weight='bold')
ax2.set_xlabel('DockQ Score', fontsize=14, weight='bold')
ax2.set_title('Posterior (DockQ)', fontsize=16, weight='bold')
ax2.grid(True, linestyle='--', alpha=0.7)
ax2.set_xlim(0, 1)
ax2.set_ylim(0, 1)
ax2.tick_params(labelsize=12)
# Middle row: Prior plots
# Plot 3: Confidence vs TM Score (left)
for case, label in colors.items():
subset = df[df['special_cases'] == case]
ax3.scatter(subset['tm_score'], subset['Confidence'],
alpha=0.5, label=None, color=case)
ax3.set_ylabel('P(model)\nConfidence', fontsize=14, weight='bold')
ax3.set_xlabel('TM Score', fontsize=14, weight='bold')
ax3.set_title('Prior (TM)', fontsize=16, weight='bold')
ax3.grid(True, linestyle='--', alpha=0.7)
ax3.set_xlim(0, 1)
ax3.set_ylim(0, 1)
ax3.tick_params(labelsize=12)
# Plot 4: Confidence vs DockQ Score (right)
for case, label in colors.items():
subset = df[df['special_cases'] == case]
ax4.scatter(subset['dockq_score'], subset['Confidence'],
alpha=0.5, label=None, color=case)
ax4.set_ylabel('P(model)\nConfidence', fontsize=14, weight='bold')
ax4.set_xlabel('DockQ Score', fontsize=14, weight='bold')
ax4.set_title('Prior (DockQ)', fontsize=16, weight='bold')
ax4.grid(True, linestyle='--', alpha=0.7)
ax4.set_xlim(0, 1)
ax4.set_ylim(0, 1)
ax4.tick_params(labelsize=12)
# Bottom row: Likelihood plots
# Plot 5: Consensus vs TM Score (left)
for case, label in colors.items():
subset = df[df['special_cases'] == case]
ax5.scatter(subset['tm_score'], subset['consensus'],
alpha=0.5, label=None, color=case)
ax5.set_ylabel('P(data|model)\nConsensus Score', fontsize=14, weight='bold')
ax5.set_xlabel('TM Score', fontsize=14, weight='bold')
ax5.set_title('Likelihood (TM)', fontsize=16, weight='bold')
ax5.grid(True, linestyle='--', alpha=0.7)
ax5.set_xlim(0, 1)
ax5.set_ylim(0, 1)
ax5.tick_params(labelsize=12)
# Plot 6: Consensus vs DockQ Score (right)
for case, label in colors.items():
subset = df[df['special_cases'] == case]
ax6.scatter(subset['dockq_score'], subset['consensus'],
alpha=0.5, label=None, color=case)
ax6.set_ylabel('P(data|model)\nConsensus Score', fontsize=14, weight='bold')
ax6.set_xlabel('DockQ Score', fontsize=14, weight='bold')
ax6.set_title('Likelihood (DockQ)', fontsize=16, weight='bold')
ax6.grid(True, linestyle='--', alpha=0.7)
ax6.set_xlim(0, 1)
ax6.set_ylim(0, 1)
ax6.tick_params(labelsize=12)
# Adjust layout and add single legend
plt.subplots_adjust(right=0.85, hspace=0.3) # Make room for legend and adjust spacing
legend = fig.legend(title='Model Types', loc='center right',
bbox_to_anchor=(0.98, 0.5), fontsize=12)
legend.get_title().set_fontsize(14)
legend.get_title().set_weight('bold')
# Save the plot
plt.savefig(f'./Figures/bayes_components_{pdb_id.lower()}.png',
bbox_inches='tight', dpi=300)
plt.close()
print(f"Completed processing {pdb_id}")