# plot_structure_comparison_funnels.py

import sys
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import os
from os import listdir
from os.path import isfile, join
from paths import *

def determine_special_case(holo_model_path, pdb_id):
    """Pick the marker color for a model from substrings of its path.

    Checks run in priority order and the first match wins:
    'exp_<pdb>', 'comp_<pdb>', then paths containing 'dropout' (split by
    a 'v3_' or 'v2_' tag), then plain 'v2_'/'v3_' paths, then
    'notemplate', then 'multimer'.

    Args:
        holo_model_path: Path string of the model file.
        pdb_id: PDB identifier; lower-cased before it is matched.

    Returns:
        A color name; 'gray' when no marker is found in the path.
    """
    pdb_tag = pdb_id.lower()
    if f'exp_{pdb_tag}' in holo_model_path:
        return 'green'
    if f'comp_{pdb_tag}' in holo_model_path:
        return 'cyan'

    tagged_v2 = 'v2_' in holo_model_path
    tagged_v3 = 'v3_' in holo_model_path

    if 'dropout' in holo_model_path:
        # v3 outranks v2 when a path carries both tags.
        if tagged_v3:
            return 'blue'
        if tagged_v2:
            return 'pink'
        return 'red'

    # Without 'dropout', both version tags share one color.
    if tagged_v2 or tagged_v3:
        return 'purple'

    for marker, color in (('notemplate', 'orange'), ('multimer', 'yellow')):
        if marker in holo_model_path:
            return color
    return 'gray'

# Legend label for every marker color that determine_special_case can return.
# The plotting loops iterate this dict, so its order is the draw order.
colors = dict(
    green='NMR',
    cyan='Baseline AF2',
    blue='AFS v3',
    pink='AFS v2',
    red='AFS v1',
    purple='AFS2 v2',
    orange='AF ALT',
    yellow='AFS2 v1/3',
    gray='NA',
)

# PDB entries for which a figure is generated, in processing order.
pdb_ids = [
    '2jw1', '2lgk', '2lsk', '2law', '2kwv', '2mnu', '2mps',
    '5tp6', '5urn', '7ovc', '7jyn', '7jq8', '6h8c',
]

# Columns the plots below read from each CSP rank scores CSV.
_REQUIRED_COLUMNS = ('holo_model_path', 'tm_score', 'dockq_score', 'consensus', 'Confidence')

# One entry per subplot, in row-major order of the 2x2 grid:
# (x column, x-axis label, y-axis label, attach legend labels?).
# Only one panel carries labels so the shared figure legend lists each
# model type once instead of four times.
_PANELS = (
    ('tm_score', 'TM Score', 'Consensus Score', False),
    ('tm_score', 'TM Score', 'Bayes Score', True),
    ('dockq_score', 'DockQ Score', 'Consensus Score', False),
    ('dockq_score', 'DockQ Score', 'Bayes Score', False),
)


def _scatter_by_case(ax, df, x_col, y_values, x_label, y_label, with_labels):
    """Draw one panel: y_values against df[x_col], one scatter series per color.

    Args:
        ax: Matplotlib axes to draw on.
        df: Scores table; must contain x_col and 'special_cases'.
        x_col: Name of the column plotted on the x axis.
        y_values: Series aligned with df holding the y values.
        x_label: Axis label for x, also used in the panel title.
        y_label: Axis label for y, also used in the panel title.
        with_labels: If true, each series gets its legend label from ``colors``.
    """
    for case, label in colors.items():
        mask = df['special_cases'] == case
        ax.scatter(df.loc[mask, x_col], y_values[mask],
                   alpha=0.5, label=label if with_labels else None, color=case)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(f'{y_label} vs {x_label}')
    ax.grid(True, linestyle='--', alpha=0.7)
    # Axes are pinned to [0, 1]; assumes all plotted scores lie in that range.
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)


# The output directory does not depend on the PDB ID, so create it once.
figures_dir = os.path.join(working_dir, 'Figures')
os.makedirs(figures_dir, exist_ok=True)

for pdb_id in pdb_ids:
    print(f"Processing {pdb_id}...")

    # Get CSP rank score file path
    csp_rank_score_file = os.path.join(CSP_Rank_Scores, f'CSP_{pdb_id.lower()}_CSpred.csv')

    # Best-effort batch: an unreadable file skips this entry, not the whole run.
    try:
        df = pd.read_csv(csp_rank_score_file)
    except Exception as e:
        print(f"Error reading CSP rank scores file for {pdb_id}: {e}")
        continue

    # A file that parses but lacks a needed column is skipped the same way,
    # rather than aborting the batch with a KeyError halfway through a figure.
    missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        print(f"Error reading CSP rank scores file for {pdb_id}: missing columns {missing}")
        continue

    # Tag every row with the color of its model type
    df['special_cases'] = df['holo_model_path'].apply(
        lambda path: determine_special_case(path, pdb_id))

    # The "Bayes" score plotted here is the product consensus * Confidence.
    y_series = {
        'Consensus Score': df['consensus'],
        'Bayes Score': df['consensus'] * df['Confidence'],
    }

    # Create figure with 4 subplots (2x2)
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    try:
        for ax, (x_col, x_label, y_label, with_labels) in zip(axes.flat, _PANELS):
            _scatter_by_case(ax, df, x_col, y_series[y_label],
                             x_label, y_label, with_labels)

        # Adjust layout and add single legend
        fig.subplots_adjust(right=0.85)  # Make room for legend
        fig.legend(title='Model Types', loc='center right',
                   bbox_to_anchor=(0.98, 0.5))

        # Add title for entire figure
        fig.suptitle(f'Structure Comparison Metrics for {pdb_id.upper()}',
                     fontsize=16, y=0.95)

        # Save the plot
        fig.savefig(os.path.join(figures_dir, f'structure_comparison_metrics_{pdb_id.lower()}.png'),
                    bbox_inches='tight', dpi=300)
    finally:
        # Always release the figure so a failure does not leak it across the batch.
        plt.close(fig)

    print(f"Completed processing {pdb_id}")
	import sys
	from tqdm import tqdm
	import pandas as pd
	import matplotlib.pyplot as plt
	import os
	from os import listdir
	from os.path import isfile, join
	from paths import *

	def determine_special_case(holo_model_path, pdb_id):
	    """Return the marker color for a model based on substrings of its path.

	    NOTE(review): this is a second definition of the same function that
	    appears earlier in this file; it looks like a paste artifact and can
	    probably be removed - confirm before deleting.

	    Checks run in priority order and the first match wins.

	    Args:
	        holo_model_path: Path string of the model file.
	        pdb_id: PDB identifier; lower-cased before it is matched.

	    Returns:
	        A color name; 'gray' when no marker is found in the path.
	    """
	    pdb_tag = pdb_id.lower()
	    if 'exp_' + pdb_tag in holo_model_path:
	        return 'green'
	    elif 'comp_' + pdb_tag in holo_model_path:
	        return 'cyan'
	    elif 'v3_' in holo_model_path and 'dropout' in holo_model_path:
	        return 'blue'
	    elif 'v2_' in holo_model_path and 'dropout' in holo_model_path:
	        return 'pink'
	    elif 'dropout' in holo_model_path:
	        return 'red'
	    elif 'v2_' in holo_model_path or 'v3_' in holo_model_path:
	        # Without 'dropout', both version tags share one color.
	        return 'purple'
	    elif 'notemplate' in holo_model_path:
	        return 'orange'
	    elif 'multimer' in holo_model_path:
	        return 'yellow'
	    else:
	        return 'gray'

	# Legend label for every marker color used by the plots.
	# NOTE(review): these two tables repeat the definitions made earlier in
	# this file - presumably a paste artifact; confirm and remove.
	colors = dict(
	    green='NMR',
	    cyan='Baseline AF2',
	    blue='AFS v3',
	    pink='AFS v2',
	    red='AFS v1',
	    purple='AFS2 v2',
	    orange='AF ALT',
	    yellow='AFS2 v1/3',
	    gray='NA',
	)

	# PDB entries for which a figure is generated, in processing order.
	pdb_ids = [
	    '2jw1', '2lgk', '2lsk', '2law', '2kwv', '2mnu', '2mps',
	    '5tp6', '5urn', '7ovc', '7jyn', '7jq8', '6h8c',
	]

	# NOTE(review): this whole loop repeats the plotting loop that appears
	# earlier in this file and would regenerate the same PNGs - presumably a
	# paste artifact; confirm and remove.

	# Columns the plots below read from each CSP rank scores CSV.
	_REQUIRED_COLUMNS = ('holo_model_path', 'tm_score', 'dockq_score', 'consensus', 'Confidence')

	# One entry per subplot, in row-major order of the 2x2 grid:
	# (x column, x-axis label, y-axis label, attach legend labels?).
	# Only one panel carries labels so the shared figure legend lists each
	# model type once instead of four times.
	_PANELS = (
	    ('tm_score', 'TM Score', 'Consensus Score', False),
	    ('tm_score', 'TM Score', 'Bayes Score', True),
	    ('dockq_score', 'DockQ Score', 'Consensus Score', False),
	    ('dockq_score', 'DockQ Score', 'Bayes Score', False),
	)


	def _scatter_by_case(ax, df, x_col, y_values, x_label, y_label, with_labels):
	    """Draw one panel: y_values against df[x_col], one scatter series per color.

	    Args:
	        ax: Matplotlib axes to draw on.
	        df: Scores table; must contain x_col and 'special_cases'.
	        x_col: Name of the column plotted on the x axis.
	        y_values: Series aligned with df holding the y values.
	        x_label: Axis label for x, also used in the panel title.
	        y_label: Axis label for y, also used in the panel title.
	        with_labels: If true, each series gets its legend label from ``colors``.
	    """
	    for case, label in colors.items():
	        mask = df['special_cases'] == case
	        ax.scatter(df.loc[mask, x_col], y_values[mask],
	                   alpha=0.5, label=label if with_labels else None, color=case)

	    ax.set_xlabel(x_label)
	    ax.set_ylabel(y_label)
	    ax.set_title(f'{y_label} vs {x_label}')
	    ax.grid(True, linestyle='--', alpha=0.7)
	    # Axes are pinned to [0, 1]; assumes all plotted scores lie in that range.
	    ax.set_xlim(0, 1)
	    ax.set_ylim(0, 1)


	# The output directory does not depend on the PDB ID, so create it once.
	figures_dir = os.path.join(working_dir, 'Figures')
	os.makedirs(figures_dir, exist_ok=True)

	for pdb_id in pdb_ids:
	    print(f"Processing {pdb_id}...")

	    # Get CSP rank score file path
	    csp_rank_score_file = os.path.join(CSP_Rank_Scores, f'CSP_{pdb_id.lower()}_CSpred.csv')

	    # Best-effort batch: an unreadable file skips this entry, not the whole run.
	    try:
	        df = pd.read_csv(csp_rank_score_file)
	    except Exception as e:
	        print(f"Error reading CSP rank scores file for {pdb_id}: {e}")
	        continue

	    # A file that parses but lacks a needed column is skipped the same way,
	    # rather than aborting the batch with a KeyError halfway through a figure.
	    missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
	    if missing:
	        print(f"Error reading CSP rank scores file for {pdb_id}: missing columns {missing}")
	        continue

	    # Tag every row with the color of its model type
	    df['special_cases'] = df['holo_model_path'].apply(
	        lambda path: determine_special_case(path, pdb_id))

	    # The "Bayes" score plotted here is the product consensus * Confidence.
	    y_series = {
	        'Consensus Score': df['consensus'],
	        'Bayes Score': df['consensus'] * df['Confidence'],
	    }

	    # Create figure with 4 subplots (2x2)
	    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
	    try:
	        for ax, (x_col, x_label, y_label, with_labels) in zip(axes.flat, _PANELS):
	            _scatter_by_case(ax, df, x_col, y_series[y_label],
	                             x_label, y_label, with_labels)

	        # Adjust layout and add single legend
	        fig.subplots_adjust(right=0.85)  # Make room for legend
	        fig.legend(title='Model Types', loc='center right',
	                   bbox_to_anchor=(0.98, 0.5))

	        # Add title for entire figure
	        fig.suptitle(f'Structure Comparison Metrics for {pdb_id.upper()}',
	                     fontsize=16, y=0.95)

	        # Save the plot
	        fig.savefig(os.path.join(figures_dir, f'structure_comparison_metrics_{pdb_id.lower()}.png'),
	                    bbox_inches='tight', dpi=300)
	    finally:
	        # Always release the figure so a failure does not leak it across the batch.
	        plt.close(fig)

	    print(f"Completed processing {pdb_id}")