# plot_dockq_funnel.py

import sys
from tqdm import tqdm
from util import compute_structure_similarity

# At least one command-line argument (the PDB id) is required.
if not sys.argv[1:]:
    print("Usage: python script.py <pdb_id>")
    sys.exit(1)

# The id is stored upper-cased; file names are built from its lower-cased form.
pdb_id = sys.argv[1].upper()

# Path of the CSP rank score table for this structure
csp_rank_score_file = f'./CSP_Rank_Scores/CSP_{pdb_id.lower()}_CSpred.csv'

import os
from os import listdir
from os.path import isfile, join
import pandas as pd

# Load the CSP rank score table; any failure to read it is reported and
# ends the run with exit status 1.
try:
    df = pd.read_csv(csp_rank_score_file)
except Exception as read_error:
    print(f"Error reading CSP rank scores file: {read_error}")
    sys.exit(1)

# Define a function to determine the special case color
def determine_special_case(holo_model_path, pdb_code=None):
    """Return the plot color for the category a model path belongs to.

    The category is decided by substring checks on the path, tested in
    priority order: ``exp_<pdb>``, ``comp_<pdb>``, ``dropout`` (refined by
    ``v3_`` / ``v2_``), ``v2_`` or ``v3_`` alone, ``notemplate``,
    ``multimer``.  The returned string is a color name that is also used as
    the key of the legend mapping when plotting, so every possible return
    value must be a key of that mapping.

    Args:
        holo_model_path: Path (or file name) of the model.  Values that are
            not strings, such as NaN from an empty CSV cell, are treated as
            uncategorized.
        pdb_code: PDB identifier to match in the ``exp_`` / ``comp_``
            prefixes (case-insensitive).  Defaults to the module-level
            ``pdb_id``.

    Returns:
        One of ``'green'``, ``'cyan'``, ``'blue'``, ``'pink'``, ``'red'``,
        ``'purple'``, ``'orange'``, ``'yellow'`` or, for anything that
        matches no rule, ``'gray'``.
    """
    # A missing path cannot be searched for substrings; count it as
    # uncategorized instead of raising TypeError on the `in` tests below.
    if not isinstance(holo_model_path, str):
        return 'gray'

    if pdb_code is None:
        pdb_code = pdb_id
    pdb_lower = pdb_code.lower()

    if 'exp_' + pdb_lower in holo_model_path:
        return 'green'
    if 'comp_' + pdb_lower in holo_model_path:
        return 'cyan'

    if 'dropout' in holo_model_path:
        # v3_ takes precedence over v2_ when both markers are present.
        if 'v3_' in holo_model_path:
            return 'blue'
        if 'v2_' in holo_model_path:
            return 'pink'
        return 'red'

    if 'v2_' in holo_model_path or 'v3_' in holo_model_path:
        return 'purple'
    if 'notemplate' in holo_model_path:
        return 'orange'
    if 'multimer' in holo_model_path:
        return 'yellow'

    # The legend mapping files uncategorized rows under the 'gray' key
    # (label 'NA'); returning the label here would leave them unplotted.
    return 'gray'

# Scatter plot of the Bayes score (consensus * Confidence) against the DockQ
# score, with one colored series per model category.
import matplotlib.pyplot as plt

# Legend label for each category; the keys double as matplotlib color names.
colors = {
    'green': 'NMR',
    'cyan': 'Baseline AF2',
    'blue': 'AFS v3',
    'pink': 'AFS v2',
    'red': 'AFS v1',
    'purple': 'AFS2 v2',
    'orange': 'AF ALT',
    'yellow': 'AFS2 v1/3',
    'gray': 'NA',
}

# Tag every row with the color of its category.
df['special_cases'] = df['holo_model_path'].apply(determine_special_case)
# A tag without a legend entry would match no series below and the row would
# silently vanish from the plot, so file such rows under the gray 'NA' series.
df.loc[~df['special_cases'].isin(list(colors)), 'special_cases'] = 'gray'

# 12x6 inches: wide enough to leave room for the legend beside the axes.
plt.figure(figsize=(12, 6))

for point_color, legend_label in colors.items():
    subset = df[df['special_cases'] == point_color]
    plt.scatter(
        subset['dockq_score'],
        subset['consensus'] * subset['Confidence'],
        alpha=0.5,
        label=legend_label,
        color=point_color,
    )

plt.ylabel('Bayes Score')
plt.xlabel('DockQ Score')
plt.title('Bayes Score vs dockq Score')
plt.grid(True, linestyle='--', alpha=0.7)

# Shrink the axes to 70% of the figure width and anchor the legend to their right.
plt.subplots_adjust(right=0.7)
plt.legend(title='Special Cases', loc='center left', bbox_to_anchor=(1.15, 0.5))

# savefig does not create missing directories, so make sure the target exists.
figure_path = f'./Figures/bayes_dockq_scatter_{pdb_id.lower()}.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)

# bbox_inches='tight' keeps the legend, which sits outside the axes, in the image.
plt.savefig(figure_path, bbox_inches='tight', dpi=300)
plt.close()
	# NOTE: a second, whitespace-mangled copy of the script above used to follow
	# here. It repeated every statement and its flattened indentation made the
	# whole file unparseable, so it was removed.