# fig2_1.py

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from mpl_toolkits.axes_grid1 import make_axes_locatable

CSmethod = "UCBShift"
data_source_file = './CSPRANK.csv'
df = pd.read_csv(data_source_file)

# Residuals are always "AF2 minus NMR", so a positive value means the AF2
# column scored higher than the NMR column for that row.
df['residual_1'] = df['F_AF2'] - df['F_NMR']
df['residual_2'] = df['MCC_AF2'] - df['MCC_NMR']
df['residual_3'] = df['consensus_AF2'] - df['consensus_NMR']


def _draw_residual_panel(fig, cell, frame, x_col, color, title):
    """Draw a scatter of ``residual_3`` against *x_col* with marginal histograms.

    Args:
        fig: Figure that receives the new axes.
        cell: GridSpec cell (e.g. ``gs[1]``) in which the panel is placed.
        frame: DataFrame providing *x_col* and ``residual_3``.
        x_col: Name of the column plotted on the x axis.
        color: Colour of the scatter markers.
        title: Title shown above the top marginal histogram.

    Returns:
        Tuple ``(scatter_axes, top_histogram_axes, right_histogram_axes)``.
    """
    ax = fig.add_subplot(cell)
    sns.scatterplot(ax=ax, x=x_col, y='residual_3', data=frame, alpha=0.7, color=color)

    # Dashed reference line at residual == 0 (both scores equal).
    ax.axhline(y=0, color='red', linestyle='--', linewidth=2)
    # Axis labels are blanked; the panel title names both quantities instead.
    ax.set_ylabel('')
    ax.set_xlabel('')

    # Carve the marginal histogram axes out of the scatter axes' own area.
    divider = make_axes_locatable(ax)
    hist_x = divider.append_axes("top", 0.5, pad=0.1, sharex=ax)
    hist_y = divider.append_axes("right", 0.5, pad=0.1, sharey=ax)

    # The title goes on the top histogram so it sits above the whole panel.
    hist_x.set_title(title, fontsize=12)

    # Quadrant labels live on the scatter axes (in its axes coordinates):
    # below the zero line residual_3 < 0, above it residual_3 > 0.
    ax.text(0.1, 0.1, 'PDB>AF2', color='red', fontweight='bold', fontsize=16,
            verticalalignment='bottom', horizontalalignment='left', transform=ax.transAxes)
    ax.text(0.1, 0.9, 'AF2>PDB', color='red', fontweight='bold', fontsize=16,
            verticalalignment='top', horizontalalignment='left', transform=ax.transAxes)

    # Marginal distribution of the x variable (axis decorations hidden).
    hist_x.hist(frame[x_col], bins=30, color='gray', edgecolor='black')
    hist_x.axis('off')

    # Marginal distribution of residual_3 (axis decorations hidden).
    hist_y.hist(frame['residual_3'], bins=30, orientation='horizontal',
                color='gray', edgecolor='black')
    hist_y.axis('off')

    # Re-apply the scatter limits so the histograms line up with the scatter.
    hist_x.set_xlim(ax.get_xlim())
    hist_y.set_ylim(ax.get_ylim())
    return ax, hist_x, hist_y


# Three stacked panels. NOTE: the variable names below are historical and do
# not match the rows: `ax_bottom` is in the first (top) row, `ax_top` in the
# second row, and `ax_middle` in the third (bottom) row.
fig = plt.figure(figsize=(15, 15))
gs = fig.add_gridspec(3, 1)

# ------------------ SECOND ROW (AF2_TM vs residual_3) -------------------
ax_top, ax_top_histx, ax_top_histy = _draw_residual_panel(
    fig, gs[1], df, 'AF2_TM', 'blue', 'TM Score vs AF2 - PDB CSP_Rank Score')

# ------------------ THIRD ROW (AF2_DockQ vs residual_3) -------------------
ax_middle, ax_middle_histx, ax_middle_histy = _draw_residual_panel(
    fig, gs[2], df, 'AF2_DockQ', 'green', 'DockQ vs AF2 - PDB CSP_Rank Score')

# ------------------ FIRST ROW (consensus_NMR vs consensus_AF2) -------------------
ax_bottom = fig.add_subplot(gs[0])
sns.scatterplot(ax=ax_bottom, x='consensus_NMR', y='consensus_AF2', data=df)
# Identity line: points above it have consensus_AF2 > consensus_NMR.
ax_bottom.plot([0, 1], [0, 1], color='red')
ax_bottom.set_title('PDB vs AF2 CSP_Rank Score', fontsize=12)
# Axis labels are blanked; the title names both quantities instead.
ax_bottom.set_ylabel('')
ax_bottom.set_xlabel('')
ax_bottom.text(0.7, 0.1, 'PDB>AF2', color='red', fontweight='bold', fontsize=22,
               verticalalignment='bottom', horizontalalignment='left', transform=ax_bottom.transAxes)
ax_bottom.text(0.1, 0.9, 'AF2>PDB', color='red', fontweight='bold', fontsize=22,
               verticalalignment='top', horizontalalignment='left', transform=ax_bottom.transAxes)

# Symmetric bin edges for residual_3 (computed only; no plot in this script
# uses them). linspace gives exactly 21 edges / 20 bins and, unlike arange
# with a float step, cannot gain an extra edge from rounding or fail when
# every residual is zero.
data = df['residual_3']
max_abs_value = max(abs(data.min()), abs(data.max()))
symmetric_range = (-max_abs_value, max_abs_value)
bin_width = (symmetric_range[1] - symmetric_range[0]) / 20
bins = np.linspace(symmetric_range[0], symmetric_range[1], 21)

# Share of rows whose residual_3 is above, below, or within +/- 0.1.
percent_above_threshold = (df['residual_3'] > 0.1).mean() * 100
print(f"Percent of rows where 'residual_3' value is > 0.1: {percent_above_threshold:.2f}%")
percent_below_threshold = (df['residual_3'] < -0.1).mean() * 100
print(f"Percent of rows where 'residual_3' value is < -0.1: {percent_below_threshold:.2f}%")
percent_between_thresholds = ((df['residual_3'] >= -0.1) & (df['residual_3'] <= 0.1)).mean() * 100
print(f"Percent of rows where 'residual_3' value is between -0.1 and 0.1: {percent_between_thresholds:.2f}%")

# Number of rows where consensus_AF2 exceeds consensus_NMR.
num_rows_above_zero = (df['residual_3'] > 0).sum()
print(f"Number of rows where 'residual_3' value is > 0: {num_rows_above_zero}")

fig.subplots_adjust(hspace=0.3)

plt.show()