Skip to content
Permalink
b9fe25bf2d
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
124 lines (100 sloc) 5.58 KB
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Method label; it is not referenced anywhere else in this script.
CSmethod = "UCBShift"

# Load the score table. Every panel below reads its columns from `df`.
data_source_file = './CSPRANK.csv'
df = pd.read_csv(data_source_file)

# AF2-minus-NMR differences, stored as new columns on `df`.
# Only 'residual_3' (the consensus score difference) is used further down.
for residual_column, af2_column, nmr_column in (
    ('residual_1', 'F_AF2', 'F_NMR'),
    ('residual_2', 'MCC_AF2', 'MCC_NMR'),
    ('residual_3', 'consensus_AF2', 'consensus_NMR'),
):
    df[residual_column] = df[af2_column] - df[nmr_column]

# One square figure holding three stacked rows; each panel below claims one
# row of `gs` by index.
fig = plt.figure(figsize=(15, 15))
gs = fig.add_gridspec(nrows=3, ncols=1)
# ------------------ GridSpec row 1: AF2_TM vs residual_3 -------------------
# NOTE: the axes are named "top", but gs[1] is the second of the three rows;
# the first row (gs[0]) is taken by the consensus_NMR vs consensus_AF2 panel.
ax_top = fig.add_subplot(gs[1])
sns.scatterplot(data=df, x='AF2_TM', y='residual_3', color='blue', alpha=0.7, ax=ax_top)

# Dashed zero line: points above it have consensus_AF2 > consensus_NMR.
ax_top.axhline(y=0, color='red', linestyle='--', linewidth=2)

# Both axis labels are blanked. The labels previously used here were
# 'TM Score' (x) and 'AF2 - PDB CSP_Rank Score' (y).
ax_top.set_xlabel('')
ax_top.set_ylabel('')

# Marginal axes: a strip above (sharing x) and a strip to the right (sharing y).
divider_top = make_axes_locatable(ax_top)
ax_top_histx = divider_top.append_axes("top", size=0.5, pad=0.1, sharex=ax_top)
ax_top_histy = divider_top.append_axes("right", size=0.5, pad=0.1, sharey=ax_top)

# The title goes on the upper marginal axes so that it sits above the histogram.
ax_top_histx.set_title('TM Score vs AF2 - PDB CSP_Rank Score', fontsize=12)

# The two captions are attached to the histogram axes but positioned with the
# scatter axes' transform, so their coordinates are fractions of the scatter panel.
top_caption_style = dict(
    color='red',
    fontweight='bold',
    fontsize=16,
    ha='left',
    transform=ax_top.transAxes,
)
ax_top_histx.text(0.1, 0.1, 'PDB>AF2', va='bottom', **top_caption_style)
ax_top_histx.text(0.1, 0.9, 'AF2>PDB', va='top', **top_caption_style)

# Marginal distributions: AF2_TM along the top, residual_3 along the right side.
top_hist_style = dict(bins=30, color='gray', edgecolor='black')
ax_top_histx.hist(df['AF2_TM'], **top_hist_style)
ax_top_histy.hist(df['residual_3'], orientation='horizontal', **top_hist_style)

# Hide the frames, ticks and tick labels of both marginal axes.
for top_marginal_ax in (ax_top_histx, ax_top_histy):
    top_marginal_ax.axis('off')

# Pin the marginal limits to the scatter limits so the histograms line up.
ax_top_histx.set_xlim(ax_top.get_xlim())
ax_top_histy.set_ylim(ax_top.get_ylim())
# ------------------ GridSpec row 2: AF2_DockQ vs residual_3 -------------------
# NOTE: the axes are named "middle", but gs[2] is the last of the three rows.
ax_middle = fig.add_subplot(gs[2])
sns.scatterplot(data=df, x='AF2_DockQ', y='residual_3', color='green', alpha=0.7, ax=ax_middle)

# Dashed zero line: points above it have consensus_AF2 > consensus_NMR.
ax_middle.axhline(y=0, color='red', linestyle='--', linewidth=2)

# Both axis labels are blanked. The labels previously used here were
# 'AF2 DockQ' (x) and 'AF2 - PDB CSP_Rank Score' (y).
ax_middle.set_xlabel('')
ax_middle.set_ylabel('')

# Marginal axes: a strip above (sharing x) and a strip to the right (sharing y).
divider_middle = make_axes_locatable(ax_middle)
ax_middle_histx = divider_middle.append_axes("top", size=0.5, pad=0.1, sharex=ax_middle)
ax_middle_histy = divider_middle.append_axes("right", size=0.5, pad=0.1, sharey=ax_middle)

# The title goes on the upper marginal axes so that it sits above the histogram.
ax_middle_histx.set_title('DockQ vs AF2 - PDB CSP_Rank Score', fontsize=12)

# The two captions are attached to the histogram axes but positioned with the
# scatter axes' transform, so their coordinates are fractions of the scatter panel.
middle_caption_style = dict(
    color='red',
    fontweight='bold',
    fontsize=16,
    ha='left',
    transform=ax_middle.transAxes,
)
ax_middle_histx.text(0.1, 0.1, 'PDB>AF2', va='bottom', **middle_caption_style)
ax_middle_histx.text(0.1, 0.9, 'AF2>PDB', va='top', **middle_caption_style)

# Marginal distributions: AF2_DockQ along the top, residual_3 along the right side.
middle_hist_style = dict(bins=30, color='gray', edgecolor='black')
ax_middle_histx.hist(df['AF2_DockQ'], **middle_hist_style)
ax_middle_histy.hist(df['residual_3'], orientation='horizontal', **middle_hist_style)

# Hide the frames, ticks and tick labels of both marginal axes.
for middle_marginal_ax in (ax_middle_histx, ax_middle_histy):
    middle_marginal_ax.axis('off')

# Pin the marginal limits to the scatter limits so the histograms line up.
ax_middle_histx.set_xlim(ax_middle.get_xlim())
ax_middle_histy.set_ylim(ax_middle.get_ylim())
# ------------------ GridSpec row 0: consensus_NMR vs consensus_AF2 -------------------
# NOTE: the axes are named "bottom", but gs[0] is the first (uppermost) row.
ax_bottom = fig.add_subplot(gs[0])
sns.scatterplot(data=df, x='consensus_NMR', y='consensus_AF2', ax=ax_bottom)

# Identity line from (0, 0) to (1, 1): points above it have
# consensus_AF2 > consensus_NMR, points below it the opposite.
ax_bottom.plot([0, 1], [0, 1], color='red')

ax_bottom.set_title('PDB vs AF2 CSP_Rank Score', fontsize=12)

# Both axis labels are blanked. The labels previously used here were
# 'PDB CSP_Rank Score' (x) and 'AF2 CSP_Rank Score' (y).
ax_bottom.set_xlabel('')
ax_bottom.set_ylabel('')

# Corner captions in axes-fraction coordinates: lower right marks the region
# below the identity line, upper left the region above it.
bottom_caption_style = dict(
    color='red',
    fontweight='bold',
    fontsize=22,
    ha='left',
    transform=ax_bottom.transAxes,
)
ax_bottom.text(0.7, 0.1, 'PDB>AF2', va='bottom', **bottom_caption_style)
ax_bottom.text(0.1, 0.9, 'AF2>PDB', va='top', **bottom_caption_style)
# ------------------ Residual summary (printed, not drawn) ------------------
# `data` is the consensus AF2-minus-NMR difference computed earlier in the script.
data = df['residual_3']

# Symmetric bin edges around zero for a 20-bin histogram of the residuals.
# The edges are computed but never drawn on the figure.
max_abs_value = data.abs().max()
symmetric_range = (-max_abs_value, max_abs_value)
bin_width = (symmetric_range[1] - symmetric_range[0]) / 20
# np.linspace always yields exactly 21 edges ending on +max_abs_value.
# np.arange with a float step can produce one edge too many through rounding,
# and raises when the step is 0 (all residuals zero) or NaN (no valid residuals).
bins = np.linspace(symmetric_range[0], symmetric_range[1], 21)

# Share of rows outside / inside the +/-0.1 band. Rows whose residual is NaN
# fail every comparison, so they count toward none of the three percentages.
percent_above_threshold = (data > 0.1).mean() * 100
print(f"Percent of rows where 'residual_3' value is > 0.1: {percent_above_threshold:.2f}%")
percent_below_threshold = (data < -0.1).mean() * 100
print(f"Percent of rows where 'residual_3' value is < -0.1: {percent_below_threshold:.2f}%")
percent_between_thresholds = ((data >= -0.1) & (data <= 0.1)).mean() * 100
print(f"Percent of rows where 'residual_3' value is between -0.1 and 0.1: {percent_between_thresholds:.2f}%")

# Number of rows where the AF2 consensus score is strictly higher than the NMR one.
num_rows_above_zero = (data > 0).sum()
print(f"Number of rows where 'residual_3' value is > 0: {num_rows_above_zero}")

# Extra vertical spacing so each panel's title clears the panel above it.
fig.subplots_adjust(hspace=0.3)
# plt.tight_layout() is left disabled; spacing is set by subplots_adjust above.
plt.show()