Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
CSP_Rank/fig2_1.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
124 lines (100 sloc)
5.58 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import pandas as pd | |
import seaborn as sns | |
import numpy as np | |
from mpl_toolkits.axes_grid1 import make_axes_locatable | |
# Method label; assigned here but not read anywhere else in this script.
CSmethod = "UCBShift"

# Score table, resolved relative to the current working directory.
data_source_file = './CSPRANK.csv'
df = pd.read_csv(data_source_file)

# Residual columns: AF2 score minus NMR score for each metric, so a positive
# value means the AF2 column is the larger one.
#   residual_1 <- F, residual_2 <- MCC, residual_3 <- consensus
for _residual_idx, _metric in enumerate(('F', 'MCC', 'consensus'), start=1):
    df[f'residual_{_residual_idx}'] = df[f'{_metric}_AF2'] - df[f'{_metric}_NMR']
# One 15x15-inch canvas divided into three stacked rows (single column);
# the panels created further down each take one cell of this grid.
fig = plt.figure(figsize=(15, 15))
gs = fig.add_gridspec(nrows=3, ncols=1)
# ------------------ RESIDUAL PANELS (grid rows gs[1] and gs[2]) -------------------
# NOTE: the axes are still called ax_top / ax_middle, but they are placed in
# the second and third cells of the gridspec respectively; the first cell
# (gs[0]) is used by the consensus_NMR vs consensus_AF2 panel.
def _scatter_with_marginals(fig, subplot_spec, frame, x_col, y_col, color, title):
    """Draw a scatter panel with marginal histograms on its top and right.

    The panel shows ``frame[y_col]`` against ``frame[x_col]`` with a dashed
    red reference line at y = 0, blank axis labels, and two red corner
    annotations ('PDB>AF2' near the bottom-left, 'AF2>PDB' near the top-left).

    Args:
        fig: Figure that receives the new axes.
        subplot_spec: Gridspec cell (e.g. ``gs[1]``) in which to place the panel.
        frame: DataFrame holding both columns.
        x_col: Column plotted on the x axis and summarised by the top histogram.
        y_col: Column plotted on the y axis and summarised by the right histogram.
        color: Colour of the scatter markers.
        title: Title text, placed above the top histogram.

    Returns:
        Tuple ``(ax, ax_histx, ax_histy, divider)``: the scatter axes, the top
        and right histogram axes, and the divider that created them.
    """
    ax = fig.add_subplot(subplot_spec)
    sns.scatterplot(ax=ax, x=x_col, y=y_col, data=frame, alpha=0.7, color=color)
    # Reference line at y=0 (separates positive from negative residuals).
    ax.axhline(y=0, color='red', linestyle='--', linewidth=2)
    ax.set_ylabel('')
    ax.set_xlabel('')

    # Carve the marginal axes out of the scatter axes; each shares the
    # relevant data axis with the scatter so the bins line up with the points.
    divider = make_axes_locatable(ax)
    ax_histx = divider.append_axes("top", 0.5, pad=0.1, sharex=ax)
    ax_histy = divider.append_axes("right", 0.5, pad=0.1, sharey=ax)

    # The title goes on the top histogram so it sits above the whole panel.
    ax_histx.set_title(title, fontsize=12)

    # Corner annotations are positioned in the scatter axes' own coordinate
    # system (0..1), so they are attached to the scatter axes itself.
    for y_pos, label, valign in ((0.1, 'PDB>AF2', 'bottom'), (0.9, 'AF2>PDB', 'top')):
        ax.text(0.1, y_pos, label, color='red', fontweight='bold', fontsize=16,
                verticalalignment=valign, horizontalalignment='left',
                transform=ax.transAxes)

    # Marginal distributions; axis('off') hides their ticks, labels and frame.
    ax_histx.hist(frame[x_col], bins=30, color='gray', edgecolor='black')
    ax_histx.axis('off')
    ax_histy.hist(frame[y_col], bins=30, orientation='horizontal',
                  color='gray', edgecolor='black')
    ax_histy.axis('off')

    # The shared axes already agree on their limits; setting them explicitly
    # pins the current range (set_xlim/set_ylim turn autoscaling off).
    ax_histx.set_xlim(ax.get_xlim())
    ax_histy.set_ylim(ax.get_ylim())
    return ax, ax_histx, ax_histy, divider


# AF2_TM vs residual_3 (residual_3 = consensus_AF2 - consensus_NMR), cell gs[1].
ax_top, ax_top_histx, ax_top_histy, divider_top = _scatter_with_marginals(
    fig, gs[1], df, 'AF2_TM', 'residual_3', 'blue',
    'TM Score vs AF2 - PDB CSP_Rank Score')

# AF2_DockQ vs residual_3, cell gs[2].
ax_middle, ax_middle_histx, ax_middle_histy, divider_middle = _scatter_with_marginals(
    fig, gs[2], df, 'AF2_DockQ', 'residual_3', 'green',
    'DockQ vs AF2 - PDB CSP_Rank Score')
# ------------------ consensus_NMR vs consensus_AF2 (grid cell gs[0]) -------------------
# NOTE: the axes is named ax_bottom, but gs[0] is the first cell of the grid.
ax_bottom = fig.add_subplot(gs[0])
sns.scatterplot(data=df, x='consensus_NMR', y='consensus_AF2', ax=ax_bottom)

# y = x reference line from (0, 0) to (1, 1): points above it have
# consensus_AF2 > consensus_NMR, points below it the opposite.
ax_bottom.plot([0, 1], [0, 1], color='red')

ax_bottom.set_title('PDB vs AF2 CSP_Rank Score', fontsize=12)
# Axis labels are deliberately left blank.
ax_bottom.set_ylabel('')
ax_bottom.set_xlabel('')

# Region labels in axes-fraction coordinates: lower-right and upper-left.
_region_label_style = dict(color='red', fontweight='bold', fontsize=22,
                           horizontalalignment='left',
                           transform=ax_bottom.transAxes)
ax_bottom.text(0.7, 0.1, 'PDB>AF2', verticalalignment='bottom', **_region_label_style)
ax_bottom.text(0.1, 0.9, 'AF2>PDB', verticalalignment='top', **_region_label_style)
# ------------------ Summary statistics for residual_3 (printed, not plotted) ------------------
data = df['residual_3']

# Bin edges for a 20-bin histogram that is symmetric around zero. These are
# computed for reference only; nothing below draws them.
max_abs_value = max(abs(data.min()), abs(data.max()))
symmetric_range = (-max_abs_value, max_abs_value)
bin_width = (symmetric_range[1] - symmetric_range[0]) / 20
# linspace always yields exactly 21 edges. np.arange with a float step can
# produce an extra edge through rounding, and it raises when the step is 0 or
# NaN (all residuals zero, or no data), which would abort the script before
# the statistics below are printed.
bins = np.linspace(symmetric_range[0], symmetric_range[1], 21)

# Share of rows in each residual band. Rows whose residual is NaN compare
# False in every mask, so the three percentages can sum to less than 100.
above_mask = data > 0.1
below_mask = data < -0.1
within_mask = (data >= -0.1) & (data <= 0.1)

percent_above_threshold = above_mask.mean() * 100
print(f"Percent of rows where 'residual_3' value is > 0.1: {percent_above_threshold:.2f}%")
percent_below_threshold = below_mask.mean() * 100
print(f"Percent of rows where 'residual_3' value is < -0.1: {percent_below_threshold:.2f}%")
percent_between_thresholds = within_mask.mean() * 100
print(f"Percent of rows where 'residual_3' value is between -0.1 and 0.1: {percent_between_thresholds:.2f}%")

# Count of rows with a strictly positive residual (consensus_AF2 > consensus_NMR).
num_rows_above_zero = (data > 0).sum()
print(f"Number of rows where 'residual_3' value is > 0: {num_rows_above_zero}")
# Vertical spacing between the stacked rows is set by hand here;
# plt.tight_layout() is intentionally not called.
fig.subplots_adjust(hspace=0.3)

# Hand the finished figure to the active matplotlib backend for display.
plt.show()