# NOTE: the lines originally here were GitHub web-page chrome captured along
# with the file (permalink b9fe25bf2d; 193 lines, 157 sloc, 6.87 KB), not
# Python source. The script proper starts with the imports below.
from os import listdir
from os.path import isfile, join
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# --- Ensemble data ----------------------------------------------------------
# Read the CSP_RANK table and keep only the rows for which both the AF2 and
# the NMR consensus scores are present (not NaN).
NMR_source_file = './CSPRANK.csv'
data = pd.read_csv(NMR_source_file)
holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
    data[column].values
    for column in ('holo_pdb', 'apo_bmrb', 'consensus_AF2', 'consensus_NMR')
)
# "not NaN in AF2 AND not NaN in NMR", written in its De Morgan form.
valid_indices = ~(np.isnan(AF2_CSPRANK) | np.isnan(NMR_CSPRANK))
holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
    values[valid_indices]
    for values in (holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK)
)
# Sanity check: both score arrays were filtered with the same mask.
assert len(NMR_CSPRANK) == len(AF2_CSPRANK), "Data length mismatch."
# --- Ensemble scatter plot (no per-point labels) ----------------------------
# Plots the NMR consensus CSP_RANK score (x) against the AF2 consensus
# CSP_RANK score (y), draws the x=y diagonal, prints how often AF2 scores
# higher than NMR, and saves the figure under ./Figures/.
from os import makedirs

# Not used for this figure; kept so `adjust_text` remains available to any
# later code that relies on this import.
from adjustText import adjust_text

plt.figure(figsize=(20, 20))
plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')

# Axis labels with a large font.
plt.xlabel('NMR Ensemble CSP_RANK Scores', fontsize=50)
plt.ylabel('AF2 Ensemble CSP_RANK Scores', fontsize=50)

# Region annotations in data coordinates: the first sits below the x=y
# diagonal (x score > y score), the second above it (y score > x score).
plt.text(0.7, 0.4, 'PDB>AF2', color='red', fontweight='bold', fontsize=50,
         verticalalignment='bottom', horizontalalignment='left')
plt.text(0.35, 0.8, 'AF2>PDB', color='red', fontweight='bold', fontsize=50,
         verticalalignment='top', horizontalalignment='left')

print("ENSEMBLE")
# Rows where the AF2 score is strictly greater than the NMR score.
num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK)
print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK: {num_rows_AF2_greater}")
# Same comparison with a 0.1 tolerance in favour of AF2.
num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK - 0.1)
print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK - 0.1: {num_rows_AF2_greater}")

# Restrict both axes to the [0.3, 0.9] window.
plt.xlim(0.3, 0.9)
plt.ylim(0.3, 0.9)
# Tick label font size for both axes.
plt.tick_params(axis='both', which='major', labelsize=30)
# Reference diagonal x=y in red (the limits above are already fixed, so this
# does not rescale the axes).
plt.plot([0, 1], [0, 1], 'r-', label='x=y')

# Save the figure; create the output directory first so savefig cannot fail
# on a fresh checkout where ./Figures/ does not exist yet.
output_dir = './Figures/'
output_file = 'AF2_vs_NMR_CSPRANK_scatter_plot.png'
makedirs(output_dir, exist_ok=True)
plt.savefig(join(output_dir, output_file))
# Close the figure instead of merely clearing it, so pyplot releases the
# large 20x20-inch canvas before the next figure is created.
plt.close()
# --- Ensemble scatter plot with per-point labels ----------------------------
# Same data as the unlabeled ensemble plot, on the full [0, 1] range, with
# every point annotated by its `holo_pdbs` entry and a legend.
from os import makedirs

from adjustText import adjust_text

plt.figure(figsize=(20, 20))
plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')

# One text label per point (leading space keeps the text off the marker).
texts = [
    plt.text(x, y, ' ' + txt, fontsize=20)
    for x, y, txt in zip(NMR_CSPRANK, AF2_CSPRANK, holo_pdbs)
]

# Axis labels and title with a large font.
plt.xlabel('NMR Ensemble CSP_RANK Scores', fontsize=50)
plt.ylabel('AF2 CSP_RANK Scores', fontsize=50)
plt.title('AF2 vs NMR ensemble average CSP_RANK', fontsize=40)

# Show the full [0, 1] range on both axes.
plt.xlim(0, 1)
plt.ylim(0, 1)
# Tick label font size for both axes.
plt.tick_params(axis='both', which='major', labelsize=30)
# Reference diagonal x=y in red.
plt.plot([0, 1], [0, 1], 'r-', label='x=y')

# Build the legend only after every labeled artist exists; calling it before
# the diagonal is plotted leaves the 'x=y' entry out of the legend.
plt.legend()

# Resolve label overlaps last, once the axis limits and all artists are final,
# so the adjustment is computed against the layout that is actually saved.
adjust_text(texts, arrowprops=dict(arrowstyle='->', color='red'))

# Save the figure; create the output directory first so savefig cannot fail
# when ./Figures/ does not exist yet.
output_dir = './Figures/'
output_file = 'AF2_vs_NMR_CSPRANK_scatter_plot_labels.png'
makedirs(output_dir, exist_ok=True)
plt.savefig(join(output_dir, output_file))
# Release the figure so it does not stay alive while later figures are built.
plt.close()
############################################################################################################
# --- Top-rank data ----------------------------------------------------------
# Read the top-rank CSP_RANK table and keep only the rows for which both the
# AF2 and the NMR consensus scores are present (not NaN). This rebinds the
# same module-level names used for the ensemble data.
NMR_source_file = './CSPRANK_top_rank.csv'
data = pd.read_csv(NMR_source_file)
holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
    data[column].values
    for column in ('holo_pdb', 'apo_bmrb', 'consensus_AF2', 'consensus_NMR')
)
# "not NaN in AF2 AND not NaN in NMR", written in its De Morgan form.
valid_indices = ~(np.isnan(AF2_CSPRANK) | np.isnan(NMR_CSPRANK))
holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
    values[valid_indices]
    for values in (holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK)
)
# Sanity check: both score arrays were filtered with the same mask.
assert len(NMR_CSPRANK) == len(AF2_CSPRANK), "Data length mismatch."
# --- Top-rank scatter plot (no per-point labels) ----------------------------
# Plots the NMR score (x) against the AF2 score (y) from the top-rank table,
# draws the x=y diagonal, saves the figure under ./Figures/, and prints how
# often AF2 scores higher than NMR.
from os import makedirs

# Not used for this figure; kept so `adjust_text` remains available to any
# later code that relies on this import.
from adjustText import adjust_text

plt.figure(figsize=(20, 20))
plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')

# Axis labels with a large font.
plt.xlabel('NMR medoid CSP_RANK Scores', fontsize=50)
plt.ylabel('AF2 top-rank CSP_RANK Scores', fontsize=50)

# Region annotations in data coordinates: the first sits below the x=y
# diagonal (x score > y score), the second above it (y score > x score).
plt.text(0.7, 0.4, 'PDB>AF2', color='red', fontweight='bold', fontsize=50,
         verticalalignment='bottom', horizontalalignment='left')
plt.text(0.35, 0.8, 'AF2>PDB', color='red', fontweight='bold', fontsize=50,
         verticalalignment='top', horizontalalignment='left')

# Restrict both axes to the [0.3, 0.9] window.
plt.xlim(0.3, 0.9)
plt.ylim(0.3, 0.9)
# Tick label font size for both axes.
plt.tick_params(axis='both', which='major', labelsize=30)
# Reference diagonal x=y in red (the limits above are already fixed, so this
# does not rescale the axes).
plt.plot([0, 1], [0, 1], 'r-', label='x=y')

# Save the figure; create the output directory first so savefig cannot fail
# when ./Figures/ does not exist yet.
output_dir = './Figures/'
output_file = 'AF2_vs_NMR_CSPRANK_top_rank_scatter_plot.png'
makedirs(output_dir, exist_ok=True)
plt.savefig(join(output_dir, output_file))

print("TOP_RANK")
# Rows where the AF2 score is strictly greater than the NMR score.
num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK)
print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK: {num_rows_AF2_greater}")
# Same comparison with a 0.1 tolerance in favour of AF2.
num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK - 0.1)
print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK - 0.1: {num_rows_AF2_greater}")

# Close the figure instead of merely clearing it, so pyplot releases the
# large 20x20-inch canvas before the next figure is created.
plt.close()
# --- Top-rank scatter plot with per-point labels ----------------------------
# Same data as the unlabeled top-rank plot, on the full [0, 1] range, with
# every point annotated by its `holo_pdbs` entry and a legend.
from os import makedirs

from adjustText import adjust_text

plt.figure(figsize=(20, 20))
# NOTE(review): the legend label below reads 'AF2 vs NMR Mean' although the
# axes are labeled top-rank vs medoid -- looks copied from the ensemble plot;
# confirm the intended wording before changing the string.
plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')

# One text label per point (leading space keeps the text off the marker).
texts = [
    plt.text(x, y, ' ' + txt, fontsize=20)
    for x, y, txt in zip(NMR_CSPRANK, AF2_CSPRANK, holo_pdbs)
]

# Axis labels and title.
plt.xlabel('NMR medoid CSP_RANK Scores', fontsize=30)
plt.ylabel('AF2 top-rank CSP_RANK Scores', fontsize=30)
plt.title('AF2 top-rank vs NMR medoid CSP_RANK', fontsize=40)

# Show the full [0, 1] range on both axes.
plt.xlim(0, 1)
plt.ylim(0, 1)
# Tick label font size for both axes.
plt.tick_params(axis='both', which='major', labelsize=30)
# Reference diagonal x=y in red.
plt.plot([0, 1], [0, 1], 'r-', label='x=y')

# Build the legend only after every labeled artist exists; calling it before
# the diagonal is plotted leaves the 'x=y' entry out of the legend.
plt.legend()

# Resolve label overlaps last, once the axis limits and all artists are final,
# so the adjustment is computed against the layout that is actually saved.
adjust_text(texts, arrowprops=dict(arrowstyle='->', color='red'))

# Save the figure; create the output directory first so savefig cannot fail
# when ./Figures/ does not exist yet.
output_dir = './Figures/'
output_file = 'AF2_vs_NMR_CSPRANK_top_rank_scatter_plot_labels.png'
makedirs(output_dir, exist_ok=True)
plt.savefig(join(output_dir, output_file))
# Release the figure now that it has been written to disk.
plt.close()
############################################################################################################