# AF2vsNMR.py

from os import listdir
from os.path import isfile, join
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Read the CSP_RANK table used for the ensemble comparison.
NMR_source_file = './CSPRANK.csv'
data = pd.read_csv(NMR_source_file)

# Pull the identifier and consensus-score columns out as NumPy arrays.
holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
    data[column].values
    for column in ('holo_pdb', 'apo_bmrb', 'consensus_AF2', 'consensus_NMR')
)

# Keep a row only when both scores are present (neither one is NaN); the
# same mask is applied to every column so the arrays stay row-aligned.
valid_indices = ~(np.isnan(AF2_CSPRANK) | np.isnan(NMR_CSPRANK))
holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
    values[valid_indices]
    for values in (holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK)
)

# Sanity check on the paired score arrays before plotting.
assert len(AF2_CSPRANK) == len(NMR_CSPRANK), "Data length mismatch."

# Unlabelled ensemble scatter plot: NMR score on x, AF2 score on y.
plt.figure(figsize=(20, 20))
plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')
# Kept at its original mid-script position; this figure does not call it.
from adjustText import adjust_text

plt.xlabel('NMR Ensemble CSP_RANK Scores', fontsize=50)
plt.ylabel('AF2 Ensemble CSP_RANK Scores', fontsize=50)

# Region tags placed on either side of the x=y diagonal.
plt.text(
    0.7, 0.4, 'PDB>AF2',
    color='red', fontweight='bold', fontsize=50,
    verticalalignment='bottom', horizontalalignment='left',
)
plt.text(
    0.35, 0.8, 'AF2>PDB',
    color='red', fontweight='bold', fontsize=50,
    verticalalignment='top', horizontalalignment='left',
)

# Report how often AF2 scores above NMR, strictly and with a 0.1 margin.
print("ENSEMBLE")
num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK)
print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK: {num_rows_AF2_greater}")
num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK - 0.1)
print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK - 0.1: {num_rows_AF2_greater}")

# Zoom both axes to the 0.3-0.9 window.
plt.xlim(0.3, 0.9)
plt.ylim(0.3, 0.9)

# Enlarge the tick labels on both axes.
plt.tick_params(axis='both', which='major', labelsize=30)

# x=y reference line; its endpoints (0, 0)-(1, 1) extend past the window.
plt.plot([0, 1], [0, 1], 'r-', label='x=y')

# Save the figure.
output_dir = './Figures/'
output_file = 'AF2_vs_NMR_CSPRANK_scatter_plot.png'
plt.savefig(join(output_dir, output_file))

# pyplot keeps a figure registered until it is closed; clearing it with
# cla()/clf() does not release it, so close it before the next plt.figure().
plt.close()

# Labelled ensemble scatter plot on the full [0, 1] range: every point is
# tagged with its holo PDB identifier.
plt.figure(figsize=(20, 20))
plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')
from adjustText import adjust_text

plt.xlabel('NMR Ensemble CSP_RANK Scores', fontsize=50)
plt.ylabel('AF2 CSP_RANK Scores', fontsize=50)
plt.title('AF2 vs NMR ensemble average CSP_RANK', fontsize=40)

# Fix the axis limits to [0, 1] and enlarge the tick labels.
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.tick_params(axis='both', which='major', labelsize=30)

# Draw the x=y line BEFORE building the legend; the legend only lists
# artists that already exist, so the old order silently dropped 'x=y'.
plt.plot([0, 1], [0, 1], 'r-', label='x=y')
plt.legend()

# Add the point labels and de-overlap them last: adjust_text works against
# the axes limits in effect when it runs, so the limits must be final first.
texts = [
    plt.text(x, y, ' ' + pdb_id, fontsize=20)
    for x, y, pdb_id in zip(NMR_CSPRANK, AF2_CSPRANK, holo_pdbs)
]
adjust_text(texts, arrowprops=dict(arrowstyle='->', color='red'))

# Save the figure.
output_dir = './Figures/'
output_file = 'AF2_vs_NMR_CSPRANK_scatter_plot_labels.png'
plt.savefig(join(output_dir, output_file))

# Release the figure; pyplot keeps it registered until it is closed.
plt.close()


############################################################################################################

# Read the CSP_RANK table used for the top-rank / medoid comparison.
NMR_source_file = './CSPRANK_top_rank.csv'
data = pd.read_csv(NMR_source_file)

# Pull the identifier and consensus-score columns out as NumPy arrays.
holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
    data[column].values
    for column in ('holo_pdb', 'apo_bmrb', 'consensus_AF2', 'consensus_NMR')
)

# Keep a row only when both scores are present (neither one is NaN); the
# same mask is applied to every column so the arrays stay row-aligned.
valid_indices = ~(np.isnan(AF2_CSPRANK) | np.isnan(NMR_CSPRANK))
holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
    values[valid_indices]
    for values in (holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK)
)

# Sanity check on the paired score arrays before plotting.
assert len(AF2_CSPRANK) == len(NMR_CSPRANK), "Data length mismatch."

# Unlabelled top-rank scatter plot: NMR medoid score on x, AF2 top-rank
# score on y.
plt.figure(figsize=(20, 20))
plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')
# Kept at its original mid-script position; this figure does not call it.
from adjustText import adjust_text

plt.xlabel('NMR medoid CSP_RANK Scores', fontsize=50)
plt.ylabel('AF2 top-rank CSP_RANK Scores', fontsize=50)

# Region tags placed on either side of the x=y diagonal.
plt.text(
    0.7, 0.4, 'PDB>AF2',
    color='red', fontweight='bold', fontsize=50,
    verticalalignment='bottom', horizontalalignment='left',
)
plt.text(
    0.35, 0.8, 'AF2>PDB',
    color='red', fontweight='bold', fontsize=50,
    verticalalignment='top', horizontalalignment='left',
)

# Zoom both axes to the 0.3-0.9 window.
plt.xlim(0.3, 0.9)
plt.ylim(0.3, 0.9)

# Enlarge the tick labels on both axes.
plt.tick_params(axis='both', which='major', labelsize=30)

# x=y reference line; its endpoints (0, 0)-(1, 1) extend past the window.
plt.plot([0, 1], [0, 1], 'r-', label='x=y')

# Save the figure.
output_dir = './Figures/'
output_file = 'AF2_vs_NMR_CSPRANK_top_rank_scatter_plot.png'
plt.savefig(join(output_dir, output_file))

# Report how often AF2 scores above NMR, strictly and with a 0.1 margin.
print("TOP_RANK")
num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK)
print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK: {num_rows_AF2_greater}")
num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK - 0.1)
print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK - 0.1: {num_rows_AF2_greater}")

# pyplot keeps a figure registered until it is closed; clearing it with
# cla()/clf() does not release it, so close it before the next plt.figure().
plt.close()

# Labelled top-rank scatter plot on the full [0, 1] range: every point is
# tagged with its holo PDB identifier.
plt.figure(figsize=(20, 20))
plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')
from adjustText import adjust_text

plt.xlabel('NMR medoid CSP_RANK Scores', fontsize=30)
plt.ylabel('AF2 top-rank CSP_RANK Scores', fontsize=30)
plt.title('AF2 top-rank vs NMR medoid CSP_RANK', fontsize=40)

# Fix the axis limits to [0, 1] and enlarge the tick labels.
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.tick_params(axis='both', which='major', labelsize=30)

# Draw the x=y line BEFORE building the legend; the legend only lists
# artists that already exist, so the old order silently dropped 'x=y'.
plt.plot([0, 1], [0, 1], 'r-', label='x=y')
plt.legend()

# Add the point labels and de-overlap them last: adjust_text works against
# the axes limits in effect when it runs, so the limits must be final first.
texts = [
    plt.text(x, y, ' ' + pdb_id, fontsize=20)
    for x, y, pdb_id in zip(NMR_CSPRANK, AF2_CSPRANK, holo_pdbs)
]
adjust_text(texts, arrowprops=dict(arrowstyle='->', color='red'))

# Save the figure.
output_dir = './Figures/'
output_file = 'AF2_vs_NMR_CSPRANK_top_rank_scatter_plot_labels.png'
plt.savefig(join(output_dir, output_file))

# Release the figure; pyplot keeps it registered until it is closed.
plt.close()


############################################################################################################
	from os import listdir
	from os.path import isfile, join
	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd

	# NOTE(review): from this point the file repeats the top-level script
	# above, indented by one tab with no enclosing def/class visible. Confirm
	# whether this copy should be removed or wrapped in a function.

	# Read the CSP_RANK table used for the ensemble comparison.
	NMR_source_file = './CSPRANK.csv'
	data = pd.read_csv(NMR_source_file)

	# Pull the identifier and consensus-score columns out as NumPy arrays.
	holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
		data[column].values
		for column in ('holo_pdb', 'apo_bmrb', 'consensus_AF2', 'consensus_NMR')
	)

	# Keep a row only when both scores are present (neither one is NaN); the
	# same mask is applied to every column so the arrays stay row-aligned.
	valid_indices = ~(np.isnan(AF2_CSPRANK) | np.isnan(NMR_CSPRANK))
	holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
		values[valid_indices]
		for values in (holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK)
	)

	# Sanity check on the paired score arrays before plotting.
	assert len(AF2_CSPRANK) == len(NMR_CSPRANK), "Data length mismatch."

	# Unlabelled ensemble scatter plot: NMR score on x, AF2 score on y.
	plt.figure(figsize=(20, 20))
	plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')
	# Kept at its original mid-script position; this figure does not call it.
	from adjustText import adjust_text

	plt.xlabel('NMR Ensemble CSP_RANK Scores', fontsize=50)
	plt.ylabel('AF2 Ensemble CSP_RANK Scores', fontsize=50)

	# Region tags placed on either side of the x=y diagonal.
	plt.text(
		0.7, 0.4, 'PDB>AF2',
		color='red', fontweight='bold', fontsize=50,
		verticalalignment='bottom', horizontalalignment='left',
	)
	plt.text(
		0.35, 0.8, 'AF2>PDB',
		color='red', fontweight='bold', fontsize=50,
		verticalalignment='top', horizontalalignment='left',
	)

	# Report how often AF2 scores above NMR, strictly and with a 0.1 margin.
	print("ENSEMBLE")
	num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK)
	print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK: {num_rows_AF2_greater}")
	num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK - 0.1)
	print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK - 0.1: {num_rows_AF2_greater}")

	# Zoom both axes to the 0.3-0.9 window.
	plt.xlim(0.3, 0.9)
	plt.ylim(0.3, 0.9)

	# Enlarge the tick labels on both axes.
	plt.tick_params(axis='both', which='major', labelsize=30)

	# x=y reference line; its endpoints (0, 0)-(1, 1) extend past the window.
	plt.plot([0, 1], [0, 1], 'r-', label='x=y')

	# Save the figure.
	output_dir = './Figures/'
	output_file = 'AF2_vs_NMR_CSPRANK_scatter_plot.png'
	plt.savefig(join(output_dir, output_file))

	# pyplot keeps a figure registered until it is closed; clearing it with
	# cla()/clf() does not release it, so close it before the next plt.figure().
	plt.close()

	# Labelled ensemble scatter plot on the full [0, 1] range: every point is
	# tagged with its holo PDB identifier.
	plt.figure(figsize=(20, 20))
	plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')
	from adjustText import adjust_text

	plt.xlabel('NMR Ensemble CSP_RANK Scores', fontsize=50)
	plt.ylabel('AF2 CSP_RANK Scores', fontsize=50)
	plt.title('AF2 vs NMR ensemble average CSP_RANK', fontsize=40)

	# Fix the axis limits to [0, 1] and enlarge the tick labels.
	plt.xlim(0, 1)
	plt.ylim(0, 1)
	plt.tick_params(axis='both', which='major', labelsize=30)

	# Draw the x=y line BEFORE building the legend; the legend only lists
	# artists that already exist, so the old order silently dropped 'x=y'.
	plt.plot([0, 1], [0, 1], 'r-', label='x=y')
	plt.legend()

	# Add the point labels and de-overlap them last: adjust_text works against
	# the axes limits in effect when it runs, so the limits must be final first.
	texts = [
		plt.text(x, y, ' ' + pdb_id, fontsize=20)
		for x, y, pdb_id in zip(NMR_CSPRANK, AF2_CSPRANK, holo_pdbs)
	]
	adjust_text(texts, arrowprops=dict(arrowstyle='->', color='red'))

	# Save the figure.
	output_dir = './Figures/'
	output_file = 'AF2_vs_NMR_CSPRANK_scatter_plot_labels.png'
	plt.savefig(join(output_dir, output_file))

	# Release the figure; pyplot keeps it registered until it is closed.
	plt.close()


	############################################################################################################

	# Read the CSP_RANK table used for the top-rank / medoid comparison.
	NMR_source_file = './CSPRANK_top_rank.csv'
	data = pd.read_csv(NMR_source_file)

	# Pull the identifier and consensus-score columns out as NumPy arrays.
	holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
		data[column].values
		for column in ('holo_pdb', 'apo_bmrb', 'consensus_AF2', 'consensus_NMR')
	)

	# Keep a row only when both scores are present (neither one is NaN); the
	# same mask is applied to every column so the arrays stay row-aligned.
	valid_indices = ~(np.isnan(AF2_CSPRANK) | np.isnan(NMR_CSPRANK))
	holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK = (
		values[valid_indices]
		for values in (holo_pdbs, apo_bmrbs, AF2_CSPRANK, NMR_CSPRANK)
	)

	# Sanity check on the paired score arrays before plotting.
	assert len(AF2_CSPRANK) == len(NMR_CSPRANK), "Data length mismatch."

	# Unlabelled top-rank scatter plot: NMR medoid score on x, AF2 top-rank
	# score on y.
	plt.figure(figsize=(20, 20))
	plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')
	# Kept at its original mid-script position; this figure does not call it.
	from adjustText import adjust_text

	plt.xlabel('NMR medoid CSP_RANK Scores', fontsize=50)
	plt.ylabel('AF2 top-rank CSP_RANK Scores', fontsize=50)

	# Region tags placed on either side of the x=y diagonal.
	plt.text(
		0.7, 0.4, 'PDB>AF2',
		color='red', fontweight='bold', fontsize=50,
		verticalalignment='bottom', horizontalalignment='left',
	)
	plt.text(
		0.35, 0.8, 'AF2>PDB',
		color='red', fontweight='bold', fontsize=50,
		verticalalignment='top', horizontalalignment='left',
	)

	# Zoom both axes to the 0.3-0.9 window.
	plt.xlim(0.3, 0.9)
	plt.ylim(0.3, 0.9)

	# Enlarge the tick labels on both axes.
	plt.tick_params(axis='both', which='major', labelsize=30)

	# x=y reference line; its endpoints (0, 0)-(1, 1) extend past the window.
	plt.plot([0, 1], [0, 1], 'r-', label='x=y')

	# Save the figure.
	output_dir = './Figures/'
	output_file = 'AF2_vs_NMR_CSPRANK_top_rank_scatter_plot.png'
	plt.savefig(join(output_dir, output_file))

	# Report how often AF2 scores above NMR, strictly and with a 0.1 margin.
	print("TOP_RANK")
	num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK)
	print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK: {num_rows_AF2_greater}")
	num_rows_AF2_greater = np.sum(AF2_CSPRANK > NMR_CSPRANK - 0.1)
	print(f"Number of rows where AF2_CSPRANK > NMR_CSPRANK - 0.1: {num_rows_AF2_greater}")

	# pyplot keeps a figure registered until it is closed; clearing it with
	# cla()/clf() does not release it, so close it before the next plt.figure().
	plt.close()

	# Labelled top-rank scatter plot on the full [0, 1] range: every point is
	# tagged with its holo PDB identifier.
	plt.figure(figsize=(20, 20))
	plt.scatter(NMR_CSPRANK, AF2_CSPRANK, color='b', label='AF2 vs NMR Mean')
	from adjustText import adjust_text

	plt.xlabel('NMR medoid CSP_RANK Scores', fontsize=30)
	plt.ylabel('AF2 top-rank CSP_RANK Scores', fontsize=30)
	plt.title('AF2 top-rank vs NMR medoid CSP_RANK', fontsize=40)

	# Fix the axis limits to [0, 1] and enlarge the tick labels.
	plt.xlim(0, 1)
	plt.ylim(0, 1)
	plt.tick_params(axis='both', which='major', labelsize=30)

	# Draw the x=y line BEFORE building the legend; the legend only lists
	# artists that already exist, so the old order silently dropped 'x=y'.
	plt.plot([0, 1], [0, 1], 'r-', label='x=y')
	plt.legend()

	# Add the point labels and de-overlap them last: adjust_text works against
	# the axes limits in effect when it runs, so the limits must be final first.
	texts = [
		plt.text(x, y, ' ' + pdb_id, fontsize=20)
		for x, y, pdb_id in zip(NMR_CSPRANK, AF2_CSPRANK, holo_pdbs)
	]
	adjust_text(texts, arrowprops=dict(arrowstyle='->', color='red'))

	# Save the figure.
	output_dir = './Figures/'
	output_file = 'AF2_vs_NMR_CSPRANK_top_rank_scatter_plot_labels.png'
	plt.savefig(join(output_dir, output_file))

	# Release the figure; pyplot keeps it registered until it is closed.
	plt.close()


	############################################################################################################