# calc_tm.py

import sys
from tqdm import tqdm
from util import compute_structure_similarity

def calculate_tm_score(ref_pdb, target_pdb):
    """
    Compare a target PDB structure against a reference PDB structure.

    This is a thin wrapper that forwards both paths to
    ``compute_structure_similarity`` with ``multimer=True``.

    Args:
        ref_pdb (str): Path to reference PDB file
        target_pdb (str): Path to the PDB file being compared to the reference

    Returns:
        float: TM-score between the structures, exactly as returned by
        ``compute_structure_similarity``
    """
    similarity = compute_structure_similarity(ref_pdb, target_pdb, multimer=True)
    return similarity


import os
from os import listdir
from os.path import isfile, join

import pandas as pd


def build_paths(pdb_id):
    """Return the input paths used for ``pdb_id``.

    Args:
        pdb_id (str): PDB identifier in any letter case.

    Returns:
        tuple: ``(csp_rank_score_file, pdb_files_dir, ref_file)``. The CSV and
        reference paths use the lower-cased id; the aligned-models directory
        uses the upper-cased id.
    """
    lower_id = pdb_id.lower()
    return (
        f'./CSP_Rank_Scores/CSP_{lower_id}_CSpred.csv',
        f'./PDB_FILES/{pdb_id.upper()}_aligned/',
        f'./PDB_FILES/experimental_structures/exp_{lower_id}.pdb',
    )


def score_models(ref_file, pdb_files_dir, aligned_files):
    """Compute one TM score per aligned file, in the order given.

    Args:
        ref_file (str): Path to the experimental reference structure.
        pdb_files_dir (str): Directory containing the aligned files.
        aligned_files (list): File names located inside ``pdb_files_dir``.

    Returns:
        list: Scores returned by ``calculate_tm_score``, parallel to
        ``aligned_files``.
    """
    tm_scores = []
    for aligned_file in tqdm(aligned_files):
        # tqdm.write keeps the message from overwriting the progress bar, and
        # announcing the file first makes "Processing" true when it is shown.
        tqdm.write(f"Processing {aligned_file}")
        aligned_file_path = join(pdb_files_dir, aligned_file)
        tm_scores.append(calculate_tm_score(ref_file, aligned_file_path))
    return tm_scores


def attach_tm_scores(df, aligned_files, tm_scores):
    """Add a ``tm_score`` column to ``df`` by matching file basenames.

    Each row's ``holo_model_path`` is reduced to its basename and looked up
    among the basenames of ``aligned_files``. Rows without a match, and rows
    whose path is not a string, receive NaN so the column stays numeric.

    Args:
        df (pandas.DataFrame): Table with a ``holo_model_path`` column;
            modified in place.
        aligned_files (list): File names that were scored.
        tm_scores (list): Scores parallel to ``aligned_files``.

    Returns:
        pandas.DataFrame: The same ``df`` object, for convenience.
    """
    tm_score_dict = {
        os.path.basename(aligned_file): score
        for aligned_file, score in zip(aligned_files, tm_scores)
    }
    missing = float("nan")

    def lookup(model_path):
        # Blank CSV cells are read as NaN (a float), which basename() rejects.
        if not isinstance(model_path, str):
            return missing
        return tm_score_dict.get(os.path.basename(model_path), missing)

    df['tm_score'] = df['holo_model_path'].apply(lookup)
    return df


def plot_consensus_vs_tm(df, pdb_id):
    """Save a scatter plot of the ``consensus`` column against ``tm_score``.

    The figure is written to ``consensus_tm_scatter_<pdb_id lower>.png`` in
    the current working directory.

    Args:
        df (pandas.DataFrame): Table with ``consensus`` and ``tm_score``.
        pdb_id (str): PDB identifier used in the output file name.
    """
    # Imported here so matplotlib is only required once plotting is reached.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(10, 6))
    plt.scatter(df['consensus'], df['tm_score'], alpha=0.5)
    plt.xlabel('Consensus Score')
    plt.ylabel('TM Score')
    plt.title('Consensus Score vs TM Score')
    plt.grid(True, linestyle='--', alpha=0.7)

    plt.savefig(f'consensus_tm_scatter_{pdb_id.lower()}.png')
    plt.close()


def main(argv=None):
    """Score every aligned model for one PDB id and record the results.

    Validates the inputs, loads the CSP rank score CSV, scores each file in
    the aligned directory against the experimental reference, writes the
    scores back into the CSV as ``tm_score`` and saves a scatter plot.

    All inputs, including the CSV, are checked before the scoring loop so a
    bad input is reported without first spending time on scoring.

    Args:
        argv (list | None): Argument vector; defaults to ``sys.argv``.

    Raises:
        SystemExit: With status 1 when the PDB id argument is missing, the
            aligned directory or reference file does not exist, or the CSV
            cannot be read or lacks the ``holo_model_path`` column.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Usage: python script.py <pdb_id>")
        sys.exit(1)

    pdb_id = argv[1].upper()
    csp_rank_score_file, pdb_files_dir, ref_file = build_paths(pdb_id)

    if not os.path.exists(pdb_files_dir):
        print(f"Error: Directory {pdb_files_dir} does not exist")
        sys.exit(1)

    if not os.path.exists(ref_file):
        print(f"Error: Reference file {ref_file} does not exist")
        sys.exit(1)

    try:
        df = pd.read_csv(csp_rank_score_file)
    except Exception as e:
        print(f"Error reading CSP rank scores file: {e}")
        sys.exit(1)

    if 'holo_model_path' not in df.columns:
        print(f"Error: {csp_rank_score_file} has no 'holo_model_path' column")
        sys.exit(1)

    # Sorted so the score list is reproducible across runs and filesystems.
    aligned_files = sorted(
        f for f in listdir(pdb_files_dir) if isfile(join(pdb_files_dir, f))
    )

    tm_scores = score_models(ref_file, pdb_files_dir, aligned_files)
    print(f"TM scores: {tm_scores}")

    attach_tm_scores(df, aligned_files, tm_scores)

    # Best effort: a failed save is reported but the plot is still produced.
    try:
        df.to_csv(csp_rank_score_file, index=False)
        print(f"Successfully added TM scores to {csp_rank_score_file}")
    except Exception as e:
        print(f"Error saving updated CSP rank scores file: {e}")

    plot_consensus_vs_tm(df, pdb_id)


if __name__ == "__main__":
    main()
	# NOTE(review): everything from here on repeats the script above line for line (indentation restored); consider deleting one copy.
# As written, running this file executes the whole pipeline a second time.
import sys
from tqdm import tqdm
from util import compute_structure_similarity


def calculate_tm_score(ref_pdb, target_pdb):
    """
    Calculate TM-score between reference and target PDB structures.

    Args:
        ref_pdb (str): Path to reference PDB file
        target_pdb (str): Path to target PDB file

    Returns:
        float: TM-score between the structures
    """
    return compute_structure_similarity(ref_pdb, target_pdb, multimer=True)


# The PDB id is the only command-line argument and is required.
if len(sys.argv) < 2:
    print("Usage: python script.py <pdb_id>")
    sys.exit(1)

pdb_id = sys.argv[1].upper()

# Get CSP rank score file path
csp_rank_score_file = f'./CSP_Rank_Scores/CSP_{pdb_id.lower()}_CSpred.csv'

import os
from os import listdir
from os.path import isfile, join

# Get path to PDB files directory
pdb_files_dir = f'./PDB_FILES/{pdb_id.upper()}_aligned/'

# Check if directory exists
if not os.path.exists(pdb_files_dir):
    print(f"Error: Directory {pdb_files_dir} does not exist")
    sys.exit(1)

# Get list of all files in directory
aligned_files = [f for f in listdir(pdb_files_dir) if isfile(join(pdb_files_dir, f))]

# Get path to experimental reference structure
ref_file = f'./PDB_FILES/experimental_structures/exp_{pdb_id.lower()}.pdb'

# Check if reference file exists
if not os.path.exists(ref_file):
    print(f"Error: Reference file {ref_file} does not exist")
    sys.exit(1)


# Initialize array to store TM scores
tm_scores = []

# Loop through each file; tm_scores stays parallel to aligned_files
for aligned_file in tqdm(aligned_files):
    aligned_file_path = os.path.join(pdb_files_dir, aligned_file)
    tm_score = calculate_tm_score(ref_file, aligned_file_path)
    tm_scores.append(tm_score)
    print(f"Processing {aligned_file}")

# Print TM scores
print(f"TM scores: {tm_scores}")

import pandas as pd

# Read the CSP rank scores file
try:
    df = pd.read_csv(csp_rank_score_file)
except Exception as e:
    print(f"Error reading CSP rank scores file: {e}")
    sys.exit(1)

# Create a dictionary mapping filenames to TM scores
tm_score_dict = {}
for score, aligned_file in zip(tm_scores, aligned_files):
    # Extract just the filename without path
    filename = os.path.basename(aligned_file)
    tm_score_dict[filename] = score

# Add TM scores to dataframe by matching filenames; rows without a match get None
df['tm_score'] = df['holo_model_path'].apply(lambda x: tm_score_dict.get(os.path.basename(x), None))

# Save updated dataframe (overwrites the input CSV; a failure is reported but not fatal)
try:
    df.to_csv(csp_rank_score_file, index=False)
    print(f"Successfully added TM scores to {csp_rank_score_file}")
except Exception as e:
    print(f"Error saving updated CSP rank scores file: {e}")


# Create scatter plot of consensus vs TM scores
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.scatter(df['consensus'], df['tm_score'], alpha=0.5)
plt.xlabel('Consensus Score')
plt.ylabel('TM Score')
plt.title('Consensus Score vs TM Score')
plt.grid(True, linestyle='--', alpha=0.7)

# Save the plot
plt.savefig(f'consensus_tm_scatter_{pdb_id.lower()}.png')
plt.close()