Skip to content
Permalink
b9fe25bf2d
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
67 lines (52 sloc) 2.45 KB
import os
import pandas as pd
from tabulate import tabulate
# Summarise CSP rank-score metrics per PDB directory.
#
# For every sub-directory of ./PDB_FILES whose name ends with 'files' and whose
# PDB id (the text before the first '_') is listed in TARGET_PDB_IDS, each
# '.pdb' file is looked up in the matching CSP rank score CSV. The per-file
# means of DP / RPF_RECALL / RPF_PRECISION are then averaged per directory and
# printed as one table row.

PDB_ROOT = './PDB_FILES'
# Only these PDB ids are summarised. The comparison is case-sensitive, although
# the id is lower-cased when the CSV file name is built.
TARGET_PDB_IDS = ('7jq8', '7jyn')
# CSV columns that are averaged, in the order they appear in the output table.
METRIC_COLUMNS = ('DP', 'RPF_RECALL', 'RPF_PRECISION')

# Get all directories in PDB_FILES that end with 'files'
pdb_dirs = sorted(
    d for d in os.listdir(PDB_ROOT)
    if d.endswith('files') and os.path.isdir(os.path.join(PDB_ROOT, d))
)

# Initialize lists to store results for table
table_data = []
headers = ['Directory', 'Avg DP', 'Avg RPF Recall', 'Avg RPF Precision', 'Files Processed']

for dir_name in pdb_dirs:
    dir_path = os.path.join(PDB_ROOT, dir_name)
    pdb_id = dir_name.split('_')[0]
    if pdb_id not in TARGET_PDB_IDS:
        continue

    print(f"\nProcessing directory: {dir_path}")

    # Get CSP rank score file path
    csp_rank_score_file = f'./CSP_Rank_Scores/CSP_{pdb_id.lower()}_CSpred.csv'
    if not os.path.exists(csp_rank_score_file):
        print(f"CSP rank score file not found for {pdb_id}")
        continue

    # Best-effort per directory: any failure (unreadable CSV, missing column,
    # ...) is reported and the directory is left out of the table instead of
    # aborting the whole run.
    try:
        df = pd.read_csv(csp_rank_score_file)

        # Sorted so that the messages below and the float summation order do
        # not depend on the arbitrary order returned by os.listdir.
        pdb_files = sorted(f for f in os.listdir(dir_path) if f.endswith('.pdb'))

        # One list of per-file means for each metric column.
        per_file_means = {column: [] for column in METRIC_COLUMNS}

        for pdb_file in pdb_files:
            # Literal substring match of the file name against the model path.
            # na=False treats rows with a missing path as "no match"; without
            # it the mask contains NaN and the boolean indexing raises, which
            # used to discard the entire directory via the except below.
            # NOTE(review): a substring match can hit more than one row (e.g.
            # 'model_1.pdb' inside 'xmodel_1.pdb'); the metrics of all matching
            # rows are averaged - confirm that this is intended.
            row = df[df['holo_model_path'].str.contains(pdb_file, regex=False, na=False)]
            if row.empty:
                print(f"No matching data found for {pdb_file}")
                continue
            for column in METRIC_COLUMNS:
                per_file_means[column].append(row[column].mean())

        # 'Files Processed' counts only the files that had a matching CSV row.
        files_processed = len(per_file_means[METRIC_COLUMNS[0]])
        if files_processed:  # Only add to table if we have data
            averages = [
                f"{sum(per_file_means[column]) / files_processed:.3f}"
                for column in METRIC_COLUMNS
            ]
            table_data.append([dir_name, *averages, files_processed])
    except Exception as e:
        print(f"Error processing {csp_rank_score_file}: {e}")

# Sort table_data by directory name
table_data.sort(key=lambda x: x[0])

# Print final table
print("\nResults Summary:")
print(tabulate(table_data, headers=headers, tablefmt='grid'))