Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
CSP_Rank/get_RPF_stats_ES_ensembles.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
67 lines (52 sloc)
2.45 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import pandas as pd
from tabulate import tabulate

# Summarise, per ensemble directory under ./PDB_FILES, the average DP,
# RPF recall and RPF precision of its .pdb models, using the values stored in
# the matching ./CSP_Rank_Scores/CSP_<pdb_id>_CSpred.csv file, and print the
# result as a grid table.

# Folder whose sub-directories (named '<pdb_id>_..._files') hold the models.
PDB_ROOT = './PDB_FILES'

# Only directories whose leading '<pdb_id>' token is listed here are processed.
# NOTE(review): the comparison is case-sensitive while the CSV path below
# lower-cases the id - confirm whether upper-case directory names should match.
TARGET_PDB_IDS = ('7jq8', '7jyn')

# Get all directories in PDB_FILES that end with 'files'
pdb_dirs = sorted(
    d for d in os.listdir(PDB_ROOT)
    if os.path.isdir(os.path.join(PDB_ROOT, d)) and d.endswith('files')
)

# Rows and column titles of the summary table
table_data = []
headers = ['Directory', 'Avg DP', 'Avg RPF Recall', 'Avg RPF Precision', 'Files Processed']

for dir_name in pdb_dirs:
    dir_path = os.path.join(PDB_ROOT, dir_name)
    pdb_id = dir_name.split('_')[0]
    if pdb_id not in TARGET_PDB_IDS:
        continue

    print(f"\nProcessing directory: {dir_path}")

    # Get CSP rank score file path
    csp_rank_score_file = f'./CSP_Rank_Scores/CSP_{pdb_id.lower()}_CSpred.csv'
    if not os.path.exists(csp_rank_score_file):
        print(f"CSP rank score file not found for {pdb_id}")
        continue

    # Best-effort per directory: any failure (unreadable CSV, missing column,
    # ...) is reported and the remaining directories are still processed.
    try:
        df = pd.read_csv(csp_rank_score_file)
        model_paths = df['holo_model_path']

        # Sorted so that messages and accumulation order do not depend on the
        # arbitrary order returned by os.listdir.
        pdb_files = sorted(f for f in os.listdir(dir_path) if f.endswith('.pdb'))

        # Per-file metric values collected for this directory
        all_dp = []
        all_rpf_recall = []
        all_rpf_precision = []

        for pdb_file in pdb_files:
            # Rows whose model path contains the file name (literal match).
            # na=False treats rows with a missing path as "no match"; without
            # it the mask contains NaN and the boolean indexing raises, which
            # would discard the whole directory.
            matches = df[model_paths.str.contains(pdb_file, regex=False, na=False)]
            if matches.empty:
                print(f"No matching data found for {pdb_file}")
                continue

            # A file may match several rows; use the mean of each metric.
            all_dp.append(matches['DP'].mean())
            all_rpf_recall.append(matches['RPF_RECALL'].mean())
            all_rpf_precision.append(matches['RPF_PRECISION'].mean())

        if all_dp:  # Only add to table if we have data
            table_data.append([
                dir_name,
                f"{sum(all_dp)/len(all_dp):.3f}",
                f"{sum(all_rpf_recall)/len(all_rpf_recall):.3f}",
                f"{sum(all_rpf_precision)/len(all_rpf_precision):.3f}",
                len(all_dp),
            ])
    except Exception as e:
        print(f"Error processing {csp_rank_score_file}: {e}")

# Sort table_data by directory name
table_data.sort(key=lambda x: x[0])

# Print final table
print("\nResults Summary:")
print(tabulate(table_data, headers=headers, tablefmt='grid'))