# model_select_bayes.py


from util import *
import shutil
import os
from os.path import basename

bound = '7jq8'

# Per-cluster weights keyed by cluster id.  Further down, each weight is
# multiplied by 100 and truncated to an int to get the number of models taken
# from that cluster.  Insertion order is kept because it fixes the order of
# the resulting ensemble.
normalized_7jq8_clusters = {
    2: 0.06598934010659893,
    8: 0.06054939450605494,
    20: 0.05180948190518095,
    4: 0.07371926280737193,
    12: 0.06607933920660794,
    14: 0.06216937830621694,
    6: 0.06738932610673894,
    5: 0.058719412805871944,
    1: 0.06604933950660494,
    15: 0.06678933210667894,
    3: 0.06509934900650993,
    10: 0.022649773502264978,
    18: 0.05411945880541195,
    16: 0.03615963840361596,
    13: 0.03555964440355597,
    11: 0.03449965500344997,
    17: 0.03362966370336297,
    7: 0.010409895901040989,
    9: 0.03398966010339897,
    19: 0.034619653803461964,
}


def _floats_or_zeros(row, keys):
    """Return ``float(row[key])`` for every key, or all zeros if any fails.

    The fallback is all-or-nothing on purpose: a row with one missing or
    non-numeric field contributes 0 to every score in the group, so the
    parallel score lists stay the same length as the parsed rows.

    Args:
        row: One parsed CSV row, indexable by column name.
        keys: Column names to convert, in the order the values are returned.

    Returns:
        A list with one entry per key: the parsed floats, or ``0`` for each
        key when any lookup or conversion failed.
    """
    try:
        return [float(row[key]) for key in keys]
    except (KeyError, TypeError, ValueError):
        return [0] * len(keys)


csv_file_path = './CLUSTERING_RESULTS/7JQ8_aligned_CSPREDB_TSNE_chain_B_data.csv'

# CSP_Rank_Scores and parse_csv are not defined in this file; presumably they
# are provided by `from util import *` -- confirm against util.
data_source_file = CSP_Rank_Scores + './CSP_'+bound+'_CSpred.csv'
parsed_data = parse_csv(data_source_file)

# Full model paths, and the part of each path after its last '/'.
holo_model_files_raw = [data['holo_model_path'] for data in parsed_data]
holo_model_files = [path.rsplit('/', 1)[-1] for path in holo_model_files_raw]

# Parallel score lists, one entry per row of parsed_data.  Only the consensus
# and Confidence lists are read later in the visible script; the others are
# still filled so that any code relying on them keeps working.
consensus_scores = []
Confidence_scores = []
dp_scores = []
recall_scores = []
precision_scores = []
plddt_scores = []
Q_scores = []
for data in parsed_data:
    conf, cons, plddt = _floats_or_zeros(
        data, ('Confidence', 'consensus', 'plddt'))
    consensus_scores.append(cons)
    Confidence_scores.append(conf)
    plddt_scores.append(plddt)

    dp, precision, recall = _floats_or_zeros(
        data, ('DP', 'RPF_PRECISION', 'RPF_RECALL'))
    recall_scores.append(recall)
    precision_scores.append(precision)
    dp_scores.append(dp)

    (Q_score,) = _floats_or_zeros(data, ('Q-score',))
    Q_scores.append(Q_score)

# The metric used to rank models inside each cluster.
bayesian_selection_metric = consensus_scores

TSNE_file = csv_file_path
TSNE_data = parse_csv(TSNE_file)
TSNE_files = [data['pdb_file'] for data in TSNE_data]
TSNE_clusters = [int(data['Cluster']) for data in TSNE_data]
print("getting TSNE cluster scores")

# Position of the first occurrence of every model file name.  This gives the
# same answer as holo_model_files.index(name) without a linear scan per row.
first_index_by_name = {}
for position, name in enumerate(holo_model_files):
    first_index_by_name.setdefault(name, position)

# Per-cluster lists, kept index-aligned with each other: ranking score,
# selection metric, and full model path.
TSNE_cluster_scores = {}
TSNE_CSPRank_scores = {}
TSNE_cluster_files = {}
for pdb_file, cluster_number in zip(TSNE_files, TSNE_clusters):
    # Register the cluster even when its file turns out to be unknown.
    cluster_scores = TSNE_cluster_scores.setdefault(cluster_number, [])
    cluster_files = TSNE_cluster_files.setdefault(cluster_number, [])
    cluster_metrics = TSNE_CSPRank_scores.setdefault(cluster_number, [])

    index = first_index_by_name.get(pdb_file)
    if index is None:
        print("couldn't find " + pdb_file)
        continue

    metric = bayesian_selection_metric[index]
    cluster_files.append(holo_model_files_raw[index])
    cluster_metrics.append(metric)
    # Non-positive metrics rank as 0; otherwise the metric is weighted by the
    # model's Confidence.  (The original also carried a disabled variant that
    # took math.sqrt of this product.)
    cluster_scores.append(
        0 if metric <= 0 else metric * Confidence_scores[index])

ensemble = []

for cluster, weight in normalized_7jq8_clusters.items():
    # int() truncates, so the per-cluster quotas can sum to fewer than 100.
    number_of_models_from_cluster = int(weight * 100)

    if number_of_models_from_cluster == 0:
        continue

    if cluster not in TSNE_cluster_scores:
        # A weighted cluster that never appears in the clustering CSV used to
        # abort the whole run with a KeyError.
        print(f"cluster {cluster} has no entries in {TSNE_file}; skipping")
        continue

    scores = TSNE_cluster_scores[cluster]
    # Highest score first; sorted() is stable, so ties keep their CSV order.
    top_indices = sorted(
        range(len(scores)),
        key=scores.__getitem__,
        reverse=True,
    )[:number_of_models_from_cluster]
    for index in top_indices:
        ensemble.append(TSNE_cluster_files[cluster][index])

print(ensemble)

# source_dir is not read anywhere in the visible script: files are moved from
# the paths stored in the score CSV.  The name is kept for compatibility.
source_dir = './PDB_FILES/7JQ8_aligned/'
destination_dir = './PDB_FILES/7JQ8_MCMC_WEIGHTED/'

os.makedirs(destination_dir, exist_ok=True)

for file in ensemble:
    source_file = file
    destination_file = os.path.join(destination_dir, basename(file))
    try:
        shutil.move(source_file, destination_file)
    except (OSError, shutil.Error) as e:
        # Best effort: report the failure and carry on with the other files.
        print(f"Error moving {file}: {e}")
    else:
        print(f"Moved {file} to {destination_dir}")