# calc_CSP_RANK.py

# calc_CSP_metrics.py
# calculate Fmeasure, Precision, Recall for each apo/holo pair with CSP data
# determine binding residues using UCBShift predictions for holo form, spectra aligned to holo shifts

import sys
from os.path import basename
from util import *
from paths import *
from os.path import exists, isfile, isdir, join
from os import listdir
from tqdm import tqdm

# CSV table of apo/holo pairs: the script section of this file reads it with
# parse_csv() and writes the computed metrics back into it with update_row().
data_source_file = './CSPRANK.csv'
# Method label. NOTE(review): the get_confusion() calls in the visible script
# pass the literal "MONTE" instead of this name — confirm whether it is used elsewhere.
method = "MONTE"
# NOTE(review): not referenced anywhere in the visible code — confirm it is still needed.
z_value = 0

def align_shifts_to_seq(aligned_sequence, sequence, shifts):
    """Spread per-residue shifts over the positions of an aligned sequence.

    Walks ``aligned_sequence`` from left to right while keeping a cursor into
    ``sequence``.  When the aligned symbol equals the symbol under the cursor,
    the shift stored at the cursor is emitted and the cursor advances;
    every other position (for example a gap symbol) is filled with ``-1``.

    Args:
        aligned_sequence: Sequence of symbols, possibly containing symbols
            that are absent from ``sequence``.
        sequence: The sequence the shifts belong to.
        shifts: Values indexed in parallel with ``sequence``.

    Returns:
        A list with one entry per position of ``aligned_sequence``.
    """
    residue_count = len(sequence)
    cursor = 0
    placed = []
    for symbol in aligned_sequence:
        is_match = cursor < residue_count and symbol == sequence[cursor]
        if not is_match:
            # No residue of ``sequence`` sits at this aligned position.
            placed.append(-1)
            continue
        placed.append(shifts[cursor])
        cursor += 1
    return placed

# Text that recent CPython versions put in the TypeError raised when ``None``
# is tuple-unpacked, e.g. by ``TP, FP, FN, TN = None``.
NONE_UNPACK_MESSAGE = "cannot unpack non-iterable NoneType object"

# Structure sources to score, each paired with the CSV columns that receive
# F, MCC and consensus (in that order) for that source.
METRIC_COLUMNS = (
    ("NMR", ["F_NMR", "MCC_NMR", "consensus_NMR"]),
    ("AF2", ["F_AF2", "MCC_AF2", "consensus_AF2"]),
)


def is_missing_data_error(exc):
    """Return True if *exc* is the TypeError produced by unpacking ``None``.

    The previous code compared the exception object itself with a string
    (``e == "TypeError: ..."``), which is always False, so the "no data"
    branch could never run.  Checking the type and the message restores
    the intended behaviour.
    """
    return isinstance(exc, TypeError) and NONE_UNPACK_MESSAGE in str(exc)


def add_data(apo, holo):
    """Compute and store F, MCC and consensus for one apo/holo pair.

    For each structure source ("NMR", then "AF2") the confusion counts are
    obtained from get_confusion(), converted with get_F_MCC_cons() and
    written to ``data_source_file`` with update_row().

    If a step fails with the "unpack None" TypeError (presumably because
    get_confusion() returned None for lack of data — TODO confirm), the
    function reports it and returns immediately, so later sources are
    skipped for this pair.  Any other exception is printed and re-raised.

    Args:
        apo: Apo entry identifier (value of the ``apo_bmrb`` column, as str).
        holo: Holo entry identifier (value of the ``holo_pdb`` column).
    """
    for structure_source, new_columns in METRIC_COLUMNS:
        try:
            TP, FP, FN, TN = get_confusion(
                apo, holo, "MONTE", "UCBShift", "", "",
                structure_source=structure_source,
            )
            F, MCC, consensus = get_F_MCC_cons(TP, FP, FN, TN)
            new_values = [F, MCC, consensus]
            update_row(data_source_file, apo, holo, new_values, new_columns)
        except Exception as e:
            print(e)
            if is_missing_data_error(e):
                print("No NMR data")
                return
            raise


if __name__ == "__main__":

    data = parse_csv(data_source_file)
    apos = [str(row['apo_bmrb']) for row in data]
    bounds = [row['holo_pdb'] for row in data]
    for apo, holo in zip(apos, bounds):
        #if holo not in ['2lgf', '2kri', '1klq', '2lv6']:
        #    continue
        # if holo != '2n23':
        #     continue

        print(apo)
        print(holo)
        # add_data() reports and swallows missing-data errors itself; anything
        # else it re-raises, which stops the run with a traceback.
        add_data(apo, holo)
	# calc_CSP_metrics.py
	# calculate Fmeasure, Precision, Recall for each apo/holo pair with CSP data
	# determine binding residues using UCBShift predictions for holo form, spectra aligned to holo shifts

	import sys
	from os.path import basename
	from util import *
	from paths import *
	from os.path import exists, isfile, isdir, join
	from os import listdir
	from tqdm import tqdm

	# CSV table of apo/holo pairs: the script section that follows reads it with
	# parse_csv() and writes the computed metrics back into it with update_row().
	data_source_file = './CSPRANK.csv'
	# Method label. NOTE(review): the get_confusion() calls in the visible script
	# pass the literal "MONTE" instead of this name — confirm whether it is used elsewhere.
	method = "MONTE"
	# NOTE(review): not referenced anywhere in the visible code — confirm it is still needed.
	z_value = 0

	def align_shifts_to_seq(aligned_sequence, sequence, shifts):
	    """Map per-residue shifts onto the positions of an aligned sequence.

	    The aligned sequence is scanned left to right with an index into
	    ``sequence``.  Where the aligned symbol equals the symbol at that
	    index, the corresponding shift is emitted and the index advances;
	    all other positions (for example gap symbols) receive ``-1``.

	    Args:
	        aligned_sequence: Sequence of symbols, possibly containing symbols
	            that are absent from ``sequence``.
	        sequence: The sequence the shifts belong to.
	        shifts: Values indexed in parallel with ``sequence``.

	    Returns:
	        A list with one entry per position of ``aligned_sequence``.
	    """
	    new_shifts = []
	    seq_index = 0
	    for symbol in aligned_sequence:
	        if seq_index < len(sequence) and symbol == sequence[seq_index]:
	            new_shifts.append(shifts[seq_index])
	            seq_index += 1
	        else:
	            # No residue of ``sequence`` sits at this aligned position.
	            new_shifts.append(-1)
	    return new_shifts

	# Text that recent CPython versions put in the TypeError raised when ``None``
	# is tuple-unpacked, e.g. by ``TP, FP, FN, TN = None``.
	NONE_UNPACK_MESSAGE = "cannot unpack non-iterable NoneType object"

	# Structure sources to score, each paired with the CSV columns that receive
	# F, MCC and consensus (in that order) for that source.
	METRIC_COLUMNS = (
	    ("NMR", ["F_NMR", "MCC_NMR", "consensus_NMR"]),
	    ("AF2", ["F_AF2", "MCC_AF2", "consensus_AF2"]),
	)


	def is_missing_data_error(exc):
	    """Return True if *exc* is the TypeError produced by unpacking ``None``.

	    The previous code compared the exception object itself with a string
	    (``e == "TypeError: ..."``), which is always False, so the "no data"
	    branch could never run.  Checking the type and the message restores
	    the intended behaviour.
	    """
	    return isinstance(exc, TypeError) and NONE_UNPACK_MESSAGE in str(exc)


	def add_data(apo, holo):
	    """Compute and store F, MCC and consensus for one apo/holo pair.

	    For each structure source ("NMR", then "AF2") the confusion counts are
	    obtained from get_confusion(), converted with get_F_MCC_cons() and
	    written to ``data_source_file`` with update_row().

	    If a step fails with the "unpack None" TypeError (presumably because
	    get_confusion() returned None for lack of data — TODO confirm), the
	    function reports it and returns immediately, so later sources are
	    skipped for this pair.  Any other exception is printed and re-raised.

	    Args:
	        apo: Apo entry identifier (value of the ``apo_bmrb`` column, as str).
	        holo: Holo entry identifier (value of the ``holo_pdb`` column).
	    """
	    for structure_source, new_columns in METRIC_COLUMNS:
	        try:
	            TP, FP, FN, TN = get_confusion(
	                apo, holo, "MONTE", "UCBShift", "", "",
	                structure_source=structure_source,
	            )
	            F, MCC, consensus = get_F_MCC_cons(TP, FP, FN, TN)
	            new_values = [F, MCC, consensus]
	            update_row(data_source_file, apo, holo, new_values, new_columns)
	        except Exception as e:
	            print(e)
	            if is_missing_data_error(e):
	                print("No NMR data")
	                return
	            raise


	if __name__ == "__main__":

	    data = parse_csv(data_source_file)
	    apos = [str(row['apo_bmrb']) for row in data]
	    bounds = [row['holo_pdb'] for row in data]
	    for apo, holo in zip(apos, bounds):
	        #if holo not in ['2lgf', '2kri', '1klq', '2lv6']:
	        # continue
	        # if holo != '2n23':
	        # continue

	        print(apo)
	        print(holo)
	        # add_data() reports and swallows missing-data errors itself; anything
	        # else it re-raises, which stops the run with a traceback.
	        add_data(apo, holo)