Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import json
import pickle
import sys

import pandas as pd

from paths import *
from util import *
# Confidence formula from:
# Wallner, B. (2023b). Improved multimer prediction using massive sampling with AlphaFold in
# CASP15. PROTEINS: Structure, Function and Bioinformatics 91, 1734-1746.
# https://doi.org/10.1002/prot.26562.
def find_confidence(json_file_path):
    """Return (confidence, ptm, iptm) read from an AlphaFold score file.

    Accommodates both .json and .pkl formats. Confidence is
    0.8 * iptm + 0.2 * ptm, with every value rounded to 2 decimals.

    Returns:
        (confidence, ptm, iptm) on success; (None, None, None) when a JSON
        score file cannot be parsed.

    Raises:
        ValueError: if the path ends in neither 'json' nor 'pkl'.
        (The original did a bare `raise` with no active exception here,
        which surfaced as an unhelpful RuntimeError.)
    """
    if json_file_path.endswith('json'):
        with open(json_file_path) as file:
            try:
                data = json.load(file)
                iptm = round(data.get('iptm', 0), 2)
                ptm = round(data.get('ptm', 0), 2)
                confidence = round(0.8 * iptm + 0.2 * ptm, 2)
                return confidence, ptm, iptm
            # Narrowed from a bare `except:` so unrelated bugs (and
            # KeyboardInterrupt) are no longer silently swallowed.
            except (json.JSONDecodeError, TypeError, ValueError, KeyError):
                return None, None, None
    elif json_file_path.endswith('pkl'):
        # NOTE(review): pickle.load is unsafe on untrusted files; these are
        # assumed to be locally produced AlphaFold outputs.
        with open(json_file_path, 'rb') as handle:  # original leaked this handle
            data = pickle.load(handle)
        iptm = round(float(data.get('iptm', 0)), 2)
        ptm = round(float(data.get('ptm', 0)), 2)
        confidence = round(0.8 * iptm + 0.2 * ptm, 2)
        return confidence, ptm, iptm
    else:
        print("BAD JSON FILE")
        raise ValueError("unsupported score file format: " + json_file_path)
def find_avg_plddt(pdb_file_path):
    """Return the mean per-residue pLDDT of an AlphaFold model.

    AlphaFold writes pLDDT into the B-factor column, so this averages the
    atom B-factors within each standard amino-acid residue, then averages
    those per-residue means over the whole structure.

    Args:
        pdb_file_path: path to a PDB file readable by Bio.PDB's PDBParser.

    Raises:
        ValueError: if the structure contains no standard amino-acid
            residues (the original crashed with ZeroDivisionError here).
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('structure', pdb_file_path)
    bfactors_by_residue = defaultdict(list)
    for model in structure:
        for chain in model:
            for residue in chain:
                if is_aa(residue, standard=True):
                    for atom in residue:
                        bfactors_by_residue[residue.get_id()].append(atom.get_bfactor())
    if not bfactors_by_residue:
        raise ValueError("no standard amino-acid residues found in " + pdb_file_path)
    # Average of the per-residue averages (iteration order doesn't affect a sum,
    # so the original's sort over residue ids was unnecessary).
    per_residue_means = [sum(vals) / len(vals) for vals in bfactors_by_residue.values()]
    return sum(per_residue_means) / len(per_residue_means)
def get_PostProcess_pdb_file(pdb_file, pdb):
    """Map a model identifier to its unrelaxed PDB file path.

    When *pdb_file* contains 'min_', the basename is the slice between the
    last 'min_' and the last '_af2'; otherwise the name is used verbatim.
    The result lives under PDB_FILES/<pdb lowercased>/unrelaxed_<basename>.pdb.
    """
    if 'min_' in pdb_file:
        start = pdb_file.rfind('min_') + 4
        stop = pdb_file.rfind('_af2')
        basename = pdb_file[start:stop]
    else:
        basename = pdb_file
    return PDB_FILES + pdb.lower() + '/unrelaxed_' + basename + '.pdb'
# ---- Script entry point ----------------------------------------------------
# Usage: python get_confidences.py <bound>
# Reads CSP_<bound>_CSpred.csv, fills in Confidence/ptm/iptm (from the
# AlphaFold score files) and plddt (from the unrelaxed model PDB) for every
# minimized multimer model, then writes the CSV back in place.
if len(sys.argv) < 2:
    print("Usage: python script.py <bound>")
    print('e.g python3 get_confidences.py 2nd1')  # stray trailing quote removed
    sys.exit(1)

bound = sys.argv[1].lower()

# Input CSV produced by the CSpred step; overwritten in place at the end.
input_file = CSP_Rank_Scores + './CSP_' + bound.lower() + '_CSpred.csv'
data = pd.read_csv(input_file)

# Directories holding the AlphaFold score files and the model PDBs.
zip_file_dir = PDB_FILES + bound.upper() + '_json/'
model_file_dir = PDB_FILES + bound.upper() + '/'

for index, pdb in tqdm(data['holo_model_path'].items()):
    # Only minimized multimer models carry the score files we need.
    if 'multimer' not in pdb or 'min' not in pdb:
        continue
    basename_match = pdb[pdb.rfind('model_'):pdb.rfind('_af2')]
    try:
        # First score file whose name contains the model basename.
        zip_file = [zip_file_dir + f for f in listdir(zip_file_dir)
                    if f.find(basename_match) != -1][0]
        print(zip_file)
        confidence, ptm, iptm = find_confidence(zip_file)
        data.at[index, 'Confidence'] = confidence
        data.at[index, 'ptm'] = ptm
        data.at[index, 'iptm'] = iptm
    except Exception as e:
        # Best-effort: log the miss and move on. (The original had an
        # unreachable bare `raise` after this `continue` — removed.)
        print(e)
        print("couldn't find " + basename_match + ' in ' + zip_file_dir)
        continue
    pdb_file = get_PostProcess_pdb_file(basename_match, bound)
    plddt = find_avg_plddt(pdb_file)
    data.at[index, 'plddt'] = plddt

# Persist the augmented table back over the input file.
data.to_csv(input_file, index=False)
print("FINISHED getting ipTM/pTM, Confidence, and plDDT")