Skip to content
Permalink
5ab4ed8e4a
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
101 lines (90 sloc) 3.8 KB
import os
import re
import subprocess
import sys
import pandas as pd
import csv
import ast
def parse_list(value):
    """Convert a CSV cell containing a Python literal into the real object.

    The cell text is handed to ``ast.literal_eval``; lists, tuples, dicts,
    numbers, booleans and ``None`` written as text come back as the
    corresponding Python objects.  Anything that is not a valid literal is
    returned unchanged.

    Note that *every* cell that happens to look like a literal is converted,
    e.g. ``"1e10"`` becomes the float ``1e10``.

    Args:
        value: The raw cell content (normally a ``str``).

    Returns:
        The evaluated literal, or ``value`` itself when it cannot be
        evaluated.
    """
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval is documented to raise any of these on malformed
        # input (TypeError e.g. for a dict literal with an unhashable key);
        # in every case the cell is simply kept as-is.
        return value
def parse_csv(file_name):
    """Load a CSV file as a list of dictionaries, one per data row.

    The first line of the file supplies the keys (``csv.DictReader``), and
    each cell value is passed through ``parse_list`` before being stored.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A list with one ``{column: parsed_value}`` dict per row, in file
        order.
    """
    with open(file_name, newline='') as handle:
        return [
            {column: parse_list(cell) for column, cell in record.items()}
            for record in csv.DictReader(handle)
        ]
def _append_row(df, row):
    """Return ``df`` with ``row`` (a column -> value dict) added as the last row."""
    new_row = pd.DataFrame([row])
    if df.empty and df.shape[1] == 0:
        # Nothing to merge with; the new row is the whole table.
        return new_row
    return pd.concat([df, new_row], ignore_index=True)


def update_row(csv_filename, apo, bound, new_values, new_columns):
    """Write ``new_values`` into the CSV row identified by ``bound``.

    The CSV is loaded into a DataFrame (an empty one if the file is missing
    or empty).  The row whose ``holo_model_path`` equals ``bound`` gets the
    given columns overwritten; if there is no such row, or the file does not
    yet have the ``apo_bmrb`` / ``holo_model_path`` columns, a new row is
    appended that carries the values together with ``apo_bmrb=apo`` and
    ``holo_model_path=bound``.  The table is then written back to
    ``csv_filename`` without the index.

    Args:
        csv_filename: Path of the CSV file to update in place.
        apo: Value stored in the ``apo_bmrb`` column of a newly created row.
        bound: Value matched against / stored in ``holo_model_path``.
        new_values: Values to store, parallel to ``new_columns``.
        new_columns: Column names for ``new_values``.
    """
    try:
        # Load the DataFrame if the CSV file exists
        df = pd.read_csv(csv_filename, low_memory=False)
    except (pd.errors.EmptyDataError, FileNotFoundError):
        # Create an empty DataFrame if the CSV file is empty or doesn't exist
        df = pd.DataFrame()

    data_dict = dict(zip(new_columns, new_values))
    # A freshly created row always carries its identifying columns, so that a
    # later call with the same ``bound`` finds and updates it instead of
    # appending a duplicate.
    keyed_row = {**data_dict, 'apo_bmrb': apo, 'holo_model_path': bound}

    if 'apo_bmrb' not in df.columns or 'holo_model_path' not in df.columns:
        df = _append_row(df, keyed_row)
    else:
        # Update or create the row
        row_index = df[(df['holo_model_path'] == bound)].index
        print("UPDATING ROW INDEX : " + str(row_index))
        if row_index.empty:
            df = _append_row(df, keyed_row)
        else:
            try:
                # Only the first matching row is updated.
                for col, val in data_dict.items():
                    df.loc[row_index[0], col] = val
            except (TypeError, ValueError):
                # The value could not be stored in the existing row (e.g. an
                # incompatible dtype or shape); fall back to appending one
                # new row.  Columns assigned before the failure stay updated.
                df = _append_row(df, keyed_row)

    # Save the DataFrame back to the CSV file
    df.to_csv(csv_filename, index=False)
# Score lines looked for in the run overview files, in the order of the
# DP / RPF_RECALL / RPF_PRECISION output columns.  Raw strings keep ``\d``
# from being treated as an (invalid) string escape.
_SCORE_PATTERNS = (
    re.compile(r"DP-Score: -*\d\.\d+"),
    re.compile(r"Final Recall-score for input query structures: -*\d\.\d+"),
    re.compile(r"Final Precision-score for input query structures: -*\d\.\d+"),
)


def read_overview_text(model_dir):
    """Return the concatenated text of every ``run1/*ovw`` file in ``model_dir``.

    Files are read in sorted name order as ISO-8859-1; hidden files are
    ignored.  An empty string is returned when ``run1`` is missing or holds
    no matching file.
    """
    run_dir = os.path.join(model_dir, "run1")
    try:
        candidates = sorted(os.listdir(run_dir))
    except OSError:
        return ""
    chunks = []
    for file_name in candidates:
        if file_name.startswith(".") or not file_name.endswith("ovw"):
            continue
        path = os.path.join(run_dir, file_name)
        if not os.path.isfile(path):
            continue
        with open(path, encoding="ISO-8859-1") as handle:
            chunks.append(handle.read())
    return "\n".join(chunks)


def extract_scores(text):
    """Pull ``[DP, recall, precision]`` out of overview text.

    For each score the first matching line is used and the number after the
    last ``:`` is converted to ``float``.

    Returns:
        A three-element list of floats, or ``None`` if any of the three
        score lines is absent.
    """
    scores = []
    for pattern in _SCORE_PATTERNS:
        match = pattern.search(text)
        if match is None:
            return None
        matched = match.group(0)
        scores.append(float(matched[matched.rfind(':') + 1:]))
    return scores


def main(argv=None):
    """Collect DP / recall / precision scores for one PDB entry into its CSV.

    Walks ``./<PDB>/``, and for every directory whose name contains
    ``.pdb`` reads the scores from its ``run1/*ovw`` files and stores them
    in ``../CSP_Rank_Scores/CSP_<pdb>_CSpred.csv`` via ``update_row``.
    Directories without a complete set of scores are skipped; any other
    failure for a directory is reported on stderr and the walk continues.

    Args:
        argv: Command-line arguments (``argv[1]`` is the PDB id).  Defaults
            to ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Usage: python getDP.py <pdb>")
        sys.exit(1)

    pdb = argv[1].upper()
    directory = f"./{pdb}/"
    data_source_file = f'../CSP_Rank_Scores/CSP_{pdb.lower()}_CSpred.csv'

    parsed_data = parse_csv(data_source_file)
    apos = [str(data['apo_bmrb']) for data in parsed_data]
    if not apos:
        print(f"No data rows found in {data_source_file}", file=sys.stderr)
        sys.exit(1)
    # NOTE(review): the second data row has always been the source of the apo
    # id; the first row is used only when the file has a single row, so that
    # case no longer dies with an IndexError.  Confirm which row is intended.
    apo = apos[1] if len(apos) > 1 else apos[0]

    new_columns = ['DP', 'RPF_RECALL', 'RPF_PRECISION']
    for root, dirs, _files in os.walk(directory):
        for name in dirs:
            if not re.search(r"\.pdb", name):
                continue
            dn = os.path.abspath(os.path.join(root, name))
            try:
                new_values = extract_scores(read_overview_text(dn))
                if new_values is None:
                    # No finished run (or incomplete overview) here.
                    continue
                bound_path = './PDB_FILES/'+pdb+'_aligned/'+name.replace('_one_chain', '')
                print("new_values = " + str(new_values))
                print("bound_path = " + bound_path)
                update_row(data_source_file, apo.upper(), bound_path, new_values, new_columns)
            except Exception as exc:
                # Best effort: one bad directory must not stop the others,
                # but the failure is no longer silent.
                print(f"Skipping {dn}: {exc}", file=sys.stderr)
                continue


if __name__ == "__main__":
    main()