Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
FilteringAF2_scripts/TrimmModels.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
170 lines (155 sloc)
6.13 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import os | |
# ---- Script-level settings; the command-line parsing further down overrides them

# Where the models come from, where they go, and which ones are picked
inDir = ""      # directory to get the files from
outDir = ""     # directory to output the trimmed files
files = ""      # text an input file name must contain to be selected
myf = ""        # not referenced anywhere in the visible part of the script

# Trim mode switches (0 or 1) and the parameter each mode uses
Trim_WD, Trim_pLDDT = 0, 0
Welldef = ""    # range for well defined (ex. B2..B50,C4..C32)
pLDDT_cut = 0   # cutoff to pLDDT based trim (ex. -pl 55)

# Bookkeeping filled in while the script runs
debug = 0       # default debug level, 0
Group_fn = ""   # name for final multimodel file
ranges, Names, TrimmedFiles = [], [], []  # parsed ranges / input names / trimmed paths
# Show description and usage.
# The help is printed when too few arguments are given or when the first
# argument asks for it.  "-help" is accepted as well as "-h" because that is
# the spelling the help text itself advertises.
sname = os.path.basename(__file__)
if len(sys.argv) < 4 or sys.argv[1] in ("-h", "-help", "--help"):
    usage_lines = (
        "",
        f"\t ** {sname} (RTT, 2024) ",
        "\t Python script to output AF2 coords trimmed using a well-defined range or pLDDT cutoff",
        "\t Well-defined OR pLDDT options are mutually exclusive, only one can be used \n",
        "\t (1) outDir will hold: ",
        "\t - the trimmed models from inDir ",
        "\t (2) Current dir, from where the script was called, will hold: ",
        "\t - (a) multimodel file with trimmed models grouped together (ex. Trimmed_plDDT_45.pdb) ",
        "\t - (b) text file with a list of trimmed models (ex. ListToMini) can be used to minimize \n",
        f"\t USAGE:\t {sname} <ARGUMENTS> \n",
        "\t ARGUMENTS:",
        "\t\t -help \t ( displays this text help message ) ",
        "\t\t -inD <string> \t ( directory path to take input coords from ) ",
        "\t\t -outD <string> \t ( directory path to put the trimmed output ) ",
        "\t\t -w[ell] <string> \t ( well defined to trim, ex: A20..A98,B2..B52 ) ",
        "\t\t -pl[DDT] <int> \t ( plDDT cutoff to use in triming the models ) ",
        "\t\t -file <string> \t ( files to trim, ex: unrelax ) ",
        "\t\t -deb <int> \t ( level of debug info, from 1-10 ) \n",
        "\t Examples:",
        f"\t\t {sname} -inD Unrelaxed -outD Trimmed -pl 55.0 -fi unrelax ",
        f"\t\t {sname} -well A20..A90,B2..B54 -inD Unrelaxed -outD Trimmed -fi unrelax \n",
    )
    # One print per original line: joining with "\n" emits the same bytes.
    print("\n".join(usage_lines))
    # sys.exit() instead of the exit() helper, which the site module only
    # installs for interactive use and is absent under "python -S".
    sys.exit()
# -------------------- subroutines ----------------------
# ---- Trimming and printing file
def TrimPrint(FromName, FinalName, *, trim_wd=None, wd_ranges=None,
              trim_plddt=None, plddt_cut=None):
    """Copy a PDB file, leaving out the ATOM records rejected by the trim mode.

    Only lines starting with "ATOM" are filtered.  Well-defined trimming takes
    precedence over pLDDT trimming when both switches are on.

    NOTE(review): indentation was lost in the reviewed copy of this file;
    lines that are not ATOM records are assumed to be copied through
    unchanged -- confirm against the original.

    Args:
        FromName: path of the coordinate file to read.
        FinalName: path of the trimmed file to write (overwritten).
        trim_wd: truthy to trim by well-defined ranges; when None the
            script-level ``Trim_WD`` is used.
        wd_ranges: iterable of "<chain> <first> <last>" strings; when None
            the script-level ``ranges`` is used.
        trim_plddt: truthy to trim by pLDDT; when None the script-level
            ``Trim_pLDDT`` is used.
        plddt_cut: ATOM records whose B-factor field is below this value are
            dropped; when None the script-level ``pLDDT_cut`` is used.

    Raises:
        ValueError: if a needed residue-number or B-factor field is not
            numeric.
    """
    # Fall back to the script-level settings for anything not overridden.
    if trim_wd is None:
        trim_wd = Trim_WD
    if wd_ranges is None:
        wd_ranges = ranges
    if trim_plddt is None:
        trim_plddt = Trim_pLDDT
    if plddt_cut is None:
        plddt_cut = pLDDT_cut

    # chain id -> list of (first, last) residue spans, parsed once up front.
    spans_by_chain = {}
    if trim_wd:
        for spec in wd_ranges:
            ch, first, last = spec.split(" ")
            spans_by_chain.setdefault(ch, []).append((int(first), int(last)))
    cutoff = float(plddt_cut) if trim_plddt else 0.0

    with open(FromName, "r") as IN, open(FinalName, "w") as RL:
        for line in IN:
            if line.startswith("ATOM"):
                if trim_wd:
                    # PDB columns are 1-based: chain id is column 22 and the
                    # residue number occupies columns 23-26 (column 27 is the
                    # insertion code and must not be part of the integer).
                    spans = spans_by_chain.get(line[21])
                    # Chains with no range are kept whole.  A residue of a
                    # listed chain is kept when it falls inside ANY of that
                    # chain's ranges, so several ranges per chain work.
                    if spans is not None:
                        resno = int(line[22:26])
                        if not any(lo <= resno <= hi for lo, hi in spans):
                            continue
                elif trim_plddt:
                    # The B-factor field (pLDDT in AF2 models) is columns
                    # 61-66; reading all six keeps "100.00" intact.
                    if float(line[60:66]) < cutoff:
                        continue
            RL.write(line)
# ---- Group all models into a single multimodel file
def AgrupaModels(Allfname,TrimmedFiles):
    """Write every trimmed file as one MODEL of a single multimodel file.

    For each path in ``TrimmedFiles``, in order, the output receives a
    "MODEL <n>" line (n counts from 1), a "REMARK File: <base name>" line,
    the ATOM / REMARK / TER lines of that file, and an "ENDMDL" line.

    NOTE(review): indentation was lost in the reviewed copy of this file; a
    single closing "END" line after the last model is assumed -- confirm
    against the original.

    Args:
        Allfname: path of the multimodel file to create (overwritten).
        TrimmedFiles: iterable of paths of the trimmed coordinate files.

    Raises:
        OSError: if the output or any input file cannot be opened.
    """
    kept_records = ("ATOM", "REMARK", "TER")
    with open(Allfname, "w") as OA:
        # Every entry gets its own MODEL/ENDMDL pair, so a path that appears
        # twice in a row can no longer yield an ENDMDL without a MODEL.
        for nm, f in enumerate(TrimmedFiles, start=1):
            with open(f, "r") as IF:
                OA.write(f"MODEL {nm}\n")
                # basename() honours the platform's path separators.
                OA.write(f"REMARK File: {os.path.basename(f)}\n")
                OA.writelines(line for line in IF if line.startswith(kept_records))
                OA.write("ENDMDL\n")
        OA.write("END\n")
# -- analyze args in command line
def ParseArgs(argv):
    """Translate command-line arguments into script-level settings.

    Flags are recognised by prefix after their leading dashes, so every
    spelling shown in the help text works (-inD, -outD, -w/-well,
    -pl/-plDDT, -fi/-file, -de/-deb).  Each flag consumes the argument that
    follows it as its value; that value is never re-read as a flag.
    Arguments that are not a known flag are ignored.

    Args:
        argv: the arguments without the program name (``sys.argv[1:]``).

    Returns:
        dict mapping setting names (``inDir``, ``outDir``, ``Welldef``,
        ``files``, ``pLDDT_cut``, ``debug``, ``Trim_WD``, ``Trim_pLDDT``) to
        their values, holding only the settings that were actually supplied.

    Raises:
        SystemExit: after printing a message, when a flag has no value, a
            numeric value is malformed, or both trim modes are requested.
    """
    # (flag prefix, setting name, value converter)
    known = (
        ("in", "inDir", str),
        ("ou", "outDir", str),
        ("w", "Welldef", str),
        ("fi", "files", str),
        # float, because the documented example is "-pl 55.0"
        ("pl", "pLDDT_cut", float),
        ("de", "debug", int),
    )
    settings = {}
    pending = iter(argv)
    for arg in pending:
        if not arg.startswith("-"):
            continue
        flag = arg.lstrip("-")
        hit = next((entry for entry in known if flag.startswith(entry[0])), None)
        if hit is None:
            continue
        key, convert = hit[1], hit[2]
        # Pulling from the same iterator consumes the value, so the loop
        # resumes at the argument after it.
        value = next(pending, None)
        if value is None:
            print(" Option", arg, "needs a value \n")
            sys.exit(1)
        try:
            settings[key] = convert(value)
        except ValueError:
            print(" Option", arg, "got an invalid value:", value, "\n")
            sys.exit(1)
    if "Welldef" in settings:
        settings["Trim_WD"] = 1
    if "pLDDT_cut" in settings:
        settings["Trim_pLDDT"] = 1
    if "Trim_WD" in settings and "Trim_pLDDT" in settings:
        print(" Well-defined and pLDDT options are mutually exclusive, use only one \n")
        sys.exit(1)
    return settings


# Only the supplied settings are overwritten; everything else keeps the
# default assigned at the top of the script.
globals().update(ParseArgs(sys.argv[1:]))
# -- get ranges from well defined, if supplied
# Each "A20..A98" item becomes one "<chain> <first> <last>" entry in `ranges`.
if Trim_WD:
    # Keep the rewritten, space-separated form in Welldef: the name of the
    # grouped output file is derived from it later in the script.
    Welldef = Welldef.replace("..", " ")
    for rng in Welldef.split(","):
        if not rng:
            continue  # tolerate a stray comma
        try:
            r1, r2 = rng.split(" ")
            ch1, ch2 = r1[0], r2[0]
            rn1, rn2 = int(r1[1:]), int(r2[1:])
        except (ValueError, IndexError):
            # A clear message instead of a bare traceback on a malformed item.
            print(" Cannot read well defined range '" + rng.replace(" ", "..")
                  + "', expected something like A20..A98 \n")
            sys.exit()
        if ch1 != ch2:
            # Still skipped, as before, but no longer silently.
            print(" Skipping range", rng.replace(" ", ".."), "(it spans two chains)")
            continue
        # Accept the bounds in either order; a reversed pair would otherwise
        # reject every residue of the chain.
        ranges.append(f"{ch1} {min(rn1, rn2)} {max(rn1, rn2)}")
    for allr in ranges:
        print(" Well defined ranges: ", allr)
# -- create directory, if asked, for output
# With no -outD the trimmed files go to the current directory, so there is
# nothing to create (os.makedirs("") would raise FileNotFoundError).
if outDir != "":
    # Refuse any existing path, directory or not, so earlier results are
    # never mixed with or overwritten by this run.
    if os.path.exists(outDir):
        print(" Directory", outDir, "already exists, pick other name \n")
        sys.exit()
    os.makedirs(outDir)
# -- Open input dir and load selected file names to trim
# An empty inDir means the current directory, matching how the input paths
# are joined afterwards (os.path.join("", name) == name).
scan_dir = inDir or "."
if not os.path.isdir(scan_dir):
    print(" Input directory", scan_dir, "not found \n")
    sys.exit()
with os.scandir(scan_dir) as entries:
    # Regular files only, sorted: scandir yields entries in arbitrary order,
    # which would make the MODEL numbering differ from run to run.
    Names.extend(sorted(
        entry.name for entry in entries
        if entry.is_file() and files in entry.name
    ))
# -- Trim every selected model; remember where each trimmed copy was written
for model_name in Names:
    trimmed_path = os.path.join(outDir, "trim_" + model_name)
    TrimPrint(os.path.join(inDir, model_name), trimmed_path)
    TrimmedFiles.append(trimmed_path)
# -- Finally grouping all trimmed files into one
# The grouped file is named after the trim that was actually applied.
# Well-defined is tested first because it is also the mode TrimPrint applies
# when both switches are on.
Welldef = Welldef.replace(" ", "-").replace(",", "_")
if Trim_WD:
    Group_fn = "Trimmed_WellDefined_" + Welldef + ".pdb"
elif Trim_pLDDT:
    # Only the integer part of the cutoff goes into the name (55.0 -> "55").
    Group_fn = "Trimmed_plDDT_" + str(pLDDT_cut).split(".")[0] + ".pdb"
else:
    # Without a trim mode there is no file name to build; stop with a
    # message instead of failing inside open("").
    print(" No trim option (-well or -pl) was given, nothing to group \n")
    sys.exit()
AgrupaModels(Group_fn,TrimmedFiles)
# -- Print a file with the list of names for further minimization
# ListToMini gets one trimmed file name per line, without its directory.
with open("ListToMini", "w") as OL:
    for fnm in TrimmedFiles:
        # basename() replaces the rindex/removeprefix pair and its bare
        # "except:", and does not need Python 3.9's str.removeprefix.
        OL.write(f"{os.path.basename(fnm)}\n")
sys.exit()