# TrimmModels.py

#!/usr/bin/env python

import sys
import os

# ---- input / output locations ------------------------------------------
inDir = ""          # directory the input coordinate files are read from
outDir = ""         # directory the trimmed files are written to
files = ""          # text a file name must contain to be selected
myf = ""            # spare string, not referenced by the visible script

# ---- trimming mode: pLDDT cutoff ---------------------------------------
Trim_pLDDT = 0      # 1 when pLDDT based trimming was requested, else 0
pLDDT_cut = 0       # pLDDT cutoff value (ex. -pl 55)

# ---- trimming mode: well-defined residue ranges ------------------------
Trim_WD = 0         # 1 when well-defined trimming was requested, else 0
Welldef = ""        # range text as typed (ex. B2..B50,C4..C32)
ranges = []         # parsed ranges, one "chain first last" string each

# ---- bookkeeping ---------------------------------------------------------
debug = 0           # debug level, 0 by default
Group_fn = ""       # name of the final multimodel file
Names = []          # names of the selected input files
TrimmedFiles = []   # paths of the trimmed coordinate files that were written

# Show description and usage.
# The help text is printed when too few arguments are given, or when any
# argument is a help flag. The text below documents "-help", so that spelling
# (and "--help") is accepted as well as "-h", in any position.
sname = os.path.basename(__file__)
if len(sys.argv) < 4 or any(a in ("-h", "-help", "--help") for a in sys.argv[1:]):
    print("")
    print(f"\t ** {sname} (RTT, 2024) ")
    print("\t Python script to output AF2 coords trimmed using a well-defined range or pLDDT cutoff")
    print("\t Well-defined OR pLDDT options are mutually exclusive, only one can be used \n")
    print("\t  (1) outDir will hold: ")
    print("\t      - the trimmed models from inDir ")
    print("\t  (2) Current dir, from where the script was called, will hold: ")
    print("\t      - (a) multimodel file with trimmed models grouped together (ex. Trimmed_plDDT_45.pdb) ")
    print("\t      - (b) text file with a list of trimmed models (ex. ListToMini) can be used to minimize \n")
    print(f"\t USAGE:\t {sname} <ARGUMENTS>   \n")
    print("\t ARGUMENTS:")
    print("\t\t -help             \t ( displays this text help message            )        ")
    print("\t\t -inD     <string> \t ( directory path to take input coords from   )        ")
    print("\t\t -outD    <string> \t ( directory path to put the trimmed  output  )        ")
    print("\t\t -w[ell]  <string> \t ( well defined to trim, ex: A20..A98,B2..B52 )        ")
    print("\t\t -pl[DDT] <int>    \t ( plDDT cutoff to use in triming the models  )        ")
    print("\t\t -file    <string> \t ( files to trim, ex: unrelax                 )        ")
    print("\t\t -deb     <int>    \t ( level of debug info, from 1-10             )      \n")
    print("\t Examples:")
    print(f"\t\t {sname} -inD Unrelaxed -outD Trimmed -pl 55.0 -fi unrelax ")
    print(f"\t\t {sname} -well A20..A90,B2..B54 -inD Unrelaxed -outD Trimmed -fi unrelax \n")
    # sys.exit() instead of the builtin exit(): the latter is injected by the
    # "site" module and is missing when Python runs with -S or from a frozen app.
    sys.exit()

# -------------------- subroutines ----------------------
# ---- Trimming and printing file
def TrimPrint(FromName, FinalName):
    """Copy a PDB file, leaving out the ATOM records that fail the active trim.

    The trim mode comes from module-level settings:

    * ``Trim_WD`` true: an ATOM record whose chain appears in ``ranges`` is kept
      only if its residue number lies inside at least one range given for that
      chain. Atoms of chains that have no range at all are kept.
    * otherwise, ``Trim_pLDDT`` true: an ATOM record is kept only if its pLDDT
      (read from the B-factor field) is at least ``pLDDT_cut``.

    Every line that does not start with ``ATOM`` is copied unchanged.

    Args:
        FromName: path of the coordinate file to read.
        FinalName: path of the trimmed file to write (overwritten if present).

    Raises:
        OSError: if either file cannot be opened.
        ValueError: if a needed residue-number or B-factor field is not numeric.
    """
    # Parse the "chain first last" strings once per file, not once per atom.
    spans_by_chain = {}
    if Trim_WD:
        for spec in ranges:
            ch, first, last = spec.split(" ")
            spans_by_chain.setdefault(ch, []).append((int(first), int(last)))
    cutoff = float(pLDDT_cut) if Trim_pLDDT else None

    with open(FromName, "r") as IN, open(FinalName, "w") as RL:
        for line in IN:
            if line.startswith("ATOM"):
                if Trim_WD:
                    # Chain identifier is PDB column 22.
                    spans = spans_by_chain.get(line[21])
                    if spans is not None:
                        # Residue number is PDB columns 23-26; column 27 is the
                        # insertion code and must not be part of the integer.
                        resno = int(line[22:26])
                        # Inside ANY range of the chain is enough, so several
                        # ranges on one chain no longer cancel each other out.
                        if not any(lo <= resno <= hi for lo, hi in spans):
                            continue
                elif Trim_pLDDT:
                    # B-factor field is PDB columns 61-66; reading the full
                    # field keeps values such as 100.00 intact.
                    if float(line[60:66]) < cutoff:
                        continue
            RL.write(line)

# ---- Group all models into a single multimodel file
def AgrupaModels(Allfname,TrimmedFiles):
    """Write every listed coordinate file as one MODEL of a multimodel file.

    Each model is opened by a ``MODEL`` line carrying a running number and a
    ``REMARK File:`` line with the part of the path after the last "/". Only
    lines starting with ATOM, REMARK or TER are copied from the input; the
    model is closed with ``ENDMDL`` and the whole file with ``END``. A path
    equal to the one handled immediately before it is not written again.

    Args:
        Allfname: path of the multimodel file to create.
        TrimmedFiles: paths of the files to merge, in output order.
    """
    wanted = ("ATOM", "REMARK", "TER")
    previous = ""
    model_no = 0
    with open(Allfname, "w") as merged:
        for path in TrimmedFiles:
            with open(path, "r") as model:
                if path == previous:
                    # same file as the one just written: nothing to add
                    continue
                previous = path
                model_no += 1
                short_name = path.split("/")[-1]
                merged.write(f"MODEL      {model_no}\n")
                merged.write(f"REMARK File: {short_name}\n")
                merged.writelines(row for row in model if row.startswith(wanted))
                merged.write("ENDMDL\n")
        merged.write("END\n")

# -- analyze args in command line
# Options are matched by prefix after normalising to one leading dash, so every
# spelling shown in the help text (-inD, -outD, -w[ell], -pl[DDT], -file, -deb)
# is recognised. An option and its value are consumed together, so a value that
# happens to contain option-like text (e.g. a directory named "my-input") is
# never mistaken for another option. Unknown arguments are ignored.
x = 1
while x < len(sys.argv):
    arg = sys.argv[x]
    opt = "-" + arg.lstrip("-") if arg.startswith("-") else ""
    if not opt.startswith(("-in", "-ou", "-w", "-fi", "-pl", "-de")):
        x += 1
        continue
    if x + 1 >= len(sys.argv):
        sys.exit(f" Option {arg} needs a value")
    value = sys.argv[x + 1]
    if opt.startswith("-in"):
        inDir = value
    elif opt.startswith("-ou"):
        outDir = value
    elif opt.startswith("-w"):
        Welldef = value
        Trim_WD = 1
    elif opt.startswith("-fi"):
        files = value
    elif opt.startswith("-pl"):
        # float, not int: the usage example passes "-pl 55.0"
        try:
            pLDDT_cut = float(value)
        except ValueError:
            sys.exit(f" Option {arg} needs a number, got '{value}'")
        Trim_pLDDT = 1
    else:
        # only "-de..." can reach this branch
        try:
            debug = int(value)
        except ValueError:
            sys.exit(f" Option {arg} needs an integer, got '{value}'")
    x += 2

# The two trimming modes are documented as mutually exclusive, and exactly one
# is needed to have anything to trim.
if Trim_WD and Trim_pLDDT:
    sys.exit(" Well-defined and pLDDT trimming are mutually exclusive, use only one")
if not (Trim_WD or Trim_pLDDT):
    sys.exit(" Nothing to do: give either a well-defined range (-well) or a pLDDT cutoff (-pl)")

# -- get ranges from well defined, if supplied
# Each accepted range is stored in `ranges` as the string "chain first last".
if Trim_WD:
    # ".." becomes a blank in Welldef itself; the text is reused later on to
    # build the name of the multimodel file.
    Welldef = Welldef.replace("..", " ")
    for rng in Welldef.split(","):
        ends = rng.split()
        if len(ends) != 2 or len(ends[0]) < 2 or len(ends[1]) < 2:
            sys.exit(f" Cannot read well defined range '{rng}', expected something like A20..A98")
        r1, r2 = ends
        ch1, ch2 = r1[0], r2[0]
        try:
            rn1, rn2 = int(r1[1:]), int(r2[1:])
        except ValueError:
            sys.exit(f" Cannot read residue numbers in well defined range '{rng}'")
        if ch1 != ch2:
            # Ranges spanning two chains were always left out; now say so.
            print(" Skipping range", rng, "- both ends must be on the same chain")
            continue
        if rn1 > rn2:
            # A reversed range (A98..A20) would otherwise reject the whole chain.
            rn1, rn2 = rn2, rn1
        ranges.append(f"{ch1} {rn1} {rn2}")
    if not ranges:
        sys.exit(" No usable well defined range was given")
    for allr in ranges:
        print(" Well defined ranges: ", allr)

# -- create directory, if asked, for output
# Without -outD nothing is created here (os.makedirs("") would raise); the
# output path is then built from an empty directory name.
if outDir != "":
    if os.path.isdir(outDir):
        print(" Directory", outDir, "already exists, pick other name \n")
        sys.exit(1)   # non-zero status: this is an error exit
    os.makedirs(outDir, exist_ok=True)

# -- Open input dir and load selected file names to trim
# An empty inDir means the current directory (os.scandir("") would raise).
with os.scandir(inDir or ".") as entries:
    for entry in entries:
        # regular files only: a matching sub-directory cannot be trimmed
        if entry.is_file() and files in entry.name:
            Names.append(entry.name)

# os.scandir yields entries in arbitrary order; sort so that the model numbers
# in the multimodel file are the same on every run.
Names.sort()
if not Names:
    print(" No file with", repr(files), "in its name found in", inDir or ".")
    sys.exit(1)

# -- Call to trim files and print
for inf in Names:
    inputfile = os.path.join(inDir, inf)
    outfile = os.path.join(outDir, "trim_" + inf)
    TrimPrint(inputfile, outfile)
    TrimmedFiles.append(outfile)

# -- Finally grouping all trimmed files into one
# Turn the range text into a file-name tag: A20..A98,B2..B52 -> A20-A98_B2-B52
# (works whether or not ".." was already replaced by a blank earlier on).
Welldef = Welldef.replace("..", "-").replace(" ", "-").replace(",", "_")
pLDDT = str(pLDDT_cut).split(".")   # pLDDT[0] is the integer part of the cutoff

# Exactly one name is chosen. Well-defined comes first because TrimPrint also
# gives it precedence when both flags are set; the last branch avoids handing
# an empty file name to AgrupaModels when no trimming mode was selected.
if Trim_WD:
    Group_fn = "Trimmed_WellDefined_" + Welldef + ".pdb"
elif Trim_pLDDT:
    Group_fn = "Trimmed_plDDT_" + pLDDT[0] + ".pdb"
else:
    Group_fn = "Untrimmed_models.pdb"
    print(" No trimming option was given, models are grouped in", Group_fn)

AgrupaModels(Group_fn,TrimmedFiles)

# -- Print a file with the list of names for further minimization
# One bare file name per line (directory part removed).
with open("ListToMini", "w") as OL:
    for fnm in TrimmedFiles:
        # os.path.basename replaces the hand-rolled rindex/removeprefix code,
        # which needed a bare "except:" and Python 3.9+.
        OL.write(f"{os.path.basename(fnm)}\n")

sys.exit()
	# NOTE: a second, whitespace-mangled copy of the script above used to follow
	# here. It came after the final sys.exit(), so it could never run, and its
	# stray indentation stopped the whole file from being parsed. It duplicated
	# the code above statement for statement and has been removed.