Skip to content
Permalink
5ab4ed8e4a
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
71 lines (61 sloc) 2.67 KB
import glob
import os
import sys
import tempfile
from os import listdir
def parse_ranges(ranges_str):
    """Parse a residue-range specification into per-chain range lists.

    The specification is a comma-separated list of ``CHAIN:START..END``
    entries, for example ``"A:1..10, B:5..20"``. Whitespace around the commas
    is optional, and a chain may appear more than once.

    Args:
        ranges_str: The specification string.

    Returns:
        A dict mapping each chain identifier to a list of ``(start, end)``
        integer tuples, in the order they appear in ``ranges_str``.

    Raises:
        ValueError: If an entry does not have the ``CHAIN:START..END`` form
            or its bounds are not integers.
    """
    # Local import so this function does not depend on the module's import list.
    import re

    ranges = {}
    # Split on commas with optional surrounding whitespace so that both
    # "A:1..2, B:3..4" and "A:1..2,B:3..4" are accepted.
    for part in re.split(r"\s*,\s*", ranges_str):
        try:
            chain, start_end = part.split(":")
            start_res, end_res = map(int, start_end.split(".."))
        except ValueError as err:
            raise ValueError(
                "Invalid residue range {!r}; expected CHAIN:START..END".format(part)
            ) from err
        # Drop stray whitespace around a chain id, but keep a chain that is
        # entirely blank: a blank chain column is a legitimate identifier.
        chain = chain.strip() or chain
        ranges.setdefault(chain, []).append((start_res, end_res))
    return ranges
def adjust_residue_numbers(ranges):
    """Build per-chain maps from original residue numbers to new ones.

    For every chain, the residues covered by its ranges are numbered
    consecutively starting at 1, in the order the ranges are listed.

    Args:
        ranges: Dict mapping a chain identifier to a list of inclusive
            ``(start, end)`` residue-number tuples.

    Returns:
        A dict mapping each chain identifier to a dict of
        ``{original_residue_number: new_residue_number}``. A range whose
        start is greater than its end contributes no residues.
    """
    adjustment_maps = {}
    for chain, chain_ranges in ranges.items():
        adjustment_map = {}
        for start_res, end_res in chain_ranges:
            for original_res_num in range(start_res, end_res + 1):
                # Overlapping or repeated ranges must not renumber a residue
                # that already has a number; doing so would leave gaps in the
                # new numbering. The first assignment wins.
                if original_res_num not in adjustment_map:
                    adjustment_map[original_res_num] = len(adjustment_map) + 1
        adjustment_maps[chain] = adjustment_map
    return adjustment_maps
def trim_pdb_by_residues(pdb_file_path, ranges_str):
    """Trim and renumber residues of a PDB file in place.

    Only ``ATOM`` and ``HETATM`` records are written to the result; every
    other record is dropped. For a chain named in ``ranges_str``, records
    whose residue number falls inside one of that chain's ranges are kept
    with the residue number (columns 23-26) rewritten to the new numbering,
    and records outside the ranges are discarded. Records of chains not
    named in ``ranges_str`` are copied unchanged.

    Args:
        pdb_file_path: Path of the PDB file to rewrite.
        ranges_str: Range specification understood by ``parse_ranges``,
            e.g. ``"A:1..10, B:5..20"``.

    Returns:
        ``pdb_file_path``, whose content has been replaced.

    Raises:
        ValueError: If ``ranges_str`` is malformed or an ATOM/HETATM record
            has a non-integer residue number field.
        OSError: If the file cannot be read or replaced.
    """
    ranges = parse_ranges(ranges_str)
    adjustment_maps = adjust_residue_numbers(ranges)

    # Create the temporary file next to the target: os.replace cannot move a
    # file across filesystems, and the system temp dir is often on another one.
    target_dir = os.path.dirname(os.path.abspath(pdb_file_path))
    temp_file_path = None
    try:
        with open(pdb_file_path, 'r') as pdb_file:
            temp_fd, temp_file_path = tempfile.mkstemp(dir=target_dir, suffix=".tmp")
            with os.fdopen(temp_fd, 'w') as output_file:
                for line in pdb_file:
                    if not line.startswith(("ATOM", "HETATM")):
                        continue
                    chain_id = line[21]
                    residue_num = int(line[22:26].strip())
                    if chain_id not in ranges:
                        # Write lines for chains not in ranges as they are
                        output_file.write(line)
                        continue
                    # The map holds exactly the residues covered by the
                    # chain's ranges, so a lookup replaces the range scan.
                    adjusted_residue_num = adjustment_maps[chain_id].get(residue_num)
                    if adjusted_residue_num is not None:
                        # Rewrite line with adjusted residue number
                        output_file.write(
                            line[:22] + "{:>4}".format(adjusted_residue_num) + line[26:]
                        )
        # mkstemp creates the file with owner-only permissions; carry over the
        # original file's mode so the replacement does not change access.
        os.chmod(temp_file_path, os.stat(pdb_file_path).st_mode & 0o7777)
        # Replace the original file with the filtered content
        os.replace(temp_file_path, pdb_file_path)
    except BaseException:
        # Do not leave a stray temporary file behind on failure.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError:
                pass
        raise
    return pdb_file_path
# Command-line entry point: trim every ``.pdb`` file directly inside the given
# directory using the same residue-range specification.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python script.py <directory> <residue_ranges>")
        sys.exit(1)
    directory = sys.argv[1]
    ranges_str = sys.argv[2]
    # Join with os.path.join so the path is correct whether or not the
    # directory argument ends with a path separator.
    pdb_files = [os.path.join(directory, f) for f in listdir(directory) if f.endswith('.pdb')]
    for f in pdb_files:
        trim_pdb_by_residues(f, ranges_str)