Skip to content
Permalink
5ab4ed8e4a
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
52 lines (40 sloc) 1.9 KB
import sys
import os
from paths import PDB_FILES
def process_files(pdb_files, output_file, start_index, pdb_dir=None):
    """Combine several PDB files into one multi-model PDB file.

    Each input file becomes one model in the output: a ``MODEL <n>`` line,
    a ``REMARK`` line carrying the input's base name, the input's own lines
    (minus any line starting with ``END`` or ``MODEL``), then ``ENDMDL``.
    A single ``END`` line closes the output file.

    Args:
        pdb_files: File names, relative to ``pdb_dir``, in output order.
        output_file: Path of the combined file; overwritten if it exists.
        start_index: Model number given to the first file; subsequent
            files count up from it.
        pdb_dir: Directory containing ``pdb_files``. When omitted, the
            module-level ``pdb_dir`` set by the script entry point is used;
            if that is not defined either, the names are opened as given.
    """
    if pdb_dir is None:
        # Backward compatibility: existing callers rely on a module-level
        # ``pdb_dir`` instead of passing the directory explicitly.
        pdb_dir = globals().get("pdb_dir") or ""

    with open(output_file, 'w') as outfile:
        for model_number, pdb_file in enumerate(pdb_files, start=start_index):
            pdb_path = os.path.join(pdb_dir, pdb_file)
            outfile.write(f"MODEL {model_number}\n")
            outfile.write(f"REMARK {os.path.basename(pdb_file)}\n")
            with open(pdb_path, 'r') as infile:
                for line in infile:
                    # Drop the input's own END/ENDMDL/MODEL records; this
                    # function writes its own framing around every model.
                    if line.startswith(("END", "MODEL")):
                        continue
                    # A final line without a newline would otherwise get
                    # the ENDMDL record glued onto it.
                    if not line.endswith("\n"):
                        line += "\n"
                    outfile.write(line)
            outfile.write("ENDMDL\n")
        # Terminate the last record so the output is a well-formed text file.
        outfile.write("END\n")
    print(f"All PDB files have been combined into {output_file}")
if __name__ == "__main__":
    # Usage: <script> <directory-of-pdb-files>
    # Combines every *.pdb file in the directory into one or more
    # multi-model PDB files whose names start with PDB_FILES.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <directory-of-pdb-files>")
    directory = sys.argv[1]

    # Kept as a module-level name: process_files() looks up ``pdb_dir``
    # in module scope when it is not given the directory explicitly.
    pdb_dir = directory

    # The last path component of the input directory names the output;
    # normpath drops trailing separators and handles alternate separators.
    base_output_filename = os.path.basename(os.path.normpath(directory))

    # Sort files to maintain a consistent model order across runs.
    pdb_files = sorted(f for f in os.listdir(pdb_dir) if f.endswith('.pdb'))

    # Maximum number of input files combined into each output file.
    files_per_output = 3000

    # Number the outputs only when there is more than one of them. Exactly
    # ``files_per_output`` inputs still fit in a single, unsuffixed file.
    suffix = len(pdb_files) > files_per_output

    for start_idx in range(0, len(pdb_files), files_per_output):
        # Slicing clamps at the end of the list, so the last batch may be short.
        batch = pdb_files[start_idx:start_idx + files_per_output]
        if suffix:
            batch_number = start_idx // files_per_output + 1
            output_file = f"{PDB_FILES}{base_output_filename}_{batch_number}.pdb"
        else:
            output_file = f"{PDB_FILES}{base_output_filename}.pdb"
        # Model numbers continue across output files (1-based overall).
        process_files(batch, output_file, start_idx + 1)