# NOTE: GitHub page chrome that was captured along with this file ("Skip to
# content", the branch/tag switcher dialog, the contributors notice) is not
# part of the script and has been reduced to this comment so the file parses.
import argparse
import json
import os
import re
import shutil
import subprocess
import sys
# URL prefix of the BMRB FTP entry directories, ending in the ``bmr`` stem
# (presumably completed with a numeric entry id -- confirm before use).
# NOTE(review): nothing in the visible code references this constant; it looks
# like groundwork for the unimplemented "wget bmrb" download path.
bmrb_addr = 'https://bmrb.io/ftp/pub/bmrb/entry_directories/bmr'
def bruker_acqus(acqus=None):
    """Return the pulse program name recorded in a Bruker ``acqus`` file.

    The file is scanned for the first line containing ``##$PULPROG= ``; the
    text after the last ``= `` on that line is returned with the ``<`` and
    ``>`` characters removed.

    Args:
        acqus: Path to the ``acqus`` parameter file.

    Returns:
        The pulse program name, or ``False`` when the path is not a regular
        file or no ``##$PULPROG= `` line is found.

    Raises:
        AssertionError: If ``acqus`` is ``None``.
    """
    # Raised explicitly (instead of via ``assert``) so the check is not
    # stripped under ``python -O``; the exception type is kept unchanged.
    if acqus is None:
        raise AssertionError('acqus path is required')
    if not os.path.isfile(os.path.abspath(acqus)):
        return False
    # errors='replace': an undecodable byte anywhere in the file must not
    # abort the scan before the PULPROG line is reached.
    with open(acqus, 'r', errors='replace') as fp:
        # Iterate lazily; the first match wins so the rest is never read.
        for line in fp:
            line = line.rstrip()
            if '##$PULPROG= ' in line:
                name = line.split('= ')[-1]
                return name.replace('<', '').replace('>', '')
    return False
def varian_procpar(procpar=None):
    """Return the pulse sequence name (``seqfil``) from a Varian ``procpar``.

    The parameter header is the first line starting with ``seqfil ``; the
    value is read from the next line that is not itself such a header. That
    line is split on whitespace and its second field, with double quotes
    removed, is returned.

    Args:
        procpar: Path to the ``procpar`` parameter file.

    Returns:
        The pulse sequence name, or ``False`` when the path is not a regular
        file, no ``seqfil`` header is present, the header is the last line,
        or the value line has fewer than two fields.

    Raises:
        AssertionError: If ``procpar`` is ``None``.
    """
    # Raised explicitly (instead of via ``assert``) so the check is not
    # stripped under ``python -O``; the exception type is kept unchanged.
    if procpar is None:
        raise AssertionError('procpar path is required')
    if not os.path.isfile(os.path.abspath(procpar)):
        return False
    # errors='replace': an undecodable byte elsewhere in the file must not
    # abort the scan before the seqfil entry is reached.
    with open(procpar, 'r', errors='replace') as fp:
        header_seen = False
        for line in fp:
            line = line.rstrip()
            if line.startswith('seqfil '):
                header_seen = True
                continue
            if header_seen:
                fields = line.split()
                # A truncated value line means there is no usable seqfil;
                # report that the same way as a missing entry.
                if len(fields) < 2:
                    return False
                return fields[1].replace('"', '')
    return False
def _split_and_remove(raw_path, chunk_prefix):
    """Archive *raw_path* into 49 MB gzip'd tar pieces, then delete it.

    Runs the equivalent of ``tar -czf - RAW | split -b49M - PREFIX`` without
    a shell, so paths with spaces or metacharacters are passed verbatim.
    The raw file is removed only when both tar and split succeed.

    Returns:
        ``True`` when the pieces were written and the raw file removed,
        ``False`` otherwise (the raw file is then left in place).
    """
    try:
        with subprocess.Popen(
                ['tar', '-czf', '-', raw_path],
                stdout=subprocess.PIPE) as tar:
            with subprocess.Popen(
                    ['split', '-b49M', '-', chunk_prefix],
                    stdin=tar.stdout) as split:
                # Drop our copy of the pipe so tar gets SIGPIPE if split
                # exits early instead of blocking forever.
                tar.stdout.close()
                split.wait()
            tar.wait()
    except OSError:
        # tar/split missing or not executable.
        return False
    if tar.returncode != 0 or split.returncode != 0:
        return False
    try:
        os.remove(raw_path)
    except OSError:
        return False
    return True


def get_data(bmrbid=None, bmrbpath=None, output=None):
    """Copy the time-domain experiments of one BMRB entry into *output*.

    Walks ``<bmrbpath>/<bmrbid>/timedomain_data``. Every directory holding a
    ``fid`` or ``ser`` file is treated as one experiment: its pulse sequence
    is read from ``procpar`` (via ``varian_procpar``) or ``acqus`` (via
    ``bruker_acqus``), its non-hidden files are copied to
    ``<output>/<bmrbid>/<pulse>_<relative path with '/' replaced by '_'>``,
    and the copied ``fid``/``ser`` is replaced by 49 MB ``sfid.*``/``sser.*``
    pieces of a gzip'd tar stream.

    Args:
        bmrbid: Entry directory name (the visible caller passes ``bmr<id>``).
        bmrbpath: Directory containing the entry directories.
        output: Directory under which ``<bmrbid>/`` is created.

    Returns:
        ``(True, True)`` on success, otherwise ``(False, message)``. Messages
        containing ``aborting`` are treated as fatal by the visible caller.

    Raises:
        OSError: If an output directory cannot be created.
    """
    data_path = os.path.join(
        os.path.abspath(bmrbpath), f'{bmrbid}', 'timedomain_data')
    if not os.path.isdir(data_path):
        return False, f'bmrb id {bmrbid} does not have timedomain_data'

    out = os.path.join(os.path.abspath(output), f'{bmrbid}')
    os.makedirs(out, exist_ok=True)

    for root, _dirs, files in os.walk(data_path):
        # Archive every raw file present; handling only ``fid`` while
        # deleting both would silently lose ``ser``.
        raw_names = [name for name in ('fid', 'ser') if name in files]
        if not raw_names:
            continue

        if 'procpar' in files:
            pulse = varian_procpar(procpar=os.path.join(root, 'procpar'))
            if not pulse:
                return False, 'no seqfil?\naborting'
        elif 'acqus' in files:
            pulse = bruker_acqus(acqus=os.path.join(root, 'acqus'))
            if not pulse:
                return False, 'no pulprog?\naborting'
        else:
            return False, (
                f'no acquistion/param files?\npath: {root}\naborting')

        source_folder = root.split('timedomain_data')[-1].replace('/', '_')
        exp_dir = os.path.join(out, f'{pulse}_{source_folder}')
        os.makedirs(exp_dir, exist_ok=True)

        # Same selection as the shell glob ``cp root/* exp_dir``: regular
        # entries of this directory only, dotfiles excluded, no recursion.
        for name in files:
            if name.startswith('.'):
                continue
            try:
                shutil.copy(os.path.join(root, name), exp_dir)
            except OSError as err:
                # Copying stays best-effort, but failures are now visible.
                print(f'warning: could not copy {name} from {root}: {err}',
                      file=sys.stderr)

        for name in raw_names:
            # The absolute path is passed to tar on purpose: it keeps the
            # member names inside the archive as they were before.
            archived = _split_and_remove(
                os.path.join(exp_dir, name),
                os.path.join(exp_dir, f's{name}.'))
            if not archived:
                print(f'warning: could not split {name} in {exp_dir}; '
                      'raw file left in place', file=sys.stderr)
    return True, True
def _build_parser():
    """Create the command-line parser for the FID collection script."""
    parser = argparse.ArgumentParser(description='Collect FIDs')
    parser.add_argument(
        '--artinaset', '-a', required=False, type=str, metavar='<str>',
        help='path to artina set meta data, JSON file included with repository')
    parser.add_argument(
        '--bmrb', '-b', required=False, type=str, metavar='<str>',
        help='path to BMRB data rsync directory')
    parser.add_argument(
        '--output', '-o', required=True, type=str, metavar='<str>',
        help='path to where to save fid datasets collected')
    parser.add_argument(
        '--num', '-n', required=False, type=int, metavar='<int>',
        help='number of fid datasets to collect from artina set')
    parser.add_argument(
        '--ids', '-i', nargs='+', required=False, type=str, metavar='<str>',
        help='provide a list of bmrb ids to collect fid datasets from')
    return parser


def _collect(bmrbid, bmrbpath, output, errs):
    """Run ``get_data`` for one entry; exit on fatal messages, else record."""
    status, msg = get_data(bmrbid=bmrbid, bmrbpath=bmrbpath, output=output)
    if status:
        return
    # get_data marks unrecoverable problems with the word 'aborting'.
    if 'aborting' in msg:
        print(msg)
        sys.exit(1)
    errs.append(msg)


def main(argv=None):
    """Collect FID datasets for explicit BMRB ids or for the artina set.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    With ``--ids`` each id is prefixed with ``bmr`` and passed to
    ``get_data``. Otherwise every entry of the ``--artinaset`` JSON file is
    processed, using the part of its ``'BMRB code'`` value before the first
    comma, until ``--num`` entries have been handled. Non-fatal messages are
    collected and printed as JSON at the end.

    Exits with status 2 for invalid arguments (argparse convention) and with
    status 1 when a run is aborted.

    NOTE(review): entries for which ``get_data`` fails still count towards
    ``--num``. That is the existing behaviour and is kept; confirm whether
    only successfully collected datasets should count.
    """
    parser = _build_parser()
    arg = parser.parse_args(argv)

    # check args -- explicit errors rather than ``assert``, which is
    # stripped under ``python -O`` and only yields a bare traceback.
    if arg.bmrb and not os.path.isdir(os.path.abspath(arg.bmrb)):
        parser.error(f'--bmrb: {arg.bmrb} is not a directory')
    if arg.artinaset and not os.path.isfile(os.path.abspath(arg.artinaset)):
        parser.error(f'--artinaset: {arg.artinaset} is not a file')
    if arg.num is not None and arg.num <= 0:
        parser.error('--num must be a positive integer')

    if arg.ids:
        for raw_id in arg.ids:
            try:
                int(raw_id)
            except ValueError:
                print(f'{raw_id} is not int')
                print('probably not a valid bmrb id')
                print('aborting')
                sys.exit(1)
        if arg.num:
            print('--num not necessary when using --ids')
            print('re-run without --num')
            print('aborting')
            sys.exit(1)
    elif not arg.artinaset:
        # Previously this fell through to open(None) and a TypeError.
        parser.error('one of --ids or --artinaset is required')
    elif not arg.bmrb:
        # Previously this crashed inside get_data on abspath(None).
        parser.error('--bmrb is required with --artinaset')

    errs = []
    if arg.ids:
        if arg.bmrb:
            for raw_id in arg.ids:
                _collect('bmr' + raw_id, arg.bmrb, arg.output, errs)
        else:
            # Placeholder kept from the original ("wget bmrb"): fetching
            # entries is not implemented, so nothing is collected.
            print('warning: --bmrb not given and downloading is not '
                  'implemented; nothing collected', file=sys.stderr)
    else:
        with open(arg.artinaset, 'r') as fp:
            entries = json.load(fp)
        count = 0
        for entry in entries:
            code = entry['BMRB code']
            try:
                bmrbid = 'bmr' + code.split(',')[0]
            except AttributeError:
                # Entry without a string BMRB code (e.g. null): skip it.
                continue
            _collect(bmrbid, arg.bmrb, arg.output, errs)
            count += 1
            if count == arg.num:
                break

    if errs:
        print(json.dumps(errs, indent=2))


if __name__ == '__main__':
    main()
"""
iterate down each subdirectory of timedomain
detect if has procpar or pulprog
if procpar -- seqfil
I can't remember the other way for Bruker; specdb knows
os.system()
cmd = cp ./* outputdir/id/seqfil
make a github
git clone
git add data/*
git commit -m "all data added"
git push
do only nesg
ask for a limit
default is 10
summary.json
pulse sequence: [paths]
readme
no processed data, need to do that separately
to get more -- the BMRB can be downloaded separately -- can take a full day --
but no more than that
wget ... ?
"""