Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
sharing_fids/getdata.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
231 lines (179 sloc)
5.08 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import json
import os
import re
import shlex
import sys
# Base URL for fetching BMRB entry directories over HTTP (append the numeric
# entry id). Not referenced in the visible code -- presumably intended for the
# wget download path sketched in the notes at the bottom of the file.
bmrb_addr = 'https://bmrb.io/ftp/pub/bmrb/entry_directories/bmr'
def bruker_acqus(acqus=None):
    """Extract the pulse program name from a Bruker ``acqus`` file.

    Parameters
    ----------
    acqus : str or None
        Path to the acqus parameter file.

    Returns
    -------
    str or bool
        The PULPROG value with its angle brackets stripped, or ``False``
        when the path is missing/invalid or no PULPROG line is found.
    """
    # Explicit check instead of assert: asserts are stripped under -O and
    # a missing file already returns False, so treat None the same way.
    if acqus is None or not os.path.isfile(os.path.abspath(acqus)):
        return False
    with open(acqus, 'r') as fp:
        # iterate the file lazily instead of readlines() -- same order,
        # no need to hold the whole file in memory
        for line in fp:
            line = line.rstrip()
            if '##$PULPROG= ' in line:
                # line looks like: ##$PULPROG= <zgpr> ; keep the payload
                pulse = line.split('= ')[-1]
                return pulse.replace('<', '').replace('>', '')
    return False
def varian_procpar(procpar=None):
    """Extract the pulse sequence name (``seqfil``) from a Varian procpar file.

    In procpar format the value sits on the line *after* the ``seqfil``
    header, as the second whitespace-separated field, wrapped in quotes.

    Parameters
    ----------
    procpar : str or None
        Path to the procpar parameter file.

    Returns
    -------
    str or bool
        The sequence name, or ``False`` when the path is missing/invalid
        or no ``seqfil`` entry is found.
    """
    # Explicit check instead of assert (stripped under -O); None behaves
    # like a missing file.
    if procpar is None or not os.path.isfile(os.path.abspath(procpar)):
        return False
    with open(procpar, 'r') as fp:
        grab_next_line = False
        for line in fp:
            line = line.rstrip()
            # startswith is equivalent to re.search(r'^seqfil ', line)
            if line.startswith('seqfil '):
                grab_next_line = True
                continue
            if grab_next_line:
                # value line looks like: 1 "gNhsqc"
                return line.split()[1].replace('"', '')
    return False
def get_data(bmrbid=None, bmrbpath=None, output=None):
    """Collect time-domain FID/SER data for one BMRB entry.

    Walks ``<bmrbpath>/<bmrbid>/timedomain_data`` for directories holding a
    raw ``fid`` or ``ser`` file, determines the pulse sequence from the
    vendor parameter file (Varian ``procpar`` or Bruker ``acqus``), copies
    each experiment into ``<output>/<bmrbid>/<pulse>_<subdir>``, then tars
    and splits the raw data into <=49M chunks and removes the archived
    original.

    Returns
    -------
    tuple
        ``(True, True)`` on success, ``(False, message)`` on failure.
        Messages containing ``'aborting'`` signal fatal problems.
    """
    data_path = os.path.join(
        os.path.abspath(bmrbpath), f'{bmrbid}', 'timedomain_data')
    if not os.path.isdir(data_path):
        return False, f'bmrb id {bmrbid} does not have timedomain_data'
    out = os.path.join(os.path.abspath(output), f'{bmrbid}')
    os.makedirs(out, exist_ok=True)  # replaces os.system('mkdir -p ...')
    for root, dirs, files in os.walk(data_path):
        if 'fid' not in files and 'ser' not in files:
            continue
        if 'procpar' in files:
            pulse = varian_procpar(procpar=os.path.join(root, 'procpar'))
            if not pulse:
                return False, 'no seqfil?\naborting'
        elif 'acqus' in files:
            pulse = bruker_acqus(acqus=os.path.join(root, 'acqus'))
            if not pulse:
                return False, 'no pulprog?\naborting'
        else:
            # original wrapped this f-string in ''.join(...), which is a no-op
            return False, f'no acquisition/param files?\npath: {root}\naborting'
        # flatten the sub-path below timedomain_data into one directory name
        source_folder = root.split('timedomain_data')[-1].replace('/', '_')
        exp_dir = os.path.join(out, f'{pulse}_{source_folder}')
        os.makedirs(exp_dir, exist_ok=True)
        # quote paths so spaces/metacharacters in BMRB paths can't break
        # (or inject into) the shell command; the bare /* keeps glob expansion
        os.system(f'cp {shlex.quote(root)}/* {shlex.quote(exp_dir)} 2>/dev/null')
        raw = 'fid' if 'fid' in files else 'ser'
        src = os.path.join(exp_dir, raw)
        # tar and split into <=49M pieces (keeps chunks under hosting limits)
        os.system(
            f'tar -czf - {shlex.quote(src)} | '
            f'split -b49M - {shlex.quote(exp_dir)}/s{raw}.')
        # Remove only the file that was archived. The original rm'ed BOTH
        # fid and ser: when a directory held both, ser was deleted without
        # ever being archived (data loss), and the absent one spammed errors.
        if os.path.isfile(src):
            os.remove(src)
    return True, True
# Command-line interface: options declared as a table, registered in a loop.
_CLI_OPTIONS = [
    (('--artinaset', '-a'),
     dict(required=False, type=str, metavar='<str>',
          help='path to artina set meta data, JSON file included with repository')),
    (('--bmrb', '-b'),
     dict(required=False, type=str, metavar='<str>',
          help='path to BMRB data rsync directory')),
    (('--output', '-o'),
     dict(required=True, type=str, metavar='<str>',
          help='path to where to save fid datasets collected')),
    (('--num', '-n'),
     dict(required=False, type=int, metavar='<int>',
          help='number of fid datasets to collect from artina set')),
    (('--ids', '-i'),
     dict(nargs='+', required=False, type=str, metavar='<str>',
          help='provide a list of bmrb ids to collect fid datasets from')),
]

parser = argparse.ArgumentParser(description='Collect FIDs')
for _flags, _opts in _CLI_OPTIONS:
    parser.add_argument(*_flags, **_opts)
arg = parser.parse_args()
# ---- argument sanity checks ----------------------------------------------
# Explicit error messages + sys.exit() instead of bare asserts: asserts are
# stripped under python -O and give users an unhelpful traceback.
if arg.bmrb and not os.path.isdir(os.path.abspath(arg.bmrb)):
    print(f'--bmrb path {arg.bmrb} is not a directory')
    print('aborting')
    sys.exit()
if arg.artinaset and not os.path.isfile(os.path.abspath(arg.artinaset)):
    print(f'--artinaset path {arg.artinaset} is not a file')
    print('aborting')
    sys.exit()
# note: arg.num == 0 is falsy and passes through, matching the original
if arg.num and arg.num <= 0:
    print('--num must be a positive integer')
    print('aborting')
    sys.exit()
if arg.ids:
    for bmrbid in arg.ids:  # renamed from 'id' to avoid shadowing builtin
        try:
            int(bmrbid)  # BMRB ids are numeric; value itself is unused
        except ValueError:  # was a bare except: -- catch only the parse error
            print(f'{bmrbid} is not int')
            print('probably not a valid bmrb id')
            print('aborting')
            sys.exit()
    if arg.num:
        print('--num not necessary when using --ids')
        print('re-run without --num')
        print('aborting')
        sys.exit()
def _record(status, msg, errs):
    """Handle one get_data result: messages containing 'aborting' are fatal
    and stop the script; other failures are collected for the final report."""
    if not status:
        if 'aborting' in msg:
            print(msg)
            sys.exit()
        errs.append(msg)

errs = []
if arg.ids:
    # entries listed explicitly on the command line
    for raw_id in arg.ids:
        bmrbid = 'bmr' + raw_id
        if arg.bmrb:
            status, msg = get_data(
                bmrbid=bmrbid, bmrbpath=arg.bmrb, output=arg.output)
            _record(status, msg, errs)
        else:
            # TODO: fetch the entry over the network (wget from bmrb_addr)
            pass
else:
    # pull entries from the artina set metadata, stopping after --num
    with open(arg.artinaset, 'r') as fp:
        entries = json.load(fp)
    count = 0
    for entry in entries:
        bmrb = entry['BMRB code']
        try:
            # take the first id when several are comma-separated
            bmrbid = 'bmr' + bmrb.split(',')[0]
        except (AttributeError, TypeError):  # was bare except: non-string code
            continue
        # NOTE(review): arg.bmrb may be None on this path; get_data would
        # fail inside os.path.abspath. Present in the original too -- confirm
        # whether --bmrb should be required alongside --artinaset.
        status, msg = get_data(
            bmrbid=bmrbid, bmrbpath=arg.bmrb, output=arg.output)
        _record(status, msg, errs)
        count += 1
        if count == arg.num:
            break
if errs:
    print(json.dumps(errs, indent=2))
""" | |
iterate down each subdirectory of timedomain | |
detect if has procpar or pulprog | |
if procpar -- seqfil | |
I can't remember the other way for Bruker; specdb knows | |
os.system() | |
cmd = cp ./* outputdir/id/seqfil | |
make a github | |
git clone | |
git add data/* | |
git commit -m "all data added" | |
git push | |
do only nesg | |
ask for a limit | |
default is 10 | |
summary.json | |
pulse sequence: [paths] | |
readme | |
no processed data, need to do that separately | |
to get more -- the bmrb can be downloaded separately -- can take a full day -- | |
but not no more | |
wget ... ? | |
""" | |