From 5953517c81c8dd1b1654996d602fad485856fd2e Mon Sep 17 00:00:00 2001 From: xperthunter Date: Fri, 1 Jul 2022 19:25:30 -0700 Subject: [PATCH] update for archival dataset --- specdb/Forms.py | 8 ++++++++ specdb/Insert.py | 52 ++++++++++++++++++++++++++++++------------------ sql/specdb.sql | 8 ++++---- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/specdb/Forms.py b/specdb/Forms.py index 9cfca1c..75d1caa 100644 --- a/specdb/Forms.py +++ b/specdb/Forms.py @@ -116,14 +116,22 @@ def forms(table=None, num=None, input_dict=None): form = ruamel.yaml.comments.CommentedMap() for i, tbl in enumerate(table): + if tbl not in form_dic: + if input_dict: + form[tbl] = input_dict[tbl] + continue form[tbl] = ruamel.yaml.comments.CommentedMap() #print(tbl) + comment = form_dic.ca.items[tbl][2].value form.yaml_add_eol_comment(comment, tbl, column=25) for x in range(num[i]): form[tbl][x] = ruamel.yaml.comments.CommentedMap() for k,v in form_dic[tbl].ca.items.items(): + if input_dict: + print(tbl,x,k) + if k not in input_dict[tbl][str(x)]: continue comment = v[2].value[1:] if input_dict: form[tbl][x][k] = input_dict[tbl][str(x)][k] else: form[tbl][x][k] = form_dic[tbl][k] diff --git a/specdb/Insert.py b/specdb/Insert.py index 80990f8..7c86461 100644 --- a/specdb/Insert.py +++ b/specdb/Insert.py @@ -56,25 +56,32 @@ def check_yaml(file=None): record = yaml.load(fp) # convert record to dictionary + #print(record) rec = json.loads(json.dumps(record)) for tbl, dic in rec.items(): #print(tbl) template = forms(table=[tbl]) temp = ruamel.yaml.safe_load(template) - + #print(temp, type(temp), tbl) + if not temp: continue + if not temp[tbl]: continue temp = json.loads(json.dumps(temp))[tbl]["0"] #print(json.dumps(temp,indent=2)) rec_keys = list(rec[tbl].keys()) assert(isinstance(rec[tbl][rec_keys[0]], dict)) - for ind, data in record[tbl].items(): + for ind, data in rec[tbl].items(): for key in data: if key not in temp: print(f'unknown key {key} in {file}') return False - return True + +# for key in temp: +# if key not in data: +# rec[tbl][ind][key] = '' + return True, rec def empty_form_check(data=None): @@ -231,13 +238,16 @@ def table_inserter(table=None, record=None, cursor=None): assert(record is not None) assert(cursor is not None) - cursor.execute(f"select * from {table} limit 1") - table_cols = [i[0] for i in cursor.description] - if table == 'buffer_components': - assert('isotopic_labeling' in table_cols) - table_cols.remove('isotopic_labeling') +# cursor.execute(f"select * from {table} limit 1") +# table_cols = [i[0] for i in cursor.description] +# if table == 'buffer_components': +# assert('isotopic_labeling' in table_cols) +# table_cols.remove('isotopic_labeling') + + table_cols = list(record.keys()) #print('table_cols', record.keys()) + #print(json.dumps(record,indent=2)) columns, vals_place, values = insert_constructor(table_cols, record) #print('columns', columns) #print('vals_place', vals_place) @@ -270,8 +280,10 @@ def table_updater(table=None, record=None, cursor=None): assert(record is not None) assert(cursor is not None) - cursor.execute(f"SELECT * from {table} limit 1") - table_cols = [i[0] for i in cursor.description] +# cursor.execute(f"SELECT * from {table} limit 1") +# table_cols = [i[0] for i in cursor.description] + + table_cols = list(record.keys()) columns, vals_place, values = insert_constructor(table_cols, record) if values == [None] * len(values): return None @@ -748,18 +760,20 @@ def insert(file=None, db=None, write=False): c.execute(sql) # check yaml file to be inserted that it has the expected keys - if not check_yaml(file=file): + status, record = check_yaml(file=file) + + if not status: print(f"JSON file {file} does not have expected keys") print("Aborting") sys.exit() - - # read YAML file to be inserted - with open(file, 'rt') as fp: - yaml = ruamel.yaml.YAML() - record = yaml.load(fp) - - # convert record to plain dictionary - record = json.loads(json.dumps(record)) +# +# # read YAML file to be inserted +# with open(file, 'rt') as fp: +# yaml = ruamel.yaml.YAML() +# record = yaml.load(fp) +# +# # convert record to plain dictionary +# record = json.loads(json.dumps(record)) # insert data from yaml in the specific table order for table in table_order: diff --git a/sql/specdb.sql b/sql/specdb.sql index e940ed9..3e17cf8 100644 --- a/sql/specdb.sql +++ b/sql/specdb.sql @@ -40,8 +40,8 @@ CREATE TABLE target ( -- molecular target information, REQUIRED: `target_id` target_sequence TEXT CHECK( target_sequence NOT LIKE '% %' and length(target_sequence) <= 1024 ), -- target's molecular seq., any type of code/alphabet, no spaces, len <= 1024, Ex: MGSHHHHILVAM organism_source TEXT CHECK( length(organism_source) <= 128 ), -- organism name for target source, can indicate if target is synthetic, len <= 128, Ex: synthetic gene_name TEXT CHECK( length(gene_name) <= 64 ), -- gene name, len <= 64, Ex: SpikeCoV2 - project_id TEXT DEFAULT "NONE" NOT NULL, -- `project_id` target is a member of, must be in project table, Ex: SpikeFraga - target_preparer TEXT DEFAULT "NONE" NOT NULL, -- `user_id` that assigned the target, must be in user table, Ex: KJF + project_id TEXT DEFAULT "NONE", -- `project_id` target is a member of, must be in project table, Ex: SpikeFraga + target_preparer TEXT DEFAULT "NONE", -- `user_id` that assigned the target, must be in user table, Ex: KJF UNIQUE(target_id), FOREIGN KEY ([project_id]) REFERENCES "project" ([project_id]) ON DELETE NO ACTION ON UPDATE CASCADE FOREIGN KEY ([target_preparer]) REFERENCES "user" ([user_id]) ON DELETE NO ACTION ON UPDATE CASCADE @@ -61,7 +61,7 @@ CREATE TABLE construct ( -- molecular construct from a target, REQUIRED: `constr construct_comment TEXT CHECK( length(construct_comment) <= 128 ), -- free field comment, anything to note about construct, len <= 128, Ex: with primers RE1A/RE1B plasmid_id TEXT CHECK( length(plasmid_id) <= 64 ), -- plasmid construct DNA sequence is cloned in, len <= 64, Ex: pET218(+) plasmid_id_comment TEXT CHECK( length(plasmid_id_comment) <= 128 ), -- comments on plasmid, len <= 256, Ex: from Underwood lab - construct_preparer TEXT DEFAULT "NONE" NOT NULL, -- `user_id` who prepared the construct, must already be in user table, Ex: KJF + construct_preparer TEXT DEFAULT "NONE", -- `user_id` who prepared the construct, must already be in user table, Ex: KJF construct_company TEXT CHECK( length(construct_company) <= 128 ), -- if the construct was produced by a company, indicate where it came from, len <= 128, Ex: Genscript UNIQUE(construct_id), FOREIGN KEY ([target_id]) REFERENCES "target" ([target_id]) ON DELETE NO ACTION ON UPDATE CASCADE @@ -176,7 +176,7 @@ CREATE TABLE pst ( -- description of a protein sample tube, not only proteins, R id INTEGER PRIMARY KEY NOT NULL, pst_id TEXT CHECK( pst_id NOT LIKE '% %' and length(pst_id) <= 32 ) NOT NULL, -- text identifier for the pst, must be unique, no spaces, len <= 32, Ex: Db0515A.001 prev_pst_id TEXT , -- if pst derived from another pst, give previous `pst_id`, must already be in pst table, Ex: Db0515A.000 - pst_preparer TEXT DEFAULT "NONE" NOT NULL, -- `user_id` of user that prepared the sample tube, must be in user table, Ex: KJF + pst_preparer TEXT DEFAULT "NONE", -- `user_id` of user that prepared the sample tube, must be in user table, Ex: KJF pst_comment TEXT CHECK( length(pst_comment) <= 128 ), -- free field comment about pst, len <= 128, Ex: new sample sample_type TEXT CHECK( sample_type IN ('solution', 'solid state') ), -- is sample a solution or solid-state sample, one of (`solution`, `solid state`), Ex: solution solvent_system TEXT CHECK( length(solvent_system) <= 64 ), -- solvent system for the sample, len <= 64, Ex: DMSO