Skip to content
Permalink
Browse files
update for archival dataset
  • Loading branch information
xperthunter committed Jul 2, 2022
1 parent 7efdcdd commit 5953517c81c8dd1b1654996d602fad485856fd2e
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 23 deletions.
@@ -116,14 +116,22 @@ def forms(table=None, num=None, input_dict=None):

form = ruamel.yaml.comments.CommentedMap()
for i, tbl in enumerate(table):
if tbl not in form_dic:
if input_dict:
form[tbl] = input_dict[tbl]
continue
form[tbl] = ruamel.yaml.comments.CommentedMap()
#print(tbl)

comment = form_dic.ca.items[tbl][2].value
form.yaml_add_eol_comment(comment, tbl, column=25)

for x in range(num[i]):
form[tbl][x] = ruamel.yaml.comments.CommentedMap()
for k,v in form_dic[tbl].ca.items.items():
if input_dict:
print(tbl,x,k)
if k not in input_dict[tbl][str(x)]: continue
comment = v[2].value[1:]
if input_dict: form[tbl][x][k] = input_dict[tbl][str(x)][k]
else: form[tbl][x][k] = form_dic[tbl][k]
@@ -56,25 +56,32 @@ def check_yaml(file=None):
record = yaml.load(fp)

# convert record to dictionary
#print(record)
rec = json.loads(json.dumps(record))

for tbl, dic in rec.items():
#print(tbl)
template = forms(table=[tbl])
temp = ruamel.yaml.safe_load(template)

#print(temp, type(temp), tbl)
if not temp: continue
if not temp[tbl]: continue
temp = json.loads(json.dumps(temp))[tbl]["0"]
#print(json.dumps(temp,indent=2))
rec_keys = list(rec[tbl].keys())

assert(isinstance(rec[tbl][rec_keys[0]], dict))

for ind, data in record[tbl].items():
for ind, data in rec[tbl].items():
for key in data:
if key not in temp:
print(f'unknown key {key} in {file}')
return False
return True

# for key in temp:
# if key not in data:
# rec[tbl][ind][key] = ''
return True, rec


def empty_form_check(data=None):
@@ -231,13 +238,16 @@ def table_inserter(table=None, record=None, cursor=None):
assert(record is not None)
assert(cursor is not None)

cursor.execute(f"select * from {table} limit 1")
table_cols = [i[0] for i in cursor.description]
if table == 'buffer_components':
assert('isotopic_labeling' in table_cols)
table_cols.remove('isotopic_labeling')
# cursor.execute(f"select * from {table} limit 1")
# table_cols = [i[0] for i in cursor.description]
# if table == 'buffer_components':
# assert('isotopic_labeling' in table_cols)
# table_cols.remove('isotopic_labeling')

table_cols = list(record.keys())

#print('table_cols', record.keys())
#print(json.dumps(record,indent=2))
columns, vals_place, values = insert_constructor(table_cols, record)
#print('columns', columns)
#print('vals_place', vals_place)
@@ -270,8 +280,10 @@ def table_updater(table=None, record=None, cursor=None):
assert(record is not None)
assert(cursor is not None)

cursor.execute(f"SELECT * from {table} limit 1")
table_cols = [i[0] for i in cursor.description]
# cursor.execute(f"SELECT * from {table} limit 1")
# table_cols = [i[0] for i in cursor.description]

table_cols = list(record.keys())

columns, vals_place, values = insert_constructor(table_cols, record)
if values == [None] * len(values): return None
@@ -748,18 +760,20 @@ def insert(file=None, db=None, write=False):
c.execute(sql)

# check that the YAML file to be inserted has the expected keys
if not check_yaml(file=file):
status, record = check_yaml(file=file)

if not status:
print(f"JSON file {file} does not have expected keys")
print("Aborting")
sys.exit()

# read YAML file to be inserted
with open(file, 'rt') as fp:
yaml = ruamel.yaml.YAML()
record = yaml.load(fp)

# convert record to plain dictionary
record = json.loads(json.dumps(record))
#
# # read YAML file to be inserted
# with open(file, 'rt') as fp:
# yaml = ruamel.yaml.YAML()
# record = yaml.load(fp)
#
# # convert record to plain dictionary
# record = json.loads(json.dumps(record))

# insert data from yaml in the specific table order
for table in table_order:
@@ -40,8 +40,8 @@ CREATE TABLE target ( -- molecular target information, REQUIRED: `target_id`
target_sequence TEXT CHECK( target_sequence NOT LIKE '% %' and length(target_sequence) <= 1024 ), -- target's molecular seq., any type of code/alphabet, no spaces, len <= 1024, Ex: MGSHHHHILVAM
organism_source TEXT CHECK( length(organism_source) <= 128 ), -- organism name for target source, can indicate if target is synthetic, len <= 128, Ex: synthetic
gene_name TEXT CHECK( length(gene_name) <= 64 ), -- gene name, len <= 64, Ex: SpikeCoV2
project_id TEXT DEFAULT "NONE" NOT NULL, -- `project_id` target is a member of, must be in project table, Ex: SpikeFraga
target_preparer TEXT DEFAULT "NONE" NOT NULL, -- `user_id` that assigned the target, must be in user table, Ex: KJF
project_id TEXT DEFAULT "NONE", -- `project_id` target is a member of, must be in project table, Ex: SpikeFraga
target_preparer TEXT DEFAULT "NONE", -- `user_id` that assigned the target, must be in user table, Ex: KJF
UNIQUE(target_id),
FOREIGN KEY ([project_id]) REFERENCES "project" ([project_id]) ON DELETE NO ACTION ON UPDATE CASCADE
FOREIGN KEY ([target_preparer]) REFERENCES "user" ([user_id]) ON DELETE NO ACTION ON UPDATE CASCADE
@@ -61,7 +61,7 @@ CREATE TABLE construct ( -- molecular construct from a target, REQUIRED: `constr
construct_comment TEXT CHECK( length(construct_comment) <= 128 ), -- free field comment, anything to note about construct, len <= 128, Ex: with primers RE1A/RE1B
plasmid_id TEXT CHECK( length(plasmid_id) <= 64 ), -- plasmid construct DNA sequence is cloned in, len <= 64, Ex: pET218(+)
plasmid_id_comment TEXT CHECK( length(plasmid_id_comment) <= 128 ), -- comments on plasmid, len <= 128, Ex: from Underwood lab
construct_preparer TEXT DEFAULT "NONE" NOT NULL, -- `user_id` who prepared the construct, must already be in user table, Ex: KJF
construct_preparer TEXT DEFAULT "NONE", -- `user_id` who prepared the construct, must already be in user table, Ex: KJF
construct_company TEXT CHECK( length(construct_company) <= 128 ), -- if the construct was produced by a company, indicate where it came from, len <= 128, Ex: Genscript
UNIQUE(construct_id),
FOREIGN KEY ([target_id]) REFERENCES "target" ([target_id]) ON DELETE NO ACTION ON UPDATE CASCADE
@@ -176,7 +176,7 @@ CREATE TABLE pst ( -- description of a protein sample tube, not only proteins, R
id INTEGER PRIMARY KEY NOT NULL,
pst_id TEXT CHECK( pst_id NOT LIKE '% %' and length(pst_id) <= 32 ) NOT NULL, -- text identifier for the pst, must be unique, no spaces, len <= 32, Ex: Db0515A.001
prev_pst_id TEXT , -- if pst derived from another pst, give previous `pst_id`, must already be in pst table, Ex: Db0515A.000
pst_preparer TEXT DEFAULT "NONE" NOT NULL, -- `user_id` of user that prepared the sample tube, must be in user table, Ex: KJF
pst_preparer TEXT DEFAULT "NONE", -- `user_id` of user that prepared the sample tube, must be in user table, Ex: KJF
pst_comment TEXT CHECK( length(pst_comment) <= 128 ), -- free field comment about pst, len <= 128, Ex: new sample
sample_type TEXT CHECK( sample_type IN ('solution', 'solid state') ), -- is sample a solution or solid-state sample, one of (`solution`, `solid state`), Ex: solution
solvent_system TEXT CHECK( length(solvent_system) <= 64 ), -- solvent system for the sample, len <= 64, Ex: DMSO

0 comments on commit 5953517

Please sign in to comment.