update for archival dataset

RPIBioinformatics · Jul 2, 2022 · 5953517c81c8dd1b1654996d602fad485856fd2e · 5953517
1 parent 7efdcdd
commit 5953517c81c8dd1b1654996d602fad485856fd2e
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 23 deletions.
diff --git a/specdb/Forms.py b/specdb/Forms.py
@@ -116,14 +116,22 @@ def forms(table=None, num=None, input_dict=None):
 
 	form = ruamel.yaml.comments.CommentedMap()
 	for i, tbl in enumerate(table):
+		if tbl not in form_dic:
+			if input_dict:
+				form[tbl] = input_dict[tbl]
+			continue
 		form[tbl] = ruamel.yaml.comments.CommentedMap()
 		#print(tbl)
+
 		comment = form_dic.ca.items[tbl][2].value
 		form.yaml_add_eol_comment(comment, tbl, column=25)
 
 		for x in range(num[i]):
 			form[tbl][x] = ruamel.yaml.comments.CommentedMap()
 			for k,v in form_dic[tbl].ca.items.items():
+				if input_dict:
+					print(tbl,x,k)
+					if k not in input_dict[tbl][str(x)]: continue
 				comment = v[2].value[1:]
 				if input_dict: form[tbl][x][k] = input_dict[tbl][str(x)][k]
 				else:          form[tbl][x][k] = form_dic[tbl][k]

diff --git a/specdb/Insert.py b/specdb/Insert.py
@@ -56,25 +56,32 @@ def check_yaml(file=None):
 		record = yaml.load(fp)
 
 	# convert record to dictionary
+	#print(record)
 	rec = json.loads(json.dumps(record))
 
 	for tbl, dic in rec.items():
 		#print(tbl)
 		template = forms(table=[tbl])
 		temp = ruamel.yaml.safe_load(template)
-
+		#print(temp, type(temp), tbl)
+		if not temp: continue
+		if not temp[tbl]: continue
 		temp = json.loads(json.dumps(temp))[tbl]["0"]
 		#print(json.dumps(temp,indent=2))
 		rec_keys = list(rec[tbl].keys())
 
 		assert(isinstance(rec[tbl][rec_keys[0]], dict))
 
-		for ind, data in record[tbl].items():
+		for ind, data in rec[tbl].items():
 			for key in data:
 				if key not in temp:
 					print(f'unknown key {key} in {file}')
 					return False
-	return True
+
+# 			for key in temp:
+# 				if key not in data:
+# 					rec[tbl][ind][key] = ''
+	return True, rec
 
 
 def empty_form_check(data=None):
@@ -231,13 +238,16 @@ def table_inserter(table=None, record=None, cursor=None):
 	assert(record is not None)
 	assert(cursor is not None)
 
-	cursor.execute(f"select * from {table} limit 1")
-	table_cols = [i[0] for i in cursor.description]
-	if table == 'buffer_components':
-		assert('isotopic_labeling' in table_cols)
-		table_cols.remove('isotopic_labeling')
+# 	cursor.execute(f"select * from {table} limit 1")
+# 	table_cols = [i[0] for i in cursor.description]
+# 	if table == 'buffer_components':
+# 		assert('isotopic_labeling' in table_cols)
+# 		table_cols.remove('isotopic_labeling')
+
+	table_cols = list(record.keys())
 
 	#print('table_cols', record.keys())
+	#print(json.dumps(record,indent=2))
 	columns, vals_place, values = insert_constructor(table_cols, record)
 	#print('columns', columns)
 	#print('vals_place', vals_place)
@@ -270,8 +280,10 @@ def table_updater(table=None, record=None, cursor=None):
 	assert(record is not None)
 	assert(cursor is not None)
 
-	cursor.execute(f"SELECT * from {table} limit 1")
-	table_cols = [i[0] for i in cursor.description]
+# 	cursor.execute(f"SELECT * from {table} limit 1")
+# 	table_cols = [i[0] for i in cursor.description]
+
+	table_cols = list(record.keys())
 
 	columns, vals_place, values = insert_constructor(table_cols, record)
 	if values == [None] * len(values): return None
@@ -748,18 +760,20 @@ def insert(file=None, db=None, write=False):
 	c.execute(sql)
 
 	# check yaml file to be inserted that it has the expected keys
-	if not check_yaml(file=file):
+	status, record = check_yaml(file=file)
+
+	if not status:
 		print(f"JSON file {file} does not have expected keys")
 		print("Aborting")
 		sys.exit()
-
-	# read YAML file to be inserted
-	with open(file, 'rt') as fp:
-		yaml = ruamel.yaml.YAML()
-		record = yaml.load(fp)
-
-	# convert record to plain dictionary
-	record = json.loads(json.dumps(record))
+# 	
+# 	# read YAML file to be inserted
+# 	with open(file, 'rt') as fp:
+# 		yaml = ruamel.yaml.YAML()
+# 		record = yaml.load(fp)
+# 	
+# 	# convert record to plain dictionary
+# 	record = json.loads(json.dumps(record))
 
 	# insert data from yaml in the specific table order
 	for table in table_order:

diff --git a/sql/specdb.sql b/sql/specdb.sql
@@ -40,8 +40,8 @@ CREATE TABLE target ( -- molecular target information, REQUIRED: `target_id`
 	target_sequence  TEXT CHECK( target_sequence NOT LIKE '% %' and length(target_sequence)    <= 1024 ),          -- target's molecular seq., any type of code/alphabet, no spaces, len <= 1024, Ex: MGSHHHHILVAM
 	organism_source  TEXT CHECK( length(organism_source)                                       <= 128  ),          -- organism name for target source, can indicate if target is synthetic, len <= 128, Ex: synthetic
 	gene_name        TEXT CHECK( length(gene_name)                                             <= 64   ),          -- gene name, len <= 64, Ex: SpikeCoV2
-	project_id       TEXT DEFAULT "NONE" NOT NULL,                                                                 -- `project_id` target is a member of, must be in project table, Ex: SpikeFraga
-	target_preparer  TEXT DEFAULT "NONE" NOT NULL,                                                                 -- `user_id` that assigned the target, must be in user table, Ex: KJF
+	project_id       TEXT DEFAULT "NONE",                                                                 -- `project_id` target is a member of, must be in project table, Ex: SpikeFraga
+	target_preparer  TEXT DEFAULT "NONE",                                                                 -- `user_id` that assigned the target, must be in user table, Ex: KJF
 	UNIQUE(target_id),
 	FOREIGN KEY ([project_id])      REFERENCES "project" ([project_id]) ON DELETE NO ACTION ON UPDATE CASCADE
 	FOREIGN KEY ([target_preparer]) REFERENCES "user"    ([user_id])    ON DELETE NO ACTION ON UPDATE CASCADE
@@ -61,7 +61,7 @@ CREATE TABLE construct ( -- molecular construct from a target, REQUIRED: `constr
 	construct_comment   TEXT CHECK( length(construct_comment)  <= 128 ),                                                 -- free field comment, anything to note about construct, len <= 128, Ex: with primers RE1A/RE1B
 	plasmid_id          TEXT CHECK( length(plasmid_id)         <= 64  ),                                                 -- plasmid construct DNA sequence is cloned in, len <= 64, Ex: pET218(+)
 	plasmid_id_comment  TEXT CHECK( length(plasmid_id_comment) <= 128 ),                                                 -- comments on plasmid, len <= 128, Ex: from Underwood lab
-	construct_preparer  TEXT DEFAULT "NONE" NOT NULL,                                                                    -- `user_id` who prepared the construct, must already be in user table, Ex: KJF
+	construct_preparer  TEXT DEFAULT "NONE",                                                                    -- `user_id` who prepared the construct, must already be in user table, Ex: KJF
 	construct_company   TEXT CHECK( length(construct_company)  <= 128 ),                                                 -- if the construct was produced by a company, indicate where it came from, len <= 128, Ex: Genscript
 	UNIQUE(construct_id),
 	FOREIGN KEY ([target_id])          REFERENCES "target" ([target_id]) ON DELETE NO ACTION ON UPDATE CASCADE
@@ -176,7 +176,7 @@ CREATE TABLE pst ( -- description of a protein sample tube, not only proteins, R
 	id                   INTEGER PRIMARY KEY NOT NULL, 
 	pst_id               TEXT CHECK( pst_id NOT LIKE '% %' and length(pst_id)       <= 32  ) NOT NULL, -- text identifier for the pst, must be unique, no spaces, len <= 32, Ex: Db0515A.001
 	prev_pst_id          TEXT ,                                                                        -- if pst derived from another pst, give previous `pst_id`, must already be in pst table, Ex: Db0515A.000
-	pst_preparer         TEXT DEFAULT "NONE" NOT NULL,                                                 -- `user_id` of user that prepared the sample tube, must be in user table, Ex: KJF
+	pst_preparer         TEXT DEFAULT "NONE",                                                          -- `user_id` of user that prepared the sample tube, must be in user table, Ex: KJF
 	pst_comment          TEXT CHECK( length(pst_comment)                            <= 128 ),          -- free field comment about pst, len <= 128, Ex: new sample
 	sample_type          TEXT CHECK( sample_type IN ('solution', 'solid state')            ),          -- is sample a solution or solid-state sample, one of (`solution`, `solid state`), Ex: solution
 	solvent_system       TEXT CHECK( length(solvent_system)                         <= 64  ),          -- solvent system for the sample, len <= 64, Ex: DMSO