specdb.sql

PRAGMA foreign_keys = ON;

CREATE TABLE user ( -- provide information about a user, REQUIRED: `user_id`
	id                         INTEGER PRIMARY KEY NOT NULL,
	user_id                    TEXT CHECK( user_id NOT LIKE '% %' and length(user_id) <= 8   ) NOT NULL,                                -- no spaces, must be unique, len <= 8, Ex: KJF
	given_name                 TEXT CHECK( length(given_name)                    <= 64  ),                                              -- len <= 64, Ex: Keith
	family_name                TEXT CHECK( length(family_name)                   <= 64  ),                                              -- len <= 64, Ex: Fraga
	middle_initials            TEXT CHECK( length(middle_initials)               <= 16  ),                                              -- len <= 16, Ex: J
	department_and_institution TEXT CHECK( length(department_and_institution)    <= 128 ),                                              -- len <= 128, Ex: MCB UCD
	country                    TEXT CHECK( length(country)                       <= 32  ),                                              -- len <= 32, Ex: USA
	state_province             TEXT CHECK( length(state_province)                <= 32  ),                                              -- len <= 32, Ex: NY
	city                       TEXT CHECK( length(city)                          <= 32  ),                                              -- len <= 32, Ex: Troy
	postal_code                TEXT CHECK( length(postal_code)                   <= 32  ),                                              -- len <= 32, Ex: 12180
	role                       TEXT CHECK( length(role)                          <= 64  ),                                              -- len <= 64, Ex: postdoc
	organization_type          TEXT CHECK( length(organization_type)             <= 64  ),                                              -- len <= 64, Ex: academic
	email_address              TEXT CHECK( email_address NOT LIKE '% %' and email_address LIKE '%@%' and length(email_address) <= 64 ), -- no spaces, must have `@`, len <= 64, Ex: user@mail.com
	UNIQUE(user_id)
);

INSERT INTO user (user_id)
VALUES
	("NONE");

CREATE TABLE project ( -- provide information describing a project, REQUIRED: `project_id`
	id                  INTEGER PRIMARY KEY NOT NULL,
	structural_genomics TEXT CHECK( structural_genomics IN ('yes', 'no')             ),                -- not required, only yes or no
	project_id          TEXT CHECK( project_id NOT LIKE '% %' and length(project_id) <= 32 ) NOT NULL, -- required, must be unique, no spaces, len <= 32, Ex: SpikeFraga
	project_details     TEXT CHECK( length(project_details) <= 128                      ),             -- not required, short description, len <= 128, Ex: Spike protein project 2022
	UNIQUE(project_id)
);

INSERT INTO project (project_id)
VALUES
	("NONE");

CREATE TABLE target ( -- molecular target information, REQUIRED: `target_id`
	id               INTEGER PRIMARY KEY NOT NULL,
	target_id        TEXT CHECK( target_id NOT LIKE '% %' and length(target_id)                <= 32   ) NOT NULL, -- text identifier for target, must be unique, no spaces, len <= 32, Ex: Db0515
	target_comment   TEXT CHECK( length(target_comment)                                        <= 128  ),          -- free field comment, anything to note about target, len <= 128, Ex: from UW-Madison
	target_sequence  TEXT CHECK( target_sequence NOT LIKE '% %' and length(target_sequence)    <= 1024 ),          -- target's molecular seq., any type of code/alphabet, no spaces, len <= 1024, Ex: MGSHHHHILVAM
	organism_source  TEXT CHECK( length(organism_source)                                       <= 128  ),          -- organism name for target source, can indicate if target is synthetic, len <= 128, Ex: synthetic
	gene_name        TEXT CHECK( length(gene_name)                                             <= 64   ),          -- gene name, len <= 64, Ex: SpikeCoV2
	project_id       TEXT DEFAULT "NONE",                                                                 -- `project_id` target is a member of, must be in project table, Ex: SpikeFraga
	target_preparer  TEXT DEFAULT "NONE",                                                                 -- `user_id` that assigned the target, must be in user table, Ex: KJF
	UNIQUE(target_id),
	FOREIGN KEY ([project_id])      REFERENCES "project" ([project_id]) ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([target_preparer]) REFERENCES "user"    ([user_id])    ON DELETE NO ACTION ON UPDATE CASCADE
);

INSERT INTO target (target_id)
VALUES
	("NONE");

CREATE TABLE construct ( -- molecular construct from a target, REQUIRED: `construct_id`,
	id                  INTEGER PRIMARY KEY NOT NULL,
	construct_id        TEXT CHECK( construct_id       NOT LIKE '% %' and length(construct_id)       <= 32   ) NOT NULL, -- text identifier for construct, must be unique, no spaces, len <= 32, Ex: Db0515A
	construct_sequence  TEXT CHECK( construct_sequence NOT LIKE '% %' and length(construct_sequence) <= 1024 ),          -- construct sequence, any type of code/alphabet, no spaces, len <= 1024, Ex: MGSHHHINLVAE
	target_id           TEXT DEFAULT "NONE" NOT NULL,                                                                    -- `target id` this construct was made for, must already be in target table, Ex: Db0515
	ext_coefficient_280 FLOAT ,                                                                                          -- extinction coefficient at 280 nm, Ex: 108.7
	expected_mw         FLOAT ,                                                                                          -- expected molecular weight of construct in Da, Ex: 1211.8
	construct_comment   TEXT CHECK( length(construct_comment)  <= 128 ),                                                 -- free field comment, anything to note about construct, len <= 128, Ex: with primers RE1A/RE1B
	plasmid_id          TEXT CHECK( length(plasmid_id)         <= 64  ),                                                 -- plasmid construct DNA sequence is cloned in, len <= 64, Ex: pET218(+)
	plasmid_id_comment  TEXT CHECK( length(plasmid_id_comment) <= 128 ),                                                 -- comments on plasmid, len <= 256, Ex: from Underwood lab
	construct_preparer  TEXT DEFAULT "NONE",                                                                    -- `user_id` who prepared the construct, must already be in user table, Ex: KJF
	construct_company   TEXT CHECK( length(construct_company)  <= 128 ),                                                 -- if the construct was produced by a company, indicate where it came from, len <= 128, Ex: Genscript
	UNIQUE(construct_id),
	FOREIGN KEY ([target_id])          REFERENCES "target" ([target_id]) ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([construct_preparer]) REFERENCES "user"   ([user_id])   ON DELETE NO ACTION ON UPDATE CASCADE
);

INSERT INTO construct (construct_id)
VALUES
	("NONE");

CREATE TABLE expression ( -- describe a molecular expression experiment/results, REQUIRED: `expression_id`
	id                  INTEGER PRIMARY KEY NOT NULL,
	expression_id       TEXT CHECK( expression_id NOT LIKE '% %' and length(expression_id) <= 32  ) NOT NULL,  -- text identifier for expression, must be unique, no spaces, len <= 32, Ex: Db0515A.X
	expression_comment  TEXT CHECK( length(expression_comment)                             <= 128 ),           -- free field comment, anything of note for expression, len <= 128, Ex: reached 4.5 OD
	construct_id        TEXT DEFAULT "NONE" NOT NULL,                                                          -- `construct_id` this expression was for, must already be in construct table, Ex: Db0515A
	expression_media    TEXT CHECK( length(expression_media)                               <= 32  ),           -- growth media expression was in, len <= 32, Ex: MJ9
	strain              TEXT CHECK( length(strain)                                         <= 32  ),           -- cellular strain expression was in, len <= 32, Ex: BL21A (+)
	expression_organism TEXT CHECK( length(expression_organism)                            <= 32  ),           -- organism expression was in, len <- 32, Ex: E. colli
	expression_preparer TEXT DEFAULT "NONE" NOT NULL,                                                          -- `user_id` of person who did the expression, must already be in user table, Ex: KJF
	UNIQUE(expression_id),
	FOREIGN KEY ([construct_id])        REFERENCES "construct" ([construct_id]) ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([expression_preparer]) REFERENCES "user"      ([user_id])      ON DELETE NO ACTION ON UPDATE CASCADE
);

INSERT INTO expression (expression_id)
VALUES
	("NONE");

CREATE TABLE purification_batch ( -- describe the purification run for an expression product, REQUIRED: `batch_id`
	id                             INTEGER PRIMARY KEY NOT NULL,
	batch_id                       TEXT CHECK( batch_id NOT LIKE '% %'        and length(batch_id)        <= 32   ) NOT NULL, -- text identifier for batch, must be unique, no spaces, len <= 32, Ex: Db0515.X.batch
	expression_id                  TEXT DEFAULT "NONE" NOT NULL,                                                              -- expression run this purification batch is for, must already be in expression table Ex: Db0515.X
	sample_sequence                TEXT CHECK( sample_sequence NOT LIKE '% %' and length(sample_sequence) <= 1024 ),          -- sequence of molecule after purification (i.e if there was tag cleaving), any type of code/alphabet, no spaces, len <= 1024, Ex: MQNGPILA
	purif_comment                  TEXT CHECK( length(purif_comment)            <= 128 ),                                     -- free text comment about the purification, len <= 128, Ex: HPLC in RM 204
	production_method              TEXT CHECK( length(production_method)        <= 64  ),                                     -- indicate the purification method, len <= 64, Ex: Ni column only
	isotopic_labeling_remark       TEXT CHECK( length(isotopic_labeling_remark) <= 64  ),                                     -- provide the isotopic labeling, preferred method to indicate labeling in the appropriate column below, len <= 64, Ex: NC5
	iso_13c_enrichment             TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_13c_enrichment             LIKE '%.% 13C%'      ESCAPE '.' ), -- 13C labeling percentage, must contain `% 13C`, len <= 32, Ex: 100% 13C
	iso_15n_enrichment             TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_15n_enrichment             LIKE '%.% 15N%'      ESCAPE '.' ), -- 15N labeling percentage, must contain `% 15N`, len <= 32, Ex: 99% 15N
	iso_2h_enrichment  		       TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_2h_enrichment              LIKE '%.% 2H%'       ESCAPE '.' ), -- 2H labeling percentage, must contain `% 2H`, len <= 32, Ex: 100% 2H
	iso_19f_Trp_enrichment         TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_19f_Trp_enrichment         LIKE '%.% 19F-Trp%'  ESCAPE '.' ), -- 19F-Trp labeling percentage, must contain `% 19F-Trp`, len <= 32, Ex: 11% 19F-Trp
	iso_19f_Phe_enrichment         TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_19f_Phe_enrichment         LIKE '%.% 19F-Phe%'  ESCAPE '.' ), -- 19F-Phe labeling percentage, must contain `% 19F-Phe`, len <= 32, Ex: 9.1% 19F-Phe
	iso_1hd1_Leu_methyl_enrichment TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_1hd1_Leu_methyl_enrichment LIKE '%.% 1HD1-Leu%' ESCAPE '.' ), -- 1HD1-Leu stereospecific labeling percentage, must contain `% 1HD1-Leu`, len <= 32, Ex: 13.3% 1HD1-Leu
	iso_1hd2_Leu_methyl_enrichment TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_1hd2_Leu_methyl_enrichment LIKE '%.% 1HD2-Leu%' ESCAPE '.' ), -- 1HD2-Leu stereospecific labeling percentage, must contain `% 1HD2-Leu`, len <= 32, Ex: 15% 1HD2-Leu
	iso_1hd_Ile_methyl_enrichment  TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_1hd_Ile_methyl_enrichment  LIKE '%.% 1HD-Ile%'  ESCAPE '.' ), -- 1HD-Ile labeling percentage, must contain `% 1HD-Ile`, len <= 32, Ex: 50% 1HD-Ile
	iso_1hg1_Val_methyl_enrichment TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_1hg1_Val_methyl_enrichment LIKE '%.% 1HG1-Val%' ESCAPE '.' ), -- 1HG1-Val stererospecific labeling percentage, must contain `% 1HG1-Val`, len <= 32, Ex: 25% 1HG1-Val
	iso_1hg2_Val_methyl_enrichment TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_1hg2_Val_methyl_enrichment LIKE '%.% 1HG2-Val%' ESCAPE '.' ), -- 1HG2-Val stereospecific labeling percentage, must contain `% 1HG2-Val`, len <= 32, Ex: 27%% 1HG2-Val
	iso_1hb_Ala_methyl_enrichment  TEXT CHECK( length(iso_13c_enrichment) <= 32 and iso_1hb_Ala_methyl_enrichment  LIKE '%.% 1HB-Ala%'  ESCAPE '.' ), -- 1HB-Ala labeling percentage, must contain `% 1HB-Ala`, len <= 32, Ex: 100% 1HB-Ala
	batch_preparer                 TEXT DEFAULT "NONE", -- `user_id` of person who did the purification, must already be in user table, Ex: KJF
	UNIQUE(batch_id),
	FOREIGN KEY ([expression_id])  REFERENCES "expression" ([expression_id]) ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([batch_preparer]) REFERENCES "user"       ([user_id])       ON DELETE NO ACTION ON UPDATE CASCADE
);

INSERT INTO purification_batch (batch_id)
VALUES
	("NONE");

CREATE TABLE buffer ( -- store all buffers used, their id and their pH, REQUIRED: `buffer_id`, `buffer_pH`, NOTE: the buffer components go in the `buffer_components` table
	id             INTEGER PRIMARY KEY NOT NULL,
	buffer_id      TEXT CHECK( buffer_id NOT LIKE '% %' and length(buffer_id) <= 32  ) NOT NULL, -- text identifier for the buffer, must be unique, no spaces, len <= 32, Ex: hn4071
	buffer_ph      FLOAT DEFAULT "7.0" NOT NULL,                                                 -- pH of buffer, must be a number, Ex: 8.1
	buffer_comment TEXT CHECK( length(buffer_comment)                         <= 128 ),          -- free text field for any notes about the buffer, len <= 128, Ex: made by KJF 12/04
	UNIQUE(buffer_id)
);

INSERT INTO buffer (buffer_id)
VALUES
	("NONE");

CREATE TABLE buffer_components ( -- describe the component(s) of a buffer, REQUIRED: `buffer_id`, `buffer_component`, `buffer_component_value`, `buffer_component_unit`
	id                     INTEGER PRIMARY KEY NOT NULL,
	buffer_id              TEXT NOT NULL,                                                              -- `buffer_id` this component goes to, must already be in buffer table, Ex: hn4071
	buffer_component       TEXT CHECK( length(buffer_component)                 <= 64 ) NOT NULL,      -- name of the component, len <= 64, Ex: NaCl
	buffer_component_value FLOAT NOT NULL,                                                             -- the numeric concentration value of the component, Ex: 100.0
	buffer_component_unit  TEXT CHECK( buffer_component_unit in ('mM', '% (v/v)', 'mg/mL') ) NOT NULL, -- unit of concentration, one of (`mM`, `% (v/v)`, `mg/mL`), Ex: mM
	isotopic_labeling      TEXT DEFAULT "natural abundance",
	UNIQUE(buffer_id, buffer_component),
	FOREIGN KEY ([buffer_id]) REFERENCES "buffer" ([buffer_id]) ON DELETE NO ACTION ON UPDATE CASCADE
);

CREATE TABLE tubes (
	id        INTEGER PRIMARY KEY NOT NULL,
	tube_type TEXT NOT NULL,
	UNIQUE(tube_type)
);

INSERT INTO tubes (tube_type)
VALUES
	("0.5 mL Eppendorf"),
	("1.7 mL Eppendorf"),
	("15 mL cent. tube"),
	("50 mL cent. tube"),
	("1 L cent. tube"),
	("8-well PCR strip"),
	("NMR tube"),
	("plate well"),
	("1-mm NMR tube"),
	("1.7-mm NMR tube"),
	("3-mm NMR tube"),
	("3-mm Shigemi tube"),
	("4-mm NMR tube"),
	("4-mm Shigemi tube"),
	("5-mm NMR tube"),
	("5-mm Shigemi tube"),
	("8-mm NMR tube"),
	("8-mm Shigemi tube"),
	("10-mm NMR tube"),
	("10-mm Shigemi tube"),
	("NONE");

CREATE TABLE pst ( -- description of a protein sample tube, not only proteins, REQUIRED: `pst_id`, `pst_preparer`, `sample_ph`, `total_volume`, `volume_unit`, `tube_type`
	id                   INTEGER PRIMARY KEY NOT NULL,
	pst_id               TEXT CHECK( pst_id NOT LIKE '% %' and length(pst_id)       <= 32  ) NOT NULL, -- text identifier for the pst, must be unique, no spaces, len <= 32, Ex: Db0515A.001
	prev_pst_id          TEXT ,                                                                        -- if pst derived from another pst, give previous `pst_id`, must already be in pst table, Ex: Db0515A.000
	pst_preparer         TEXT DEFAULT "NONE",                                                          -- `user_id` of user that prepared the sample tube, must be in user table, Ex: KJF
	pst_comment          TEXT CHECK( length(pst_comment)                            <= 128 ),          -- free field comment about pst, len <= 128, Ex: new sample
	sample_type          TEXT CHECK( sample_type IN ('solution', 'solid state')            ),          -- is sample a solution or solid-state sample, one of (`solution`, `solid state`), Ex: solution
	solvent_system       TEXT CHECK( length(solvent_system)                         <= 64  ),          -- solvent system for the sample, len <= 64, Ex: DMSO
	sample_ph            FLOAT ,                                                                       -- pst pH, can be different from buffer ph, Ex: 7.1
	total_volume         FLOAT DEFAULT "0.0" NOT NULL,                                                 -- numeric volume for total volume in tube, Ex: 2.2
	volume_unit          TEXT CHECK( volume_unit IN ('nL', 'uL', 'mL', 'L') ) DEFAULT "L" NOT NULL,    -- unit of volume for volume in tube, one of (`nL`, `uL`, `mL`, `L`), Ex: mL
	curr_location        TEXT CHECK( length(curr_location) <= 64 ),                                    -- current location of tube, len <= 64, Ex: NMR room
	prev_location        TEXT CHECK( length(prev_location) <= 64 ),                                    -- previous location of tube, len <= 64, Ex: Underwood lab
	create_date          TEXT CHECK( create_date LIKE '____-__-__' ),                                  -- date record added to database, format "YYYY-MM-DD", Ex: 2022-01-01
	buffer_id            TEXT DEFAULT "NONE" NOT NULL,                                                 -- id of buffer in the tube, must already be in `buffer` table, no spaces, len <= 32, Ex: hn4071
	tube_type            TEXT DEFAULT "NONE" NOT NULL,                                                 -- the tube type for sample tube, use `specdb summary --table tubes` to find allowable tube types, Ex: 1-mm Shigemi tube
	UNIQUE(pst_id),
	FOREIGN KEY ([pst_preparer]) REFERENCES "user"   ([user_id])   ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([prev_pst_id])  REFERENCES "pst"    ([pst_id])    ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([buffer_id])    REFERENCES "buffer" ([buffer_id]) ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([tube_type])    REFERENCES "tubes"  ([tube_type]) ON DELETE NO ACTION ON UPDATE CASCADE
);

INSERT INTO pst (pst_id)
VALUES
	("NONE");

CREATE TABLE batch_components ( -- describe the purification batches in a pst, REQUIRED: `pst_id`, `batch_id`, `volume`, `volume_unit`, `conc`, `conc_unit`
	id          INTEGER PRIMARY KEY NOT NULL,
	pst_id      TEXT NOT NULL,                                                            -- text id of sample tube being described, must already be in pst table, Ex: Db0515A.001
	batch_id    TEXT NOT NULL,                                                            -- text id of batch in the `pst_id` above, must already be in purification_batch table, Ex: Db0515A.X.batch
	conc        FLOAT NOT NULL,                                                           -- concentration of sample in the sample tube, numeric, Ex: 10.2
	conc_unit   TEXT CHECK( conc_unit IN ('ug/mL', 'mg/mL', 'nM', 'uM', 'mM') ) NOT NULL, -- concentration units, one of (`ug/mL`, `mg/mL`, `nM`, `uM`, `mM`), Ex: mM
	mass        FLOAT ,                                                                   -- mg of protein sample in tube, Ex: 101.1
	UNIQUE(pst_id, batch_id)
	FOREIGN KEY ([pst_id])   REFERENCES "pst"                 ([pst_id])   ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([batch_id]) REFERENCES "purification_batch"  ([batch_id]) ON DELETE NO ACTION ON UPDATE CASCADE
);

CREATE TABLE spectrometer ( -- spectrometer information, REQUIRED: `spectrometer_id`, `manufacturer``, `field_strength`
	id                   INTEGER PRIMARY KEY NOT NULL,
	spectrometer_id      TEXT CHECK( spectrometer_id NOT LIKE '% %' and length(spectrometer_id) <= 32 ) NOT NULL,         -- text identifier for spectrometer, must be unique, no spaces, len <= 32, EX: HU800
	manufacturer         TEXT CHECK( manufacturer IN ('Bruker', 'Varian') ),                                               -- spectrometer manufacturer, one of 'Bruker' or 'Varian'
	model                TEXT CHECK( length(model)         <= 32 ),                                                        -- spectrometer model, len <= 32, Ex: Bruker
	serial_number        TEXT CHECK( length(serial_number) <= 64 ),                                                        -- spectrometer serial number, len <= 64, Ex: 16Z001
	field_strength       TEXT CHECK( field_strength LIKE '%Hz' and length(field_strength) <= 16 ) DEFAULT "0 Hz" NOT NULL, -- spectrometer field strength, text must end in `Hz`, len <=16, Ex: 800 MHz
	spectrometer_comment TEXT CHECK( length(spectrometer_comment) <= 128 ),                                                -- free field comment about the spectrometer, len <= 128, Ex: located in Briggs
	UNIQUE(spectrometer_id)
);

INSERT INTO spectrometer (spectrometer_id)
VALUES
	("NONE");

CREATE TABLE probe ( -- describe the NMR probe, REQUIRED: `probe_id`
	id                INTEGER PRIMARY KEY NOT NULL,
	probe_id          TEXT CHECK( probe_id NOT LIKE '% %' and length(probe_id) <= 32 ) NOT NULL, -- text identifier for probe, no spaces, len <= 32, must be unique, Ex: probe_bruker_12
	model             TEXT CHECK( length(model)                          <= 64 ),          -- probe model, len <= 64
	manufacturer      TEXT CHECK( length(manufacturer)                   <= 64 ),          -- probe manufacturer, len <= 64
	serial_number     TEXT CHECK( length(serial_number)                  <= 64 ),          -- probe serial number, len <= 64
	diameter          TEXT CHECK( length(diameter)                       <= 8  ),          -- diameter of probe, len <= 8
	rotor_length      TEXT CHECK( length(rotor_length)                   <= 8  ),          -- rotor length, len <= 8
	rotor_composition TEXT CHECK( length(rotor_composition)              <= 64 ),          -- len <= 64
	internal_vol      TEXT CHECK( length(internal_vol)                   <= 16 ),          -- len <= 16
	spacer_present    TEXT CHECK( length(spacer_present)                 <= 8  ),          -- len <= 8
	UNIQUE(probe_id)
);

INSERT INTO probe (probe_id)
VALUES
	("NONE");

CREATE TABLE session ( -- describe a data collection session, REQUIRED: pst_id (if not none, just use 'NONE')
	id INTEGER PRIMARY KEY NOT NULL,
	folder_name      TEXT ,
	session_preparer TEXT DEFAULT "NONE" NOT NULL, -- `user_id` of the person who prepared and collected the session, must already be in user table, Ex: KJF
	spectrometer_id  TEXT DEFAULT "NONE" NOT NULL, -- `spectrometer_id` session was collected at, must already be in spectrometer table, Ex: Hu800
	pst_id           TEXT DEFAULT "NONE" NOT NULL, -- `pst_id` the session is for
	UNIQUE(folder_name, session_preparer, spectrometer_id),
	FOREIGN KEY ([session_preparer]) REFERENCES "user"         ([user_id])         ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([spectrometer_id])  REFERENCES "spectrometer" ([spectrometer_id]) ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([pst_id])           REFERENCES "pst"          ([pst_id])          ON DELETE NO ACTION ON UPDATE CASCADE
);

CREATE TABLE pulse_sequence_names (
	id INTEGER PRIMARY KEY NOT NULL,
	nickname TEXT NOT NULL,
	UNIQUE(nickname)
);

INSERT INTO pulse_sequence_names (nickname)
VALUES
	("1D 1H"),
	("1D 13C"),
	("1D 15N"),
	("1D 31P"),
	("2D 1H-13C HMQC"),
	("2D 1H-15N HSQC"),
	("2D 1H-15N HSQC NH2 only"),
	("2D 1H-15N TROSY"),
	("2D 1H-13C HSQC"),
	("2D 1H-13C HSQC/HMQC"),
	("2D 1H-13C HSQC aliphatic"),
	("2D 1H-13C HSQC aromatic"),
	("2D 1H-1H TOCSY"),
	("2D 1H-1H ROESY"),
	("2D DQF-COSY"),
	("2D 1H-1H COSY"),
	("2D 1H-1H NOESY"),
	("2D (HB)CB(CGCD)HD"),
	("JR NOESY"),
	("L-PROSY NOESY"),
	("3D CBCACONH"),
	("3D CBCANH"),
	("3D C(CO)NH"),
	("3D HNCO"),
	("3D HNCA"),
	("3D HBHANH"),
	("3D HNCACB"),
	("3D HN(CA)CO"),
	("3D HNCACO"),
	("3D HBHA(CO)NH"),
	("3D HN(CO)CA"),
	("3D HN(CO)CACB"),
	("3D CCH-TOCSY"),
	("3D H(CCO)NH"),
	("3D HCCH-TOCSY"),
	("3D (C)CCH-TOCSY"),
	("3D 15N-separated NOESY"),
	("3D 13C-separated NOESY"),
	("3D 1H-15N NOESY"),
	("3D 1H-15N TOCSY"),
	("3D 1H-13C NOESY"),
	("3D 1H-13C NOESY aliphatic"),
	("3D 1H-13C NOESY aromatic"),
	("3D HNHA"),
	("3D HNHB"),
	("3D HCCH-COSY"),
	("3D HCACO"),
	("3D HN(COCA)CB"),
	("4D CC NOESY"),
	("4D 13C-separated NOESY"),
	("5D APSY CBCACONH"),
	("5D APSY HACACONH"),
	("5D HN(CA)CONH"),
	("15N-(1H) hetNOE"),
	("13C-(1H) hetNOE"),
	("3D 15N,13C sim NOESY"),
	("1D 15N T1"),
	("2D 15N T1"),
	("3D 15N T1"),
	("1D 15N T2"),
	("2D  15N T2"),
	("3D 15N T2"),
	("1D 15N T1rho"),
	("2D 15N T1rho"),
	("3D 15N T1rho"),
	("1D 15N CPMG"),
	("2D 15N CPMG"),
	("3D 15N CPMG"),
	("DARR"),
	("2D DARR"),
	("2D 13C-13C DARR"),
	("CANcoCX"),
	("NCACX"),
	("NCOCX"),
	("CONCA"),
	("3D NCOCX"),
	("3D NCACX"),
	("PAIN"),
	("PAR"),
	("15N-1H solid-state de-HETCOR"),
	("FS-REDOR"),
	("2D 15N-13C TEDOR"),
	("CN TEDOR"),
	("2D 13C/13C PDSD"),
	("2D 13C/13C TOBSY"),
	("2D HBCBCGCDCEHE"),
	("2D HBCBCGCD"),
	("2D 13C-detected CORD"),
	("2D 13C-detected NCA"),
	("1H-detected (H)NH HETCOR"),
	("3D RNCSA"),
	("3D ROCSA");


CREATE TABLE default_processing_scripts ( -- record a default nmrpipe processing script for every pulse sequence
	id                  INTEGER PRIMARY KEY NOT NULL,
	pulse_sequence_name TEXT NOT NULL, -- name of pulse sequence, from Bruker it is the value of the PULPROG tag
	nickname            TEXT NOT NULL, -- user provided nickname, references the pulse_sequence_nicknames table
	default_processing  BLOB NOT NULL, -- path to text file of a nmrpipe processing script that read a FID for this pulse sequence type
	UNIQUE(pulse_sequence_name)
	FOREIGN KEY ([nickname]) REFERENCES "pulse_sequence_names" ([nickname]) ON DELETE NO ACTION ON UPDATE CASCADE
);

CREATE TABLE time_domain_dataset ( -- record information about a time domain dataset, REQUIRED: `subdir_name`
	id                      INTEGER PRIMARY KEY NOT NULL,
	subdir_name             TEXT CHECK( length(subdir_name) <= 128 ) NOT NULL,   -- path to where fid/ser file is, can be a relative path, len <= 128, Ex: 1/
	pst_id                  TEXT DEFAULT "NONE" NOT NULL,                        -- `pst_id` of sample, must already be in pst table, Ex: Db0515.001
	pulse_sequence_nickname TEXT DEFAULT "NONE",                                 -- nickname for pulse sequence, controlled vocabulary
	probe_id                TEXT ,                                               -- `probe_id` for the probe, if blank probe information pulled from fid directory
	pulse_sequence          TEXT CHECK( length(pulse_sequence) <= 128 ),
	zipped_dir            	BLOB NOT NULL,
	md5checksum             TEXT NOT NULL,
	session_id            	INTEGER,
	temperature_from_data   FLOAT,
	experiment_date         TEXT ,
	probe_info              TEXT CHECK( length(probe_info) <= 256 ),
	UNIQUE(md5checksum),
	FOREIGN KEY ([session_id])              REFERENCES "session" ([id])                    ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([pst_id])                  REFERENCES "pst"     ([pst_id])                ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([probe_id])                REFERENCES "probe"   ([probe_id])              ON DELETE NO ACTION ON UPDATE CASCADE
	FOREIGN KEY ([pulse_sequence_nickname]) REFERENCES "pulse_sequence_names" ([nickname]) ON DELETE NO ACTION ON UPDATE CASCADE
);

CREATE TABLE processing_scripts( -- save processing scripts
	id          INTEGER PRIMARY KEY NOT NULL,
	name        TEXT NOT NULL,
	md5checksum TEXT NOT NULL,
	script      BLOB,
	UNIQUE(md5checksum)
);

CREATE TABLE scripts_to_fids ( -- table to save relationships between FIDs and processing scripts for those fids
	id        INTEGER PRIMARY KEY NOT NULL,
	fid_id    INTEGER NOT NULL,
	script_id INTEGER NOT NULL,
	UNIQUE(fid_id, script_id)
	FOREIGN KEY ([fid_id])    REFERENCES "time_domain_dataset" ([id]) ON DELETE CASCADE ON UPDATE CASCADE
	FOREIGN KEY ([script_id]) REFERENCES "processing_scripts"  ([id]) ON DELETE CASCADE ON UPDATE CASCADE
);

CREATE VIEW summary
AS
SELECT
	time_domain_dataset.id                      AS id,
	time_domain_dataset.experiment_date         AS experiment_date,
	time_domain_dataset.pulse_sequence_nickname AS pulse_sequence_nickname,
	time_domain_dataset.pulse_sequence          AS pulse_sequence,
	time_domain_dataset.md5checksum             AS md5checksum,
	time_domain_dataset.session_id              AS session_id,
	session.session_preparer                    AS session_preparer,
	temperature_from_data                       AS temperature,
	pst.buffer_id                               AS buffer_id,
	time_domain_dataset.pst_id                  AS pst_id,
	pst.pst_preparer                            AS pst_preparer,
	purification_batch.batch_id                 AS batch_id,
	purification_batch.batch_preparer           AS batch_preparer,
	purification_batch.sample_sequence          AS sample_sequence,
	expression.expression_id                    AS expression_id,
	expression.expression_preparer              AS expression_preparer,
	construct.construct_id                      AS construct_id,
	construct.construct_preparer                AS construct_preparer,
	target.target_id                            AS target_id,
	target.target_preparer                      AS target_preparer,
	target.target_sequence                      AS target_sequence,
	target.project_id                           AS project,
	construct.construct_sequence                AS construct_sequence,
	session.spectrometer_id                     AS spectrometer_id,
	spectrometer.field_strength                 AS field,
	time_domain_dataset.probe_id                AS probe_id,
	time_domain_dataset.probe_info              AS probehd,
	pst.tube_type                               AS tube_type,
	zipped_dir
FROM
	time_domain_dataset
INNER JOIN session             ON session.id                   = time_domain_dataset.session_id
INNER JOIN pst                 ON pst.pst_id                   = time_domain_dataset.pst_id
INNER JOIN batch_components    ON batch_components.pst_id      = pst.pst_id
INNER JOIN purification_batch  ON purification_batch.batch_id  = batch_components.batch_id
INNER JOIN expression          ON expression.expression_id     = purification_batch.expression_id
INNER JOIN construct           ON construct.construct_id       = expression.construct_id
INNER JOIN target              ON target.target_id             = construct.target_id
INNER JOIN project             ON project.project_id           = target.project_id
INNER JOIN spectrometer        ON spectrometer.spectrometer_id = session.spectrometer_id;

CREATE TABLE star_conversion (
	id           INTEGER PRIMARY KEY NOT NULL,
	specdb_col   TEXT,
	specdb_table TEXT,
	star_tag     TEXT,
	star_frame   TEXT
);

INSERT INTO star_conversion (specdb_col,specdb_table,star_tag,star_frame)
VALUES
	("pst_comment",               "pst",                "_Assembly.Details",                            "assembly"),
	("pst_id",                    "pst",                "_Assembly.Name",                               "assembly"),
	("pst_id",                    "pst",                "_Assembly.Sf_framecode",                       "assembly"),
	("target_id",                 "target",             "_Entity_assembly.Entity_assembly_name",        "assembly"),
	("target_id",                 "target",             "_Entity_assembly.Entity_label",                "assembly"),
	("sample_sequence",           "purification_batch", "_Entity.Polymer_seq_one_letter_code_can",      "entity"),
	("construct_comment",         "construct",          "_Entity.Details",                              "entity"),
	("target_id",                 "target",             "_Entity.Name",                                 "entity"),
	("target_id",                 "target",             "_Entity.Sf_framecode",                         "entity"),
	("project_id",                "target",             "_Entry_src.Project_name",                      "entry_information"),
	("project_details",           "project",            "_Entry_information.Details",                   "entry_information"),
	("given_name",                "user",               "_Contact_person.Given_name",                   "entry_information"),
	("family_name",               "user",               "_Contact_person.Family_name",                  "entry_information"),
	("given_name",                "user",               "_Entry_author.Given_name",                     "entry_information"),
	("family_name",               "user",               "_Entry_author.Family_name",                    "entry_information"),
	("middle_initials",           "user",               "_Entry_author.Middle_initials",                "entry_information"),
	("department_and_institution","user",               "_Contact_person.Department_and_institution",   "entry_information"),
	("country",                   "user",               "_Contact_person.Country",                      "entry_information"),
	("state_province",            "user",               "_Contact_person.State_province",               "entry_information"),
	("city",                      "user",               "_Contact_person.City",                         "entry_information"),
	("postal_code",               "user",               "_Contact_person.Postal_code",                  "entry_information"),
	("role",                      "user",               "_Contact_person.Role",                         "entry_information"),
	("organization_type",         "user",               "_Contact_person.Organization_type",            "entry_information"),
	("email_address",             "user",               "_Contact_person.Email_address",                "entry_information"),
	("structural_genomics",       "project",            "_Entry_interview.Structural_genomics",         "entry_interview"),
	("batch_id",                  "purification_batch", "_Experiment.Sample_label",                     "experiment_list"),
	("volume",                    "pst",                "_Experiment.Sample_volume",                    "experiment_list"),
	("volume_unit",               "pst",                "_Experiment.Sample_volume_units",              "experiment_list"),
	("sample_type",               "pst",                "_Experiment.Sample_state",                     "experiment_list"),
	("tube_type",                 "pst",                "_Experiment.NMR_tube_type",                    "experiment_list"),
	("spectrometer_id",           "session",            "_Experiment.NMR_spectrometer_label",           "experiment_list"),
	("probe_info",                "time_domain_dataset","_Experiment.NMR_spectrometer_probe_label",     "experiment_list"),
	("pulse_sequence",            "time_domain_dataset","_Experiment.Name",                             "experiment_list"),
	("pulse_sequence",            "time_domain_dataset","_Experiment_file.Experiment_name",             "experiment_list"),
	("target_id",                 "target",             "_Entity_experimental_src.Entity_label",        "experimental_source"),
	("plasmid_id",                "construct",          "_Entity_experimental_src.Vector_name",         "experimental_source"),
	("plasmid_id_comment",        "construct",          "_Entity_experimental_src.Vector_details",      "experimental_source"),
	("strain",                    "expression",         "_Entity_experimental_src.Host_org_strain",     "experimental_source"),
	("expression_organism",       "expression",         "_Entity_experimental_src.Host_org_name_common","experimental_source"),
	("production_method",         "purification_batch", "_Entity_experimental_src.Production_method",   "experimental_source"),
	("target",                    "target",             "_Entity_natural_src.Entity_label",             "natural_source"),
	("organism_source",           "target",             "_Entity_natural_src.Organism_name_common",     "natural_source"),
	("gene_name",                 "target",             "_Entity_natural_src.Gene_mnemonic",            "natural_source"),
	("spectrometer_id",           "session",            "_NMR_spectrometer.Sf_framecode",               "NMR_spectrometer"),
	("spectrometer_id",           "session",            "_NMR_spectrometer.Name",                       "NMR_spectrometer"),
	("manufacturer",              "spectrometer",       "_NMR_spectrometer.Manufacturer",               "NMR_spectrometer"),
	("model",                     "spectrometer",       "_NMR_spectrometer.Model",                      "NMR_spectrometer"),
	("serial_number",             "spectrometer",       "_NMR_spectrometer.Serial_number",              "NMR_spectrometer"),
	("field_strength",            "spectrometer",       "_NMR_spectrometer.Field_strength",             "NMR_spectrometer"),
	("spectrometer_comment",      "spectrometer",       "_NMR_spectrometer.Details",                    "NMR_spectrometer"),
	("probe_info",                "time_domain_dataset","_NMR_spectrometer_probe.Sf_framecode",         "NMR_spectrometer_probe"),
	("probe_info",                "time_domain_dataset","_NMR_spectrometer_probe.Details",              "NMR_spectrometer_probe"),
	("probe_model",               "probe",              "_NMR_spectrometer_probe.Model",                "NMR_spectrometer_probe"),
	("manufacturer",              "probe",              "_NMR_spectrometer_probe.Manufacturer",         "NMR_spectrometer_probe"),
	("serial_number",             "probe",              "_NMR_spectrometer_probe.Serial_number",        "NMR_spectrometer_probe"),
	("diameter",                  "probe",              "_NMR_spectrometer_probe.Diameter",             "NMR_spectrometer_probe"),
	("rotor_length",              "probe",              "_NMR_spectrometer_probe.Rotor_length",         "NMR_spectrometer_probe"),
	("rotor_composition",         "probe",              "_NMR_spectrometer_probe.Rotor_composition",    "NMR_spectrometer_probe"),
	("internal_vol",              "probe",              "_NMR_spectrometer_probe.Internal_volume",      "NMR_spectrometer_probe"),
	("spacer_present",            "probes",             "_NMR_spectrometer_probe.Spacer_present",       "NMR_spectrometer_probe"),
	("batch_id",                  "purification_batch", "_Sample.Sf_framecode",                         "sample"),
	("create_date",               "pst",                "_Sample.Preparation_date",                     "sample"),
	("solvent_system",            "pst",                "_Sample.Solvent_system",                       "sample"),
	("target_id",                 "target_id",          "_Sample_component.Entity_label",               "sample"),
	("conc",                      "batch_components",   "_Sample_component.Concentration_val",          "sample"),
	("conc_unit",                 "batch_components",   "_Sample_component.Concentration_val_units",    "sample"),
	("isotopic_labeling_remark",  "purification_batch", "_Sample_component.Isotopic_labeling",          "sample"),
	("isotopic_labeling",         "buffer_components",  "_Sample_component.Isotopic_labeling",          "sample"),
	("sample_ph",                 "pst",                "_Sample_condition_variable.Val",               "sample"),
	("buffer_component",          "buffer_components",  "_Sample_component.Mol_common_name",            "sample"),
	("target_id",                 "target",             "_Sample_component.Mol_common_name",            "sample"),
	("buffer_component_value",    "buffer_components",  "_Sample_component.Concentration_val",          "sample"),
	("buffer_component_unit",     "buffer_components",  "_Sample_component.Concentration_val_units",    "sample");