diff --git a/StudentNotebooks/Assignment03/dar-f24-assignment3-template.Rmd b/StudentNotebooks/Assignment03/dar-f24-assignment3-template.Rmd index 760e7bd..928796f 100644 --- a/StudentNotebooks/Assignment03/dar-f24-assignment3-template.Rmd +++ b/StudentNotebooks/Assignment03/dar-f24-assignment3-template.Rmd @@ -197,7 +197,7 @@ These include the results of various LLM on each trial. NOTES: - * All of these datasets can be found in `/academics/MATP-4910-F24/DAR-CTEval-F24/Data` + * All of these datasets can be found in `~/DAR-CTEval-F24/Data` ## Load the CTBench Eval _Data_ @@ -246,12 +246,12 @@ First we load the data and see that `CT_Pub.df` contains 103 trials with 10 dime ```{r} # Load the CT_Pub data -CT_Pub.df<- readRDS("/academics/MATP-4910-F24/DAR-CTEval-F24/Data/CT_Pub_data.Rds") +CT_Pub.df<- readRDS("~/DAR-CTEval-F24/Data/CT_Pub_data.Rds") dim(CT_Pub.df) # Load the CT_Repo data -CT_Repo.df<- readRDS("/academics/MATP-4910-F24/DAR-CTEval-F24/Data/CT_Repo_data.Rds") +CT_Repo.df<- readRDS("~/DAR-CTEval-F24/Data/CT_Repo_data.Rds") dim(CT_Repo.df) @@ -312,7 +312,7 @@ If the table has an entry such as ```{r} # Load the trials.responses -CT_Pub.responses.df<- readRDS("/academics/MATP-4910-F24/DAR-CTEval-F24/Data/trials.responses.Rds") +CT_Pub.responses.df<- readRDS("~/DAR-CTEval-F24/Data/trials.responses.Rds") # convert model and type to factors CT_Pub.responses.df$trial_group <- as.factor(CT_Pub.responses.df$trial_group) @@ -322,7 +322,7 @@ CT_Pub.responses.df$model <- as.factor(CT_Pub.responses.df$model) dim(CT_Pub.responses.df) # Load the trials.matches -CT_Pub.matches.df<- readRDS("/academics/MATP-4910-F24/DAR-CTEval-F24/Data/trials.matches.Rds") +CT_Pub.matches.df<- readRDS("~/DAR-CTEval-F24/Data/trials.matches.Rds") # convert model and type to factors CT_Pub.matches.df$model <- as.factor(CT_Pub.matches.df$model) diff --git a/StudentNotebooks/Assignment03/dar-f24-assignment3-template.html b/StudentNotebooks/Assignment03/dar-f24-assignment3-template.html index c2b7a22..60227e2 100644 --- a/StudentNotebooks/Assignment03/dar-f24-assignment3-template.html +++ b/StudentNotebooks/Assignment03/dar-f24-assignment3-template.html @@ -11,7 +11,7 @@ - + CTBench Eval Project Notebook: @@ -1624,7 +1624,7 @@

CTBench Eval Project Notebook:

DAR Assignment 3 (Fall 2024)

Your Name Here

-

10 September 2024

+

12 September 2024

@@ -1961,7 +1961,7 @@

4 DAR ASSIGNMENT 3 (Part

NOTES:

4.1 Load the CTBench Eval @@ -2011,12 +2011,12 @@

4.1 Load the CTBench Eval are reserved to be used as examples to be included in the prompts, a process called three shot learning.

# Load the CT_Pub data
-CT_Pub.df<- readRDS("/academics/MATP-4910-F24/DAR-CTEval-F24/Data/CT_Pub_data.Rds")
+CT_Pub.df<- readRDS("~/DAR-CTEval-F24/Data/CT_Pub_data.Rds")
 
 dim(CT_Pub.df)
## [1] 103  10
# Load the CT_Repo data
-CT_Repo.df<- readRDS("/academics/MATP-4910-F24/DAR-CTEval-F24/Data/CT_Repo_data.Rds")
+CT_Repo.df<- readRDS("~/DAR-CTEval-F24/Data/CT_Repo_data.Rds")
 
 dim(CT_Repo.df)
## [1] 1693    9
@@ -2074,7 +2074,7 @@

4.2 Load the CTBench Eval candidate descriptor D had no match in the reference list.

# Load the trials.responses
-CT_Pub.responses.df<- readRDS("/academics/MATP-4910-F24/DAR-CTEval-F24/Data/trials.responses.Rds")
+CT_Pub.responses.df<- readRDS("~/DAR-CTEval-F24/Data/trials.responses.Rds")
 
 # convert model and type to factors
 CT_Pub.responses.df$trial_group <- as.factor(CT_Pub.responses.df$trial_group)
@@ -2084,7 +2084,7 @@ 

4.2 Load the CTBench Eval dim(CT_Pub.responses.df)

## [1] 400  11
# Load the trials.matches
-CT_Pub.matches.df<- readRDS("/academics/MATP-4910-F24/DAR-CTEval-F24/Data/trials.matches.Rds")
+CT_Pub.matches.df<- readRDS("~/DAR-CTEval-F24/Data/trials.matches.Rds")
 
 # convert model and type to factors
 CT_Pub.matches.df$model <- as.factor(CT_Pub.matches.df$model)
@@ -2179,14 +2179,16 @@ 

5.1 How do results differ Differences by Model on CT-Pub - - - + + + + +model meanPrecision sePrecision meanRecall @@ -2197,12 +2199,40 @@

5.1 How do results differ -0.4344882 -0.0084623 -0.5272768 -0.0104531 -0.4517983 -0.0072764 +gpt4-omni-ts +0.4194773 +0.0165780 +0.5465613 +0.0206740 +0.4519953 +0.0145372 + + +gpt4-omni-zs +0.4117923 +0.0191232 +0.4988831 +0.0196749 +0.4250843 +0.0150289 + + +llama3-70b-in-ts +0.4372443 +0.0146389 +0.5267929 +0.0209162 +0.4550647 +0.0137912 + + +llama3-70b-in-zs +0.4694388 +0.0167251 +0.5368701 +0.0222861 +0.4750490 +0.0146080 @@ -2227,21 +2257,26 @@

5.2 How do results differ meanRecall=mean(recall), seRecall=std.error(recall), meanF1=mean(f1), - sef1=std.error(f1)) - -kable(CT_Pub_MT_results.df, caption="Differences by Model and Subgroup on CT-pub")

+ sef1=std.error(f1)) +
## `summarise()` has grouped output by 'model'. You can override using the
+## `.groups` argument.
+
kable(CT_Pub_MT_results.df, caption="Differences by Model and Subgroup on CT-pub")
------++++++++ + + @@ -2252,12 +2287,204 @@

5.2 How do results differ

- - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Differences by Model and Subgroup on CT-pub
modeltrial_group meanPrecision sePrecision meanRecall
0.43448820.00846230.52727680.01045310.45179830.0072764gpt4-omni-tscancer0.33763330.03994970.54308990.03928550.39708810.0344699
gpt4-omni-tschronic kidney disease0.44300210.03035950.56253530.05182430.47892170.0327797
gpt4-omni-tsdiabetes0.43150310.02611830.59845200.03637880.48151790.0242987
gpt4-omni-tshypertension0.49368920.04216460.50763530.05706180.47088210.0340076
gpt4-omni-tsobesity0.38826680.04951000.46593320.04874580.40342060.0390612
gpt4-omni-zscancer0.35938960.06486990.51772480.03782280.38848220.0416444
gpt4-omni-zschronic kidney disease0.44982550.03591250.49617750.04221650.45505350.0337143
gpt4-omni-zsdiabetes0.43988740.02751780.56701200.03502870.47474530.0243584
gpt4-omni-zshypertension0.41104570.05257730.44042360.05248250.39163990.0329269
gpt4-omni-zsobesity0.36785170.04889320.40162090.04727450.35985840.0359429
llama3-70b-in-tscancer0.40937690.03214810.56665990.04833300.45196190.0284682
llama3-70b-in-tschronic kidney disease0.45383990.03677680.51582420.05688430.45916010.0350896
llama3-70b-in-tsdiabetes0.45710220.02480110.57087320.03432730.48623240.0235926
llama3-70b-in-tshypertension0.49835490.03700430.48183630.05994630.46579950.0376744
llama3-70b-in-tsobesity0.36037990.03288180.45402760.04379460.38650560.0317837
llama3-70b-in-zscancer0.41385440.04147520.63229740.04928220.48364210.0366250
llama3-70b-in-zschronic kidney disease0.52659880.04327490.57016150.06373680.50700080.0362418
llama3-70b-in-zsdiabetes0.49250060.02550360.53531390.03507230.49802890.0246477
llama3-70b-in-zshypertension0.50758600.04882540.51096070.06308180.47579890.0373848
llama3-70b-in-zsobesity0.38845610.03406090.44184560.04605360.39146920.0307577
diff --git a/StudentNotebooks/Assignment03/dar-f24-assignment3-template.pdf b/StudentNotebooks/Assignment03/dar-f24-assignment3-template.pdf index 69d83e7..4afbb1e 100644 Binary files a/StudentNotebooks/Assignment03/dar-f24-assignment3-template.pdf and b/StudentNotebooks/Assignment03/dar-f24-assignment3-template.pdf differ