From 21e5a99092116f582eaa5b83209dac617f62dc25 Mon Sep 17 00:00:00 2001
From: walczd3 <walczd3@idea-node-01.idea.rpi.edu>
Date: Tue, 5 Nov 2024 21:46:40 -0500
Subject: [PATCH 1/4] AqueousNotebook

---
 StudentNotebooks/Assignment05/Aqueous.Rmd | 203 ++++++++++++++++++++++
 1 file changed, 203 insertions(+)
 create mode 100644 StudentNotebooks/Assignment05/Aqueous.Rmd

diff --git a/StudentNotebooks/Assignment05/Aqueous.Rmd b/StudentNotebooks/Assignment05/Aqueous.Rmd
new file mode 100644
index 0000000..3b582c6
--- /dev/null
+++ b/StudentNotebooks/Assignment05/Aqueous.Rmd
@@ -0,0 +1,203 @@
+---
+title: "DAR F24 Project Status Notebook Template"
+author: "Student Name"
+date: "`r Sys.Date()`"
+output:
+  html_document:
+    toc: yes
+  pdf_document:
+    toc: yes
+subtitle: "DAR Project Name Here: 'Mars' or 'DeFi' or 'CTBench'"
+---
+
+## Instructions (DELETE BEFORE SUBMISSION)
+
+* Use this notebook is a template for your biweekly project status assignment. 
+* Use the sections starting with **BiWeekly Work Summary** as your outline for your submitted notebook.
+* Summarize ALL of your work in this notebook; **if you don't show and/or link to your work here, it doesn't exist for us!**
+
+1. Create a new copy of this notebook in the `AssignmentX` sub-directory of your team's github repository using the following naming convention
+
+   * `rcsid_assignmentX.Rmd` and `rcsid_assignmentX.pdf`
+   * For example, `bennek_assignment03.Rmd`
+
+2. Document **all** the work you did on your assigned project this week **using the outline below.** 
+
+3. You MUST include figures and/or tables to illustrate your work. *Screen shots are okay*, but include something!
+
+4. You MUST include links to other important resources (knitted HTMl files, Shiny apps). See the guide below for help.
+
+5. Commit the source (`.Rmd`) and knitted (`.html`) versions of your notebook and push to github
+
+6. **Submit a pull request.** Please notify Dr. Erickson if you don't see your notebook merged within one day. 
+
+7. **DO NOT MERGE YOUR PULL REQUESTS YOURSELF!!**
+
+See the Grading Rubric for guidance on how the contents of this notebook will be graded on LMS or GradeScope. 
+
+## Weekly Work Summary	
+
+**NOTE:** Follow an outline format; use bullets to express individual points. 
+
+* RCS ID: **Always** include this!
+* Project Name: **Always** include this!
+* Summary of work since last week 
+
+    * Describe the important aspects of what you worked on and accomplished
+
+* NEW: Summary of github issues added and worked 
+
+    * Issues that you've submitted
+    * Issues that you've self-assigned and addressed
+    
+* Summary of github commits 
+
+    * Include branch name(s)
+    * Include filenames for any added or changed files on github
+    * Include links to shared Shiny apps
+    
+* List of presentations,  papers, or other outputs
+
+    * Include browsable links (ie Google Slides, et.al.)
+    
+* List of references (if necessary) 
+* Indicate any use of group shared code base
+* Indicate which parts of your described work were done by you or as part of joint efforts
+
+* **Required:** Provide illustrating figures and/or tables
+
+## Personal Contribution	
+
+* Clearly defined, unique contribution(s) done by you: code, ideas, writing...
+* Include github issues you've addressed if any
+
+## Analysis: Question 1 (Provide short name)
+
+### Question being asked 
+
+_Provide in natural language a statement of what question you're trying to answer_
+
+### Data Preparation
+
+_Provide in natural language a description of the data you are using for this analysis_
+
+_Include a step-by-step description of how you prepare your data for analysis_
+
+_If you're re-using dataframes prepared in another section, simply re-state what data you're using_
+
+```{r, result01_data}
+# Include all data processing code (if necessary), clearly commented
+
+```
+
+### Analysis: Methods and results
+
+_Describe in natural language a statement of the analysis you're trying to do_
+
+_Provide clearly commented analysis code; include code for tables and figures!_
+
+```{r, result01_analysis}
+# Include all analysis code, clearly commented
+# If not possible, screen shots are acceptable. 
+# If your contributions included things that are not done in an R-notebook, 
+#   (e.g. researching, writing, and coding in Python), you still need to do 
+#   this status notebook in R.  Describe what you did here and put any products 
+#   that you created in github. If you are writing online documents (e.g. overleaf 
+#   or google docs), you can include links to the documents in this notebook 
+#   instead of actual text.
+
+```
+
+### Discussion of results
+
+_Provide in natural language a clear discussion of your observations._
+
+
+## Analysis: Question 2 (Provide short name)
+
+### Question being asked 
+
+_Provide in natural language a statement of what question you're trying to answer_
+
+### Data Preparation
+
+_Provide in natural language a description of the data you are using for this analysis_
+
+_Include a step-by-step description of how you prepare your data for analysis_
+
+_If you're re-using dataframes prepared in another section, simply re-state what data you're using_
+
+```{r, result02_data}
+# Include all data processing code (if necessary), clearly commented
+
+```
+
+### Analysis: Methods and Results  
+
+_Describe in natural language a statement of the analysis you're trying to do_
+
+_Provide clearly commented analysis code; include code for tables and figures!_
+
+```{r, result02_analysis}
+# Include all analysis code, clearly commented
+# If not possible, screen shots are acceptable. 
+# If your contributions included things that are not done in an R-notebook, 
+#   (e.g. researching, writing, and coding in Python), you still need to do 
+#   this status notebook in R.  Describe what you did here and put any products 
+#   that you created in github (documents, jupytor notebooks, etc). If you are writing online documents (e.g. overleaf 
+#   or google docs), you can include links to the documents in this notebook 
+#   instead of actual text.
+
+```
+
+### Discussion of results
+
+_Provide in natural language a clear discussion of your observations._
+
+
+## Analysis: Question 3 (Provide short name)
+
+### Question being asked 
+
+_Provide in natural language a statement of what question you're trying to answer_
+
+### Data Preparation
+
+_Provide in natural language a description of the data you are using for this analysis_
+
+_Include a step-by-step description of how you prepare your data for analysis_
+
+_If you're re-using dataframes prepared in another section, re-state what data you're using_
+
+```{r, result03_data}
+# Include all data processing code (if necessary), clearly commented
+
+```
+
+### Analysis methods used  
+
+_Describe in natural language a statement of the analysis you're trying to do_
+
+_Provide clearly commented analysis code; include code for tables and figures!_
+
+```{r, result03_analysis}
+# Include all analysis code, clearly commented
+# If not possible, screen shots are acceptable. 
+# If your contributions included things that are not done in an R-notebook, 
+#   (e.g. researching, writing, and coding in Python), you still need to do 
+#   this status notebook in R.  Describe what you did here and put any products 
+#   that you created in github. If you are writing online documents (e.g. overleaf 
+#   or google docs), you can include links to the documents in this notebook 
+#   instead of actual text.
+
+```
+
+
+### Discussion of results
+
+_Provide in natural language a clear discussion of your observations._
+
+## Summary and next steps
+
+_Provide in natural language a clear summary and your proposed next steps._
+

From 9e20abc9e075a6bb1cf531a998f7a60fe5c0474b Mon Sep 17 00:00:00 2001
From: walczd3 <walczd3@idea-node-01.idea.rpi.edu>
Date: Tue, 5 Nov 2024 21:47:42 -0500
Subject: [PATCH 2/4] aqueous_metadata

---
 StudentData/aqueous.Rds | Bin 0 -> 2705 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 StudentData/aqueous.Rds

diff --git a/StudentData/aqueous.Rds b/StudentData/aqueous.Rds
new file mode 100644
index 0000000000000000000000000000000000000000..ca671e58342e64bba57abd399909785b2f6631fd
GIT binary patch
literal 2705
zcmds3U2l_681A;(ugz}mD=~UOq8FgB59pTN8N`q-3#A4x%x;!rVTI(tsfSiqvkU*I
z|G{6NzQ8%qvQ4*SF-8(NJ<t2~ydU)NzE!DI7Aw{2LS=CYelBgmqYC#L+|S@w6V~NE
zM#F#%Fp50})tw1GM`ONi8Ayrekd5xtrJ&#nS41sqSW<qJD>Jt-3$vXIz{d21`4}sG
zj}ClHk~rL=-9yT#fSI3lJtTDGlSwQ%0S*QQ^Jd>C@yIWLMt@4lg%|PE>NXBO1^KpV
zAo_tsh5oAPDb`RJl;GL?nE|J``6h@7bv+8j;cU39VS3n&M~E`wwYL*Q$6t8W^${8e
zdDWLq?_D5}i@?p(uXPbF6k5lWM41BfP|&&}YTI;(qF^9|Xgu~ffo;~2m)A<S?6L1r
z!RNh(LdH3l)xJN)n8d8$d5T8TvV!E36GU(j0U(HpqoheXR$$tu5@X#kd`iN?q;ZTx
zFR@_q_0xb3NVNMJxvq!3X(8E2yOTN-Lg*qznDeMRBLHCg$6;@jrt-e`%QJJ#GP(x3
zzZr(NwKS5qLH_lzwPwdQALcpqKN;6{`rizX<a+h}ZfmApJj?cl<aT~<(&u>DKi_jX
zUpao$%Js@C*Pp>>&#o=8hhzgwIu@Nk*0JOSs(r-XvWKEIlub`IkmT5%zYT7RCvp!-
z2DUqw)=*4VNo6E{Cu=%^-FXppRhua<^>gx1lZ>ngGLlatNrc>VEE_b<`jDxwWLZmU
z;^%78hOONhR76YNwKPdpG(m)j0t6PUVYY|5!ymfTQ4R!qe^1%pXYRMt*2pK<_ofQ$
zcs14ybRa^Kk|vtaOxf=8lcAj)fwp#OdMd0un`#dkm_>m@QcV@KT)Z{|Wp%7nnm^%!
z8Jb_S@1N(;C@5$)D@i72hisURY;{kxyRs?q&|9pyhdOow4e|xCl+T-vq3Ei8kbvA`
zCFv5>%Ajh_u$HC8QmCEMv{js4#_Dkr37QN(VSo&zrOqLw3SV%Gkjerxe-NP%4(05a
zt1__8PD8eB4@el2=xiRXqmL60&SP$KPG~s6sO&%aFy#Ju(3XqFXy=?l=#qMqlFvJw
R(nFxN_!S;d{co`R_yrfECY=BP

literal 0
HcmV?d00001


From e017606a8b4966e03428055712a9c5cb191ee25c Mon Sep 17 00:00:00 2001
From: walczd3 <walczd3@idea-node-01.idea.rpi.edu>
Date: Tue, 5 Nov 2024 21:55:48 -0500
Subject: [PATCH 3/4] Saved aqueous1

---
 StudentNotebooks/Assignment05/Aqueous.Rmd | 160 +++++++++++++++++-----
 1 file changed, 125 insertions(+), 35 deletions(-)

diff --git a/StudentNotebooks/Assignment05/Aqueous.Rmd b/StudentNotebooks/Assignment05/Aqueous.Rmd
index 3b582c6..285107c 100644
--- a/StudentNotebooks/Assignment05/Aqueous.Rmd
+++ b/StudentNotebooks/Assignment05/Aqueous.Rmd
@@ -1,5 +1,6 @@
+
 ---
-title: "DAR F24 Project Status Notebook Template"
+title: "Aqueous SHERLOC minerals"
 author: "Student Name"
 date: "`r Sys.Date()`"
 output:
@@ -7,34 +8,9 @@ output:
     toc: yes
   pdf_document:
     toc: yes
-subtitle: "DAR Project Name Here: 'Mars' or 'DeFi' or 'CTBench'"
+subtitle: "Mars DAR-F24"
 ---
 
-## Instructions (DELETE BEFORE SUBMISSION)
-
-* Use this notebook is a template for your biweekly project status assignment. 
-* Use the sections starting with **BiWeekly Work Summary** as your outline for your submitted notebook.
-* Summarize ALL of your work in this notebook; **if you don't show and/or link to your work here, it doesn't exist for us!**
-
-1. Create a new copy of this notebook in the `AssignmentX` sub-directory of your team's github repository using the following naming convention
-
-   * `rcsid_assignmentX.Rmd` and `rcsid_assignmentX.pdf`
-   * For example, `bennek_assignment03.Rmd`
-
-2. Document **all** the work you did on your assigned project this week **using the outline below.** 
-
-3. You MUST include figures and/or tables to illustrate your work. *Screen shots are okay*, but include something!
-
-4. You MUST include links to other important resources (knitted HTMl files, Shiny apps). See the guide below for help.
-
-5. Commit the source (`.Rmd`) and knitted (`.html`) versions of your notebook and push to github
-
-6. **Submit a pull request.** Please notify Dr. Erickson if you don't see your notebook merged within one day. 
-
-7. **DO NOT MERGE YOUR PULL REQUESTS YOURSELF!!**
-
-See the Grading Rubric for guidance on how the contents of this notebook will be graded on LMS or GradeScope. 
-
 ## Weekly Work Summary	
 
 **NOTE:** Follow an outline format; use bullets to express individual points. 
@@ -71,25 +47,138 @@ See the Grading Rubric for guidance on how the contents of this notebook will be
 * Clearly defined, unique contribution(s) done by you: code, ideas, writing...
 * Include github issues you've addressed if any
 
-## Analysis: Question 1 (Provide short name)
+## PACKAGES
+```{r}
+# Set the default CRAN repository
+local({r <- getOption("repos")
+       r["CRAN"] <- "http://cran.r-project.org" 
+       options(repos=r)
+})
+
+if (!require("pandoc")) {
+  install.packages("pandoc")
+  library(pandoc)
+}
+
+if (!require("ggplotify")) {
+  install.packages("ggplotify")
+  library(ggplotify)
+}
+
+if (!require("car")) {
+  install.packages("car")
+  library(car)
+}
+if (!require("ggbiplot")) {
+  install.packages("ggbiplot")
+  library(ggbiplot)
+}
+
+# Required packages for M20 LIBS analysis
+if (!require("rmarkdown")) {
+  install.packages("rmarkdown")
+  library(rmarkdown)
+}
+if (!require("tidyverse")) {
+  install.packages("tidyverse")
+  library(tidyverse)
+}
+if (!require("stringr")) {
+  install.packages("stringr")
+  library(stringr)
+}
+
+if (!require("ggbiplot")) {
+  install.packages("ggbiplot")
+  library(ggbiplot)
+}
+
+if (!require("pheatmap")) {
+  install.packages("pheatmap")
+  library(pheatmap)
+}
+if (!require("ggtern")) {
+  install.packages("ggtern")
+  library(ggtern)
+}
+
+if (!require("gridExtra")) {
+  install.packages("gridExtra")
+  library(gridExtra)
+}
+if (!require("randomForest")) {
+  install.packages("randomForest")
+  library(randomForest)
+}
+```
 
-### Question being asked 
 
-_Provide in natural language a statement of what question you're trying to answer_
+```{r}
+metadata_libs <- readRDS("~/DAR-Mars-F24/StudentData/v1_libs_to_sample.Rds") #metadata of libs. Unconnected with 
+libs <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_libs.Rds")
+lithology <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_lithology.Rds")
+sherloc <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_sherloc.Rds")
+pixl <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_pixl.Rds")
+metadata_pixl <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_sample_meta.Rds")
 
-### Data Preparation
+oxide_elem <- 1 / c(2.1392, 1.6681, 1.8895, (1.2865 + 1.4297)/2, 1.6582, 1.3992, 1.3480, 1.2046, 2.2916, 2.4972, 1, 1.4615, 1.2912) #/2 is used to find average of FeO and Fe2O3 
+elem_pixl <- pixl[,2:ncol(pixl)]
+for (i in seq(length(oxide_elem))) {
+  elem_pixl[,i] = elem_pixl[,i] * oxide_elem[i]
+}
 
-_Provide in natural language a description of the data you are using for this analysis_
+names(elem_pixl) <- c("Si", "Ti", "Al", "Fe", "Mg", "Ca", "Na", "K", "P", "S", "Cl", "Cr", "Mn")
+elem_pixl["Total_Cation"] <- rowSums(elem_pixl)
+elem_pixl["O"] <- 100 - elem_pixl$Total_Cation
 
-_Include a step-by-step description of how you prepare your data for analysis_
+```
 
-_If you're re-using dataframes prepared in another section, simply re-state what data you're using_
+## Analysis 1
+
+### Creating Aqueous Dataframes based off of ChatBS
+
+_Aqueous indicates whether a mineral has been chemically altered by an aqueous solvent or whether it precipitated out of ancient seas_
+
+_For the aqueous dataframe, Dr. Rogers implied a few notable 'features' to add. (1) creation of group of feature (e.g: Ca, Mg, and Fe carbonates will all be placed with a  Carbonates tag). (2) Is it aqueous or not (0 or 1). Quartz has altering forms of creation, but that will be identified as 1. Potentially we could add another feature that describes whether it partly is or not (but I think we will skip this for now). (3) Chemical Formula. Potentially apply NLP to process similarities._
 
 ```{r, result01_data}
-# Include all data processing code (if necessary), clearly commented
+
+#Dona created the same naming sequence for each sherloc and lithology dataframe so we can just use the names from one to identify aqueous and non-aqueous minerals 
+sample <- sherloc[, 1]
+sherloc <- sherloc[, names(sherloc) != "Sample", drop = FALSE] # logical mask; negating the logical vector (-c(...)) yields -1/0 indices, which only ever drops column 1 and returns zero columns when "Sample" is absent
+
+#1 - feed into grepl
+common_names <- c("Sulfate|sulfate" = "Sulfates", "Carbonate|carbonate" = "Carbonates", "Organic" = "Organic Matter", "Perchlorate|perchlorate" = "Perchlorate")
+
+common_groups <- rep(NA, ncol(sherloc))
+for (i in 1:length(common_names)) {
+  check <- grepl(names(common_names)[i], names(sherloc))
+  common_groups[check] = common_names[[i]]
+}
+common_groups[which(is.na(common_groups) == T)] = names(sherloc)[which(is.na(common_groups) == T)] 
+
+#2
+#quartz is non-aq with correspondence to the note above
+aqueous_total <- c(0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,0)
+aq <- sherloc[,aqueous_total == 1]
+nonaq <- sherloc[,aqueous_total == 0]
+
+#3 - formulas in order of common_groups
+#formulas have | in them to separate between distinguishable types. e.g: ferrous and ferric oxide 
+formulas <- c("(Na,Cl)Al(Si,Al)Si2O8", "(Mg,Fe,Ca)SO4", "(Mg,Fe,Ca)SO4","(Mg,Fe,Ca)SO4","(Mg,Fe,Ca)SO4","(Mg,Fe,Ca)SO4",
+"(Mg,Fe,Ca)SO4","(K,Na)ClO4|Mg(ClO4)2","(K,Na)ClO4|Mg(ClO4)2", "SiO4*nH2O", "PO4", "(Mg,Mn,Ca,Zn)(Fe,Al,Cr)Si2O6",
+"(Mg,Fe)2SiO4","(Mg,Fe,Ca)CO3","(Mg,Fe,Ca)CO3","(Mg,Fe,Ca)CO3","(M2/M3)2-3 (Al, Fe, Mg)2-3 (Si, Al)4-5 O10-11", "(Na,C,K)Al(Si,Al)Si2O8","SiO2", "Ca5(PO4)3(F,Cl,OH)", "FeTiO3|Fe2TiO4", "NaCl", "Fe2O3|Fe3O4","Fe2O3|Fe3O4*nH2O",
+NA,NA,NA,"(K,Ca,Mg,Fe,Al)4_6(Si,Al)8O20(OH,F)4","(Mg,Fe)6AlSi3O10(OH)8","Al2Si2O5(OH)4","(Fe,Mg)Cr2O4","FeTiO3","ZrSiO4",NA, "(Mg,Zn)Al2O4|Ca3Al2Si3O12")
+
+aqueous.df <- data.frame(mineral = names(sherloc), common_mineral = common_groups, aqueous = aqueous_total, formula = formulas)
+
+
+write_rds(aqueous.df, "~/DAR-Mars-F24/StudentData/aqueous.Rds")
+
 
 ```
 
+
 ### Analysis: Methods and results
 
 _Describe in natural language a statement of the analysis you're trying to do_
@@ -201,3 +290,4 @@ _Provide in natural language a clear discussion of your observations._
 
 _Provide in natural language a clear summary and your proposed next steps._
 
+

From 085d6e053b890522cac0036e8ffe1a44f5257125 Mon Sep 17 00:00:00 2001
From: walczd3 <walczd3@idea-node-01.idea.rpi.edu>
Date: Tue, 5 Nov 2024 22:23:12 -0500
Subject: [PATCH 4/4] asmt05

---
 .../Assignment05/walczd3-assignment05.Rmd     | 487 ++++++++++++++++++
 1 file changed, 487 insertions(+)
 create mode 100644 StudentNotebooks/Assignment05/walczd3-assignment05.Rmd

diff --git a/StudentNotebooks/Assignment05/walczd3-assignment05.Rmd b/StudentNotebooks/Assignment05/walczd3-assignment05.Rmd
new file mode 100644
index 0000000..b60ffd9
--- /dev/null
+++ b/StudentNotebooks/Assignment05/walczd3-assignment05.Rmd
@@ -0,0 +1,487 @@
+---
+title: "DAR F24 Biweekly 1"
+author: "David Walczyk"
+date: "`r Sys.Date()`"
+output:
+  pdf_document:
+    toc: yes
+  html_document:
+    toc: yes
+subtitle: "DAR Project Name: Mars"
+---
+
+## Packages Load In
+
+```{r}
+# Set the default CRAN repository
+local({r <- getOption("repos")
+       r["CRAN"] <- "http://cran.r-project.org" 
+       options(repos=r)
+})
+
+if (!require("pandoc")) {
+  install.packages("pandoc")
+  library(pandoc)
+}
+
+if (!require("ggplotify")) {
+  install.packages("ggplotify")
+  library(ggplotify)
+}
+
+if (!require("car")) {
+  install.packages("car")
+  library(car)
+}
+if (!require("ggbiplot")) {
+  install.packages("ggbiplot")
+  library(ggbiplot)
+}
+
+# Required packages for M20 LIBS analysis
+if (!require("rmarkdown")) {
+  install.packages("rmarkdown")
+  library(rmarkdown)
+}
+if (!require("tidyverse")) {
+  install.packages("tidyverse")
+  library(tidyverse)
+}
+if (!require("stringr")) {
+  install.packages("stringr")
+  library(stringr)
+}
+
+if (!require("ggbiplot")) {
+  install.packages("ggbiplot")
+  library(ggbiplot)
+}
+
+if (!require("pheatmap")) {
+  install.packages("pheatmap")
+  library(pheatmap)
+}
+if (!require("ggtern")) {
+  install.packages("ggtern")
+  library(ggtern)
+}
+
+if (!require("gridExtra")) {
+  install.packages("gridExtra")
+  library(gridExtra)
+}
+if (!require("randomForest")) {
+  install.packages("randomForest")
+  library(randomForest)
+}
+if (!require("caret")) {
+  install.packages("caret")
+  library(caret)
+}
+if (!require("ggimage")) {
+  install.packages("ggimage")
+  library(ggimage)
+}
+```
+
+## Data Load In
+```{r}
+
+#-------------LIBS-------------------
+# Load the saved LIBS data with locations added
+libs.df <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds")
+libs.std_dev <- libs.df %>%
+    select((c(distance_mm,Tot.Em.,SiO2_stdev,TiO2_stdev,Al2O3_stdev,FeOT_stdev,
+             MgO_stdev,Na2O_stdev,CaO_stdev,K2O_stdev,Total)))
+libs.df <- libs.df %>%  
+  select(!(c(distance_mm,Tot.Em.,SiO2_stdev,TiO2_stdev,Al2O3_stdev,FeOT_stdev,
+             MgO_stdev,Na2O_stdev,CaO_stdev,K2O_stdev,Total)))
+
+# Convert the points to numeric
+libs.df$point <- as.numeric(libs.df$point)
+
+# Review what we have
+summary(libs.df)
+
+#----------PIXL----------------------
+# Load the saved PIXL data with locations added
+pixl.df <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/samples_pixl_wide.Rds")
+
+pixl.df
+# Convert to factors
+pixl.df[sapply(pixl.df, is.character)] <- lapply(pixl.df[sapply(pixl.df, is.character)], 
+                                       as.factor)
+
+# Review our dataframe
+summary(pixl.df)
+
+#----------SHERLOC----------------------
+# Read in data as provided.  
+sherloc_abrasion_raw <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/abrasions_sherloc_samples.Rds")
+
+# Clean up data types
+sherloc_abrasion_raw$Mineral<-as.factor(sherloc_abrasion_raw$Mineral)
+sherloc_abrasion_raw[sapply(sherloc_abrasion_raw, is.character)] <- lapply(sherloc_abrasion_raw[sapply(sherloc_abrasion_raw, is.character)], 
+                                       as.numeric)
+# Transform NA's to 0
+sherloc_abrasion_raw <- sherloc_abrasion_raw %>% replace(is.na(.), 0)
+
+# Reformat data so that rows are "abrasions" and columns list the presence of minerals. 
+# Do this by "pivoting" to a long format, and then back to the desired wide format.  
+
+sherloc_long <- sherloc_abrasion_raw %>%
+  pivot_longer(!Mineral, names_to = "Name", values_to = "Presence")
+
+# Make abrasion a factor 
+sherloc_long$Name <- as.factor(sherloc_long$Name)
+
+# Make it a matrix
+sherloc.matrix <- sherloc_long %>%
+  pivot_wider(names_from = Mineral, values_from = Presence)
+
+# Get sample information from PIXL and add to measurements -- assumes order is the same
+
+sherloc.df <- cbind(pixl.df[,c("sample","type","campaign","abrasion")],sherloc.matrix)
+
+# Review what we have
+summary(sherloc.df)
+
+
+# Load the saved lithology data with locations added
+lithology.df<- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/mineral_data_static.Rds")
+
+# Cast samples as numbers
+lithology.df$sample <- as.numeric(lithology.df$sample)
+
+# Convert rest into factors
+lithology.df[sapply(lithology.df, is.character)] <-
+  lapply(lithology.df[sapply(lithology.df, is.character)], 
+                                       as.factor)
+
+# Keep only first 16 samples because the data for the rest of the samples is not available yet
+lithology.df<-lithology.df[1:16,]
+
+# Create a matrix containing only the numeric measurements.  The remaining features are metadata about the sample. 
+lithology.matrix <- sapply(lithology.df[,6:40],as.numeric)-1            
+
+# Review the structure of our matrix
+str(lithology.matrix)
+
+supercam_libs_moc_loc <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds")
+
+oxide_elem <- 1 / c(1.3475, 1.6582, 1.8895, 2.1392, 2.2916, 2.4972, 1, 1.2046, 1.3992, 1.6681, 1.4615, 1.2912, (1.2865 + 1.4297) / 2) # oxide -> element divisors in the column order named below; Fe is the mean of the FeO (1.2865) and Fe2O3 (159.69 / (2 * 55.845) = 1.4297) factors
+elem_pixl <- pixl.df[, 2:14]
+for (i in seq_along(oxide_elem)) {
+  elem_pixl[, i] <- elem_pixl[, i] * oxide_elem[i] # scale each oxide wt% column by its element fraction
+}
+names(elem_pixl) <- c("Na", "Mg", "Al", "Si", "P", "S", "Cl", "K", "Ca", "Ti", "Cr", "Mn", "Fe")
+elem_pixl["Total_Cation"] <- rowSums(elem_pixl) # sum of the 13 converted element columns
+elem_pixl["O"] <- 100 - elem_pixl$Total_Cation # remainder of 100 wt% is attributed to oxygen
+
+
+```
+
+
+## BiWeekly Work Summary	
+
+**NOTE:** Follow an outline format; use bullets to express individual points. 
+
+* RCS ID: **walczd3** 
+* Project Name: **Mars** 
+* Summary of work since last week 
+
+_Last week I looked at whether calculated molar fractions could help predict whether Feldspars or Plagioclase (igneous rocks) molar fractions are always correlated to their lead element (lead element in chemical formulas between either K-Spar or Albite (Na) & Anorthite (Ca)). I found it to be true in the case of K-spar. I also wondered whether the feldspar/plagioclase ternary diagram was indicative of prediction as well. All possible feldspar/plagioclase values were within the range of possibility which was nice to see but it wasn't perfect. Now, after taking a deeper dive into the literature I'd like to analyze more aqueous elements especially that of amorphous silicates, carbonates, phyllosilicates and hydrated sulfates (especially that of serpentine) in addition to connecting PIXL and LIBS as shown by Charlotte and Margo. I hope to use this information to draw mass readings on whether we can interpret LIBS mineral possibilities._
+
+* Summary of github commits 
+
+_No GitHub commits as of now. I've worked on creating wireframes for the 2D app but have made no code changes._ 
+    
+* List of presentations,  papers, or other outputs
+
+* Anderson et al. 2022 https://www.sciencedirect.com/science/article/abs/pii/S0584854721003049?via%3Dihub
+* Cousin et al. 2022 https://www.sciencedirect.com/science/article/pii/S0584854721002986 
+* Pileri et al. 2021 https://www.hou.usra.edu/meetings/lpsc2021/pdf/1606.pdf
+* Clegg et al. https://ssed.gsfc.nasa.gov/IPM/2014/PDF/1086.pdf
+* https://ars.els-cdn.com/content/image/1-s2.0-S0584854721002986-mmc1.pdf
+
+* **Required:** Provide illustrating figures and/or tables
+
+## Personal Contribution	
+
+* Clearly defined, unique contribution(s) done by you: code, ideas, writing...
+* Include github issues you've addressed if any
+
+## Analysis: Question 1 (Provide short name)
+
+### What are the most important features for predicting minerals in the lithology dataset? 
+
+_Using lithology so that our prediction is binary, I would like to see the elemental composition differences between AQ and non-AQ minerals using randomForest._
+
+### Data Preparation
+
+_Provide in natural language a description of the data you are using for this analysis_
+
+_Include a step-by-step description of how you prepare your data for analysis_
+
+_If you're re-using dataframes prepared in another section, simply re-state what data you're using_
+```{r}
+names(lithology.df)
+#Manganese, phylosillicates, silicates, carbonates, amorphous silicates, hydrated sulfates
+
+aqueous.lith <- c(0,0,0,0,.5,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,0,1,
+             1,0,0,
+             0,0)
+
+aqueous.sher <- c(0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,.5,1,0,1,1,1,0,0,1,1,1,1,0,0,
+                  0,1,
+                  0)
+
+libs_typed <- readRDS("~/DAR-Mars-F24/StudentData/libs_typed.Rds")
+libs_targets <- libs_typed[which(libs_typed$`earthsample?` == 1), ]
+pixl_sol <- readRDS("~/DAR-Mars-F24/StudentData/pixl_sol_coordinates.Rds")
+#base random forest on
+library(randomForest)
+
+#use lithology for y as sherloc is a subset of mineral presence
+y.aq <-data.frame(lithology.matrix[,which(aqueous.lith > 0)])
+y.nonaq <- data.frame(lithology.matrix[,which(aqueous.lith == 0)])
+
+#remove hydrated carbonates
+y.aq <- y.aq %>%
+  select(-Hydrated_Carbonates)
+
+shared_feats <- names(pixl.df[,2:14])[c(4,10,3,13,2,9,1,8)]
+df <- pixl.df[,shared_feats]
+full_df <- pixl.df[,2:14]
+names(df)[4] <- "FeO"
+names(full_df)[ncol(full_df)] <- "FeO"
+
+#total PIXL features
+aq.fullmodel <- data.frame()
+nonaq.fullmodel <- data.frame()
+
+for (i in seq_len(ncol(y.aq))) { # one presence/absence forest per aqueous mineral column
+  model <- randomForest(y ~ ., data = cbind(full_df, y = factor(y.aq[, i])), ntree = 100, importance = TRUE)
+  imp <- importance(model)[, "MeanDecreaseAccuracy"] # select by name: position 3 is the accuracy column only when y has exactly two classes
+  aq.fullmodel <- rbind(aq.fullmodel, imp)
+}
+rownames(aq.fullmodel) <- colnames(y.aq)
+names(aq.fullmodel) <- names(full_df)
+
+for (i in seq_len(ncol(y.nonaq))) { # same procedure for the non-aqueous mineral columns
+  model <- randomForest(y ~ ., data = cbind(full_df, y = factor(y.nonaq[, i])), ntree = 100, importance = TRUE)
+  imp <- importance(model)[, "MeanDecreaseAccuracy"] # select by name: position 3 is the accuracy column only when y has exactly two classes
+  nonaq.fullmodel <- rbind(nonaq.fullmodel, imp)
+}
+rownames(nonaq.fullmodel) <- colnames(y.nonaq)
+names(nonaq.fullmodel) <- names(full_df)
+
+plot1 <- as.ggplot(pheatmap(aq.fullmodel, scale = "none", treeheight_row = 0, treeheight_col = 0, legend = FALSE, angle_col = 90))
+plot2 <- as.ggplot(pheatmap(nonaq.fullmodel, scale = "none", treeheight_row = 0, treeheight_col = 0, legend = FALSE, angle_col = 90))
+
+grid.arrange(plot1, plot2, ncol = 2, top = "Random Forest Mean Decrease in Accuracy (AQ [left] vs. NonAQ [right])")
+
+```
+## Analysis: Question 2 (Provide short name)
+
+### Can we connect LIBS to PIXL and SHERLOC using the libs_typed dataset?
+
+_Provide in natural language a statement of what question you're trying to answer_
+
+
+### Data Preparation
+
+_Provide in natural language a description of the data you are using for this analysis_
+
+_Include a step-by-step description of how you prepare your data for analysis_
+
+_LIBS is not a fully accurate prediction of wt% on Mars' surface; it is exactly that, a prediction using multivariate models. Training and validation data are from terrestrial datasets of different spectra and standard protocols. Calibration targets are used in multivariate models where the calibrations are used to tune models._
+
+```{r}
+
+#connect pixl to libs in a way that makes sense. Calibration targets are used as a proxy for measuring the health and status of the machine but are also used to calibrate the quantitative models used to predict wt% of LIBS oxides. Specific calibration targets do not directly have any meaning into the mineral composition of that sample. 
+libs.pixl.combined <- readRDS("~/DAR-Mars-F24/StudentData/PIXL_LIBS_Combined.Rds") #connect the type and maybe abrasion types to prediction models as well for categorizing LIBS data on avg_sol dataset
+
+#sd of each unique sol
+sd_cols <- names(supercam_libs_moc_loc)[grepl("stdev",names(supercam_libs_moc_loc))] 
+avg_sd_sol <- supercam_libs_moc_loc %>%
+  select(sol, lat, lon, all_of(sd_cols)) %>%
+  group_by(sol) %>%
+  summarise(SiO2_stdev = mean(SiO2_stdev), TiO2_stdev = mean(TiO2_stdev), Al2O3_stdev = mean(Al2O3_stdev), FeOT_stdev = mean(FeOT_stdev), MgO_stdev = mean(MgO_stdev), CaO_stdev = mean(CaO_stdev), Na2O_stdev = mean(Na2O_stdev), K2O_stdev = mean(K2O_stdev))
+
+apply(avg_sd_sol[,2:ncol(avg_sd_sol)], 2, max)
+
+#average value of oxides for each sol
+avg_sol <- supercam_libs_moc_loc %>% 
+  group_by(sol) %>% #by sol just because overlapping sols with different lat-lon have the same averages
+  summarise(SiO2 = mean(SiO2), TiO2 = mean(TiO2), Al2O3 = mean(Al2O3), FeOT = mean(FeOT), MgO = mean(MgO),
+            CaO = mean(CaO), Na2O = mean(Na2O), K2O = mean(K2O), avg_total = mean(Total),n = n())
+
+dim(avg_sol)[1] == length(unique(supercam_libs_moc_loc$sol))
+
+#run through models to 
+names(libs_typed)
+avg_earthsample <- libs_typed[which(libs_typed$`earthsample?` == 1),] %>%
+  group_by(sol, `earthsample?`, type) %>%
+  summarise(SiO2 = mean(SiO2), TiO2 = mean(TiO2), Al2O3 = mean(Al2O3), FeOT = mean(FeOT), MgO = mean(MgO),
+            CaO = mean(CaO), Na2O = mean(Na2O), K2O = mean(K2O), n = n())
+
+
+#are # of rows equal == the sum of all equal rows sols equal to the # of rows
+( nrow(libs_typed) == nrow(libs.pixl.combined) ) & (sum(libs_typed$sol == libs.pixl.combined$sol.x) == nrow(libs_typed))
+
+
+#use this
+libs_typed <- libs_typed %>%
+  mutate(type = libs.pixl.combined$type, campaign = libs.pixl.combined$campaign, name = libs.pixl.combined$name, abrasion = libs.pixl.combined$abrasion) 
+
+pixl_sol <- pixl_sol[-1,] %>%
+  mutate(lat = as.numeric(Lat), lon = as.numeric(Long)) %>%
+  select(-c(Lat,Long)) 
+
+
+ggplot(libs_typed, aes(x = lat,y = lon, color = type)) + 
+  geom_point() + 
+  geom_point(data = pixl_sol[-1,], aes(x = lat, y= lon), colour = "black", shape = 17, size = 3) + 
+  theme_minimal() + 
+  labs(caption = "Black Triangles are PIXL samples")
+#igneous and sedimentary samples are distinctly separated by bridge of 0 waypoints
+
+# Shared oxide columns for PIXL (source names such as Na20 / Mgo / FeO-T renamed to the LIBS spellings) and for LIBS
+pixl <- pixl_sol %>%
+  select(Na2O = Na20, MgO = Mgo, CaO = Cao, Al2O3 = Al203, TiO2 = Ti02, FeOT = `FeO-T`, SiO2 = Si02, K2O = K20, type, campaign)
+
+libs <- libs_typed %>%
+  select(Na2O, MgO, CaO, Al2O3, TiO2, FeOT, SiO2, K2O, type, campaign) %>%
+  replace(is.na(.), 0) # every NA in the selected columns becomes 0
+
+#one-hot encode and remove type0, campaign0, etc. Also removed abrasion from all dataframes because I think it will overfit
+train <- data.frame(predict(dummyVars("~ .", data = pixl), newdata = pixl)) #has dups btw
+test <- data.frame(predict(dummyVars("~ .", data = libs), newdata = libs)) %>%
+  select(-c(type0, campaign0)) # type0/campaign0 presumably come from the NA -> 0 fill applied to libs — TODO confirm
+
+# PCA on the stacked PIXL (train) and LIBS (test) rows; type/camp are stacked in the same order for colouring
+type <- c(pixl_sol$type, libs_typed$type)
+camp <- c(pixl_sol$campaign, libs_typed$campaign)
+# TRUE/FALSE instead of the reassignable T/F; `scale.` is prcomp's full argument name (it was partial-matched before)
+pca.fit <- prcomp(rbind(train, test), center = TRUE, scale. = FALSE)
+summary(pca.fit) #93% within PC3
+#colored by type
+ggbiplot::ggbiplot(pca.fit,
+                   groups = type, circle = TRUE) +
+  theme_minimal() #reaffirms the importance of those sedimentary samples of high CaO content likely. Glad to see they are mostly identified by rock type
+#by campaign plots the same highlighted points
+
+library(tidymodels)
+# Predict every mineral label for the LIBS rows (test) with one random forest per mineral, trained on the PIXL rows (train).
+pred.matrix <- lithology.matrix[,-c(10,13,24,26,28)] #remove carbonates, perchlorates, Na-perch, hydrated carbonates, hydrated iron oxides
+#i don't want to remove carbonates because it shows that every single sample contains carbonates except for Roubion
+minerals <- colnames(pred.matrix) #perchlorates, Na-perch, hydrated carbonates
+libs_predictions <- data.frame(sample = seq_len(nrow(test)))
+for (i in seq_along(minerals)) {
+  # first row of pred.matrix is dropped, as was done for pixl_sol — TODO confirm the rows stay aligned with train
+  rf_data <- cbind(train, y = factor(pred.matrix[-1, i]))
+  # randomForest's argument is `ntree`; the old `ntrees` fell into `...` and was ignored, so 500 trees were grown
+  rf_model <- randomForest(y ~ ., data = rf_data, ntree = 100)
+  libs_predictions[, i + 1] <- predict(rf_model, test) # +1 because the sample column is first
+}
+libs_predictions <- libs_predictions[, -1, drop = FALSE] # drop the placeholder sample column, staying a data frame
+names(libs_predictions) <- minerals
+
+
+#model on PIXL shared features and use model to predict average_sol mineral targets.
+
+```
+
+
+
+
+## Analysis: Question 3 (Provide short name)
+
+### Question being asked 
+
+_Provide in natural language a statement of what question you're trying to answer_
+
+### Data Preparation
+
+_Provide in natural language a description of the data you are using for this analysis_
+
+_Include a step-by-step description of how you prepare your data for analysis_
+
+_If you're re-using dataframes prepared in another section, simply re-state what data you're using_
+
+```{r}
+#Plot pyroxene, fix feldspar-plagioclase graph (might have to calculate molar fractions manually), maybe another ternary graph but I'm not sure
+#add geom segments onto the ternary plot
+
+mineral_percents <- read.csv("mineral_wts.csv") # relative path: resolved against the working directory at knit time
+dim(mineral_percents)
+imagepath <- "~/DAR-Mars-F24/Resources/Ternary-phase-diagram-of-feldspar-Endmember-and-solids-solution-not-necessarily-stable.png"
+#anorthite, albite, orthoclase - the three end-members; imagepath is not used by the plot below
+#uses: library(ggimage)
+
+ggtern::ggtern(data = mineral_percents[17:nrow(mineral_percents),], aes(x = Xab, y = Xor, z = Xan)) + # rows 17 onward are drawn as the LIBS points
+  geom_point(aes(fill = "LIBS"),pch = 21, color = "purple") +
+  theme_rgbw()+
+  labs(x="Ab", #NaAlSi3O8
+   y="Or", #KAlSi3O8 (orthoclase; label was "O", inconsistent with Ab/An)
+   z="An", #CaAl2Si2O8
+   title = "Feldspar - Plagioclase Ternary Graph", color = "Type") +
+  annotate("segment", x = 100, y = 0, z= 11, xend = 90, yend = 11, zend = 12) + # hand-entered boundary segments; presumably traced from the reference feldspar diagram — TODO confirm
+  annotate("segment", x = 57, y = 32, z = 11, xend = 0, yend = 90, zend = 11) +
+  annotate("segment", x = 0, y = 10, z = 90, xend = 63, yend = 10, zend = 28) +
+  annotate("segment", x = 70, y = 0, z = 30, xend = 63, yend = 10, zend = 28) +
+  annotate("segment", x = 45, y = 10, z = 50, xend = 48, yend = 0, zend = 52) +
+  annotate("segment", x = 27, y = 10, z = 63, xend = 30, yend = 0, zend = 70 ) +
+  annotate("segment", x = 9, y = 10, z = 80, xend = 10, yend = 0, zend = 90) +
+  annotate("segment", x = 80, y = 10, z = 10, xend = 90, yend = 10, zend = 0) +
+  annotate("segment", x = 63, y = 38, z = 0, xend = 57, yend = 32, zend = 11) +
+  annotate("segment", x = 80, y = 10, z = 10, xend = 66, yend = 14, zend = 20) +
+  geom_point(data = mineral_percents[2:16,], aes(x = Xab, y = Xor, z = Xan, color = pixl_sol$type), shape = 17, size = 3) # rows 2-16 are coloured by pixl_sol$type, so pixl_sol must have the same 15 rows in the same order
+  
+
+pixl_sol
+
+
+
+#create and then color LIBS and PIXL
+```
+
+
+
+
+
+## junk
+
+
+
+log_pixl <- cbind(as.data.frame(log(pixl.df[,2:14] + 1)))#added one to all values to offset Inf replaced as zeroes. log(pixl.df) == 0 != pixl.df == 0
+log_pixlShare <- cbind(log_pixl[,c(4,10,3,13,2,9,1,8)], label = pixl.df$type)
+log_pixlLone <- cbind(log_pixl[,c(5,6,7,11,12)], label = pixl.df$type)
+log_libs <- as.data.frame(log(libs.df[,6:13] + 1))
+
+share_long <- log_pixlShare %>%
+  pivot_longer(cols = names(log_pixlShare)[-ncol(log_pixlShare)], values_to = "Log(Values)", names_to = "Variable")
+
+lone_long <- log_pixlLone %>%
+  pivot_longer(cols = names(log_pixlLone)[-ncol(log_pixlLone)], values_to = "Log(Values)", names_to = "Variable")
+
+libs_long <- log_libs %>%
+  pivot_longer(cols = names(log_libs), values_to = "Log(Values)", names_to = "Variable")
+
+ps <- ggplot(data = share_long, aes(x = Variable, y = `Log(Values)`)) + 
+  coord_flip() + 
+  geom_boxplot(aes(fill = label))
+
+pl <-  ggplot(data = lone_long, aes(x = Variable, y = `Log(Values)`)) + 
+  coord_flip() + 
+  geom_boxplot(aes(fill = label))
+
+l <-  ggplot(data = libs_long, aes(x = Variable, y = `Log(Values)`)) + 
+  coord_flip() + 
+  geom_boxplot()
+
+grid.arrange(ps, l, ncol = 2, top = "PIXL vs. LIBS shared features")
+
+pheatmap(cor(log_pixlShare[,-ncol(log_pixlShare)]))
+