diff --git a/StudentData/aqueous.Rds b/StudentData/aqueous.Rds new file mode 100644 index 0000000..ca671e5 Binary files /dev/null and b/StudentData/aqueous.Rds differ diff --git a/StudentNotebooks/Assignment05/Aqueous.Rmd b/StudentNotebooks/Assignment05/Aqueous.Rmd new file mode 100644 index 0000000..285107c --- /dev/null +++ b/StudentNotebooks/Assignment05/Aqueous.Rmd @@ -0,0 +1,293 @@ + +--- +title: "Aqueous SHERLOC minerals" +author: "Student Name" +date: "`r Sys.Date()`" +output: + html_document: + toc: yes + pdf_document: + toc: yes +subtitle: "MARs DAR-F24" +--- + +## Weekly Work Summary + +**NOTE:** Follow an outline format; use bullets to express individual points. + +* RCS ID: **Always** include this! +* Project Name: **Always** include this! +* Summary of work since last week + + * Describe the important aspects of what you worked on and accomplished + +* NEW: Summary of github issues added and worked + + * Issues that you've submitted + * Issues that you've self-assigned and addressed + +* Summary of github commits + + * Include branch name(s) + * Include filenames for any added or changed files on github + * Include links to shared Shiny apps + +* List of presentations, papers, or other outputs + + * Include browsable links (ie Google Slides, et.al.) + +* List of references (if necessary) +* Indicate any use of group shared code base +* Indicate which parts of your described work were done by you or as part of joint efforts + +* **Required:** Provide illustrating figures and/or tables + +## Personal Contribution + +* Clearly defined, unique contribution(s) done by you: code, ideas, writing... 
+* Include github issues you've addressed if any + +## PACKAGES +```{r} +# Set the default CRAN repository +local({r <- getOption("repos") + r["CRAN"] <- "http://cran.r-project.org" + options(repos=r) +}) + +if (!require("pandoc")) { + install.packages("pandoc") + library(pandoc) +} + +if (!require("ggplotify")) { + install.packages("ggplotify") + library(ggplotify) +} + +if (!require("car")) { + install.packages("car") + library(car) +} +if (!require("ggbiplot")) { + install.packages("ggbiplot") + library(ggbiplot) +} + +# Required packages for M20 LIBS analysis +if (!require("rmarkdown")) { + install.packages("rmarkdown") + library(rmarkdown) +} +if (!require("tidyverse")) { + install.packages("tidyverse") + library(tidyverse) +} +if (!require("stringr")) { + install.packages("stringr") + library(stringr) +} + +if (!require("ggbiplot")) { + install.packages("ggbiplot") + library(ggbiplot) +} + +if (!require("pheatmap")) { + install.packages("pheatmap") + library(pheatmap) +} +if (!require("ggtern")) { + install.packages("ggtern") + library(ggtern) +} + +if (!require("gridExtra")) { + install.packages("gridExtra") + library(gridExtra) +} +if (!require("randomForest")) { + install.packages("randomForest") + library(randomForest) +} +``` + + +```{r} +metadata_libs <- readRDS("~/DAR-Mars-F24/StudentData/v1_libs_to_sample.Rds") #metadata of libs. 
Unconnected with +libs <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_libs.Rds") +lithology <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_lithology.Rds") +sherloc <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_sherloc.Rds") +pixl <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_pixl.Rds") +metadata_pixl <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/v1_sample_meta.Rds") + +# Reciprocal conversion factors, one per PIXL column, listed in the same order as the element names assigned below (presumably oxide wt% -> element wt%; confirm the factor table). +oxide_elem <- 1 / c(2.1392, 1.6681, 1.8895, (1.2865 + 1.4297)/2, 1.6582, 1.3992, 1.3480, 1.2046, 2.2916, 2.4972, 1, 1.4615, 1.2912) #/2 is used to find average of FeO and Fe2O3 +# NOTE(review): assumes columns 2..ncol(pixl) are exactly these 13 measurements in this order -- confirm against v1_pixl.Rds. +elem_pixl <- pixl[,2:ncol(pixl)] +# Scale column i by factor i; seq_along() stays empty for a zero-length vector, unlike seq(length(x)). +for (i in seq_along(oxide_elem)) { + elem_pixl[,i] <- elem_pixl[,i] * oxide_elem[i] +} + +names(elem_pixl) <- c("Si", "Ti", "Al", "Fe", "Mg", "Ca", "Na", "K", "P", "S", "Cl", "Cr", "Mn") +# Total_Cation is the row sum of the converted columns; O is whatever remains to reach 100. +elem_pixl["Total_Cation"] <- rowSums(elem_pixl) +elem_pixl["O"] <- 100 - elem_pixl$Total_Cation + +``` + +## Analysis 1 + +### Creating Aqueous Dataframes based off of ChatBS + +_Aqueous indicates whether a mineral has been chemically altered by an aqueous solvent or whether it precipitated out of ancient seas_ + +_For the aqueous dataframe, Dr. Rogers implied a few notable 'features' to add. (1) creation of group of feature (e.g: Ca, Mg, and Fe carbonates will all be placed with a Carbonates tag). (2) Is it aqueous or not (0 or 1). Quartz has altering forms of creation, but that will be identified as 1. Potentially we could add another feature that describes whether it partly is or not (but I think we will skip this for now). (3) Chemical Formula. 
Potentially apply NLP to process similarities._ + +```{r, result01_data} + +#Dona created the same naming sequence for each sherloc and lithology dataframe so we can just use the names from one to identify aqueous and non-aqueous minerals +sample <- sherloc[,1] +# Drop the "Sample" column by name. The previous form, sherloc[,-c(names(sherloc) == "Sample")], negated a logical vector (-TRUE is -1, -FALSE is 0), so it always removed column 1 wherever "Sample" was, and selected zero columns when no "Sample" column existed. +sherloc <- sherloc[, names(sherloc) != "Sample", drop = FALSE] + +#1 - feed into grepl +# Names are regex patterns matched against the column names; values are the group label to assign. +common_names <- c("Sulfate|sulfate" = "Sulfates", "Carbonate|carbonate" = "Carbonates", "Organic" = "Organic Matter", "Perchlorate|perchlorate" = "Perchlorate") + +common_groups <- rep(NA, ncol(sherloc)) +for (i in seq_along(common_names)) { + check <- grepl(names(common_names)[i], names(sherloc)) + common_groups[check] <- common_names[[i]] +} +# Columns that matched no pattern keep their own name as the group. +unmatched <- is.na(common_groups) +common_groups[unmatched] <- names(sherloc)[unmatched] + +#2 +#quartz is non-aq with correspondence to the note above +# NOTE(review): the prose above this chunk says quartz "will be identified as 1", but the flag at position 19 (the position of the "SiO2" formula below) is 0 -- confirm which is intended. +aqueous_total <- c(0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,0) +# The flags are positional: fail fast if they do not line up one-to-one with the mineral columns, instead of selecting the wrong columns or failing later with a less clear error. +stopifnot(length(aqueous_total) == ncol(sherloc)) +aq <- sherloc[,aqueous_total == 1] +nonaq <- sherloc[,aqueous_total == 0] + +#3 - formulas in order of common_groups +#formulas have | in them to separate between distinguishable types. 
e.g: ferrous and ferric oxide +formulas <- c("(Na,Cl)Al(Si,Al)Si2O8", "(Mg,Fe,Ca)SO4", "(Mg,Fe,Ca)SO4","(Mg,Fe,Ca)SO4","(Mg,Fe,Ca)SO4","(Mg,Fe,Ca)SO4", +"(Mg,Fe,Ca)SO4","(K,Na)ClO4|Mg(ClO4)2","(K,Na)ClO4|Mg(ClO4)2", "SiO4*nH2O", "PO4", "(Mg,Mn,Ca,Zn)(Fe,Al,Cr)Si2O6", +"(Mg,Fe)2SiO4","(Mg,Fe,Ca)CO3","(Mg,Fe,Ca)CO3","(Mg,Fe,Ca)CO3","(M2/M3)2-3 (Al, Fe, Mg)2-3 (Si, Al)4-5 O10-11", "(Na,C,K)Al(Si,Al)Si2O8","SiO2", "Ca5(PO4)3(F,Cl,OH)", "FeTiO3|Fe2TiO4", "NaCl", "Fe2O3|Fe3O4","Fe2O3|Fe3O4*nH2O", +NA,NA,NA,"(K,Ca,Mg,Fe,Al)4_6(Si,Al)8O20(OH,F)4","(Mg,Fe)6AlSi3O10(OH)8","Al2Si2O5(OH)4","(Fe,Mg)Cr2O4","FeTiO3","ZrSiO4",NA, "(Mg,Zn)Al2O4|Ca3Al2Si3O12") +# Formulas are positional too: one entry per mineral column. +stopifnot(length(formulas) == ncol(sherloc)) + +# One row per mineral column: original name, grouped name, aqueous flag, formula. +aqueous.df <- data.frame(mineral = names(sherloc), common_mineral = common_groups, aqueous = aqueous_total, formula = formulas) + + +write_rds(aqueous.df, "~/DAR-Mars-F24/StudentData/aqueous.Rds") + + +``` + + +### Analysis: Methods and results + +_Describe in natural language a statement of the analysis you're trying to do_ + +_Provide clearly commented analysis code; include code for tables and figures!_ + +```{r, result01_analysis} +# Include all analysis code, clearly commented +# If not possible, screen shots are acceptable. +# If your contributions included things that are not done in an R-notebook, +# (e.g. researching, writing, and coding in Python), you still need to do +# this status notebook in R. Describe what you did here and put any products +# that you created in github. If you are writing online documents (e.g. overleaf +# or google docs), you can include links to the documents in this notebook +# instead of actual text. 
+ +``` + +### Discussion of results + +_Provide in natural language a clear discussion of your observations._ + + +## Analysis: Question 2 (Provide short name) + +### Question being asked + +_Provide in natural language a statement of what question you're trying to answer_ + +### Data Preparation + +_Provide in natural language a description of the data you are using for this analysis_ + +_Include a step-by-step description of how you prepare your data for analysis_ + +_If you're re-using dataframes prepared in another section, simply re-state what data you're using_ + +```{r, result02_data} +# Include all data processing code (if necessary), clearly commented + +``` + +### Analysis: Methods and Results + +_Describe in natural language a statement of the analysis you're trying to do_ + +_Provide clearly commented analysis code; include code for tables and figures!_ + +```{r, result02_analysis} +# Include all analysis code, clearly commented +# If not possible, screen shots are acceptable. +# If your contributions included things that are not done in an R-notebook, +# (e.g. researching, writing, and coding in Python), you still need to do +# this status notebook in R. Describe what you did here and put any products +# that you created in github (documents, jupytor notebooks, etc). If you are writing online documents (e.g. overleaf +# or google docs), you can include links to the documents in this notebook +# instead of actual text. 
+ +``` + +### Discussion of results + +_Provide in natural language a clear discussion of your observations._ + + +## Analysis: Question 3 (Provide short name) + +### Question being asked + +_Provide in natural language a statement of what question you're trying to answer_ + +### Data Preparation + +_Provide in natural language a description of the data you are using for this analysis_ + +_Include a step-by-step description of how you prepare your data for analysis_ + +_If you're re-using dataframes prepared in another section, re-state what data you're using_ + +```{r, result03_data} +# Include all data processing code (if necessary), clearly commented + +``` + +### Analysis methods used + +_Describe in natural language a statement of the analysis you're trying to do_ + +_Provide clearly commented analysis code; include code for tables and figures!_ + +```{r, result03_analysis} +# Include all analysis code, clearly commented +# If not possible, screen shots are acceptable. +# If your contributions included things that are not done in an R-notebook, +# (e.g. researching, writing, and coding in Python), you still need to do +# this status notebook in R. Describe what you did here and put any products +# that you created in github. If you are writing online documents (e.g. overleaf +# or google docs), you can include links to the documents in this notebook +# instead of actual text. 
+ +``` + + +### Discussion of results + +_Provide in natural language a clear discussion of your observations._ + +## Summary and next steps + +_Provide in natural language a clear summary and your proposed next steps._ + + diff --git a/StudentNotebooks/Assignment05/walczd3-assignment05.Rmd b/StudentNotebooks/Assignment05/walczd3-assignment05.Rmd new file mode 100644 index 0000000..b60ffd9 --- /dev/null +++ b/StudentNotebooks/Assignment05/walczd3-assignment05.Rmd @@ -0,0 +1,487 @@ +--- +title: "DAR F24 Biweekly 1" +author: "David Walczyk" +date: "`r Sys.Date()`" +output: + pdf_document: + toc: yes + html_document: + toc: yes +subtitle: "DAR Project Name: Mars" +--- + +## Packages Load In + +```{r} +# Set the default CRAN repository +local({r <- getOption("repos") + r["CRAN"] <- "http://cran.r-project.org" + options(repos=r) +}) + +if (!require("pandoc")) { + install.packages("pandoc") + library(pandoc) +} + +if (!require("ggplotify")) { + install.packages("ggplotify") + library(ggplotify) +} + +if (!require("car")) { + install.packages("car") + library(car) +} +if (!require("ggbiplot")) { + install.packages("ggbiplot") + library(ggbiplot) +} + +# Required packages for M20 LIBS analysis +if (!require("rmarkdown")) { + install.packages("rmarkdown") + library(rmarkdown) +} +if (!require("tidyverse")) { + install.packages("tidyverse") + library(tidyverse) +} +if (!require("stringr")) { + install.packages("stringr") + library(stringr) +} + +if (!require("ggbiplot")) { + install.packages("ggbiplot") + library(ggbiplot) +} + +if (!require("pheatmap")) { + install.packages("pheatmap") + library(pheatmap) +} +if (!require("ggtern")) { + install.packages("ggtern") + library(ggtern) +} + +if (!require("gridExtra")) { + install.packages("gridExtra") + library(gridExtra) +} +if (!require("randomForest")) { + install.packages("randomForest") + library(randomForest) +} +if (!require("caret")) { + install.packages("caret") + library(caret) +} +if (!require("ggimage")) { + 
install.packages("ggimage") + library(ggimage) +} +``` + +## Data Load In +```{r} + +#-------------LIBS------------------- +# Load the saved LIBS data with locations added +libs.df <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds") +# Split off the distance, total-emission, per-oxide stdev and Total columns into libs.std_dev, then drop those same columns from libs.df +libs.std_dev <- libs.df %>% + select((c(distance_mm,Tot.Em.,SiO2_stdev,TiO2_stdev,Al2O3_stdev,FeOT_stdev, + MgO_stdev,Na2O_stdev,CaO_stdev,K2O_stdev,Total))) +libs.df <- libs.df %>% + select(!(c(distance_mm,Tot.Em.,SiO2_stdev,TiO2_stdev,Al2O3_stdev,FeOT_stdev, + MgO_stdev,Na2O_stdev,CaO_stdev,K2O_stdev,Total))) + +# Convert the points to numeric +libs.df$point <- as.numeric(libs.df$point) + +# Review what we have +summary(libs.df) + +#----------PIXL---------------------- +# Load the saved PIXL data with locations added +pixl.df <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/samples_pixl_wide.Rds") + +# Print the data frame for inspection +pixl.df +# Convert to factors +pixl.df[sapply(pixl.df, is.character)] <- lapply(pixl.df[sapply(pixl.df, is.character)], + as.factor) + +# Review our dataframe +summary(pixl.df) + +#----------SHERLOC---------------------- +# Read in data as provided. +sherloc_abrasion_raw <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/abrasions_sherloc_samples.Rds") + +# Clean up data types +sherloc_abrasion_raw$Mineral<-as.factor(sherloc_abrasion_raw$Mineral) +sherloc_abrasion_raw[sapply(sherloc_abrasion_raw, is.character)] <- lapply(sherloc_abrasion_raw[sapply(sherloc_abrasion_raw, is.character)], + as.numeric) +# Transform NA's to 0 +sherloc_abrasion_raw <- sherloc_abrasion_raw %>% replace(is.na(.), 0) + +# Reformat data so that rows are "abrasions" and columns list the presence of minerals. +# Do this by "pivoting" to a long format, and then back to the desired wide format. 
+ +sherloc_long <- sherloc_abrasion_raw %>% + pivot_longer(!Mineral, names_to = "Name", values_to = "Presence") + +# Make abrasion a factor +sherloc_long$Name <- as.factor(sherloc_long$Name) + +# Pivot back to wide: one row per Name, one column per Mineral (the result is a data frame, despite the .matrix name) +sherloc.matrix <- sherloc_long %>% + pivot_wider(names_from = Mineral, values_from = Presence) + +# Get sample information from PIXL and add to measurements -- assumes order is the same + +sherloc.df <- cbind(pixl.df[,c("sample","type","campaign","abrasion")],sherloc.matrix) + +# Review what we have +summary(sherloc.df) + + +# Load the saved lithology data with locations added +lithology.df<- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/mineral_data_static.Rds") + +# Cast samples as numbers +lithology.df$sample <- as.numeric(lithology.df$sample) + +# Convert rest into factors +lithology.df[sapply(lithology.df, is.character)] <- + lapply(lithology.df[sapply(lithology.df, is.character)], + as.factor) + +# Keep only first 16 samples because the data for the rest of the samples is not available yet +lithology.df<-lithology.df[1:16,] + +# Create a matrix containing only the numeric measurements. The remaining features are metadata about the sample. 
+# Columns 6:40 are converted with as.numeric() and shifted down by 1. NOTE(review): for factor columns as.numeric() returns the level codes, so this presumably maps a two-level factor to 0/1 -- confirm the level order. +lithology.matrix <- sapply(lithology.df[,6:40],as.numeric)-1 + +# Review the structure of our matrix +str(lithology.matrix) + +supercam_libs_moc_loc <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds") + +# Reciprocal conversion factors, one per column of pixl.df[,2:14], in the same order as the element names assigned below. +# Two factors were corrected to agree with the table used in Aqueous.Rmd: the Fe2O3 term was typed 1.4927 (digits transposed; now 1.4297) and the Na term was 1.3475 (now 1.3480). +oxide_elem <- 1 / c(1.3480, 1.6582, 1.8895, 2.1392, 2.2916, 2.4972, 1, 1.2046, 1.3992, 1.6681,1.4615, 1.2912, (1.2865+1.4297)/2 ) #/2 is used to find average of FeO and Fe2O3 +elem_pixl <- pixl.df[,2:14] +# Scale column i by factor i; seq_along() stays empty for a zero-length vector, unlike seq(length(x)). +for (i in seq_along(oxide_elem)) { + elem_pixl[,i] <- elem_pixl[,i] * oxide_elem[i] +} +names(elem_pixl) <- c("Na", "Mg", "Al", "Si","P", "S", "Cl", "K", "Ca", "Ti", "Cr", "Mn", "Fe") +# Total_Cation is the row sum of the converted columns; O is whatever remains to reach 100. +elem_pixl["Total_Cation"] <- rowSums(elem_pixl) +elem_pixl["O"] <- 100 - elem_pixl$Total_Cation + + +``` + + +## BiWeekly Work Summary + +**NOTE:** Follow an outline format; use bullets to express individual points. + +* RCS ID: **walczd3** +* Project Name: **Mars** +* Summary of work since last week + +_Last week I looked at whether calculated molar fractions could help predict whether Feldspars or Plagioclase (igneous rocks) molar fractions are always correlated to their lead element (lead element in chemical formulas between either K-Spar or Albite (Na) & Anorthite (Ca)). I found it to be true in the case of K-spar. I also wondered whether the feldspar/plagioclase ternary diagram was indicative of prediction as well. All possible feldspar/plagioclase values were within the range of possibility which was nice to see but it wasn't perfect. Now, after taking a deeper dive into the literature I'd like to analyze more aqueous elements especially that of amorphous silicates, carbonates, phyllosilicates and hydrated sulfates (especially that of serpentine) in addition to connecting PIXL and LIBS as shown by Charlotte and Margo. I hope to use this information to draw mass readings on whether we can interpret LIBS mineral possibilities._ + +* Summary of github commits + +_No, github commits as of now. 
I've worked on creating wire frames for the 2D app but no coded changes._ + +* List of presentations, papers, or other outputs + +* Anderson et al. 2022 https://www.sciencedirect.com/science/article/abs/pii/S0584854721003049?via%3Dihub +* Cousin et al. 2022 https://www.sciencedirect.com/science/article/pii/S0584854721002986 +* Pileri et al. 2021 https://www.hou.usra.edu/meetings/lpsc2021/pdf/1606.pdf +* Clegg et al. https://ssed.gsfc.nasa.gov/IPM/2014/PDF/1086.pdf +* https://ars.els-cdn.com/content/image/1-s2.0-S0584854721002986-mmc1.pdf + +* **Required:** Provide illustrating figures and/or tables + +## Personal Contribution + +* Clearly defined, unique contribution(s) done by you: code, ideas, writing... +* Include github issues you've addressed if any + +## Analysis: Question 1 (Provide short name) + +### What are the most important features for predicting minerals in the lithology dataset? + +_Using lithology so that our prediction is binary, I would like to see the elemental composition differences between AQ and non-AQ minerals using randomForest._
+ +### Data Preparation + +_Provide in natural language a description of the data you are using for this analysis_ + +_Include a step-by-step description of how you prepare your data for analysis_ + +_If you're re-using dataframes prepared in another section, simply re-state what data you're using_ +```{r} +names(lithology.df) +#Manganese, phylosillicates, silicates, carbonates, amorphous silicates, hydrated sulfates + +# One flag per lithology.matrix column; any value > 0 (including .5) is treated as aqueous when the response sets are built below. +aqueous.lith <- c(0,0,0,0,.5,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,0,1, + 1,0,0, + 0,0) + +# NOTE(review): aqueous.sher is defined here but not used anywhere in the visible notebook. +aqueous.sher <- c(0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,.5,1,0,1,1,1,0,0,1,1,1,1,0,0, + 0,1, + 0) + +libs_typed <- readRDS("~/DAR-Mars-F24/StudentData/libs_typed.Rds") +libs_targets <- libs_typed[which(libs_typed$`earthsample?` == 1), ] +pixl_sol <- readRDS("~/DAR-Mars-F24/StudentData/pixl_sol_coordinates.Rds") +#base random forest on +library(randomForest) + +#use lithology for y as sherloc is a subset of mineral presence +y.aq <-data.frame(lithology.matrix[,which(aqueous.lith > 0)]) +y.nonaq <- data.frame(lithology.matrix[,which(aqueous.lith == 0)]) + +#remove hydrated carbonates +y.aq <- y.aq %>% + select(-Hydrated_Carbonates) + +# df is an 8-column subset of the PIXL columns; the models below are fit on full_df (all 13 columns). The last column is renamed to "FeO" in both. +shared_feats <- names(pixl.df[,2:14])[c(4,10,3,13,2,9,1,8)] +df <- pixl.df[,shared_feats] +full_df <- pixl.df[,2:14] +names(df)[4] <- "FeO" +names(full_df)[ncol(full_df)] <- "FeO" + +#total PIXL features +aq.fullmodel <- data.frame() +nonaq.fullmodel <- data.frame() + +# Fit one 100-tree classification forest per aqueous mineral and keep each predictor's mean decrease in accuracy (one row per mineral). +# The importance column is selected by name: position 3 is MeanDecreaseAccuracy only when the response has exactly two classes. +for (i in seq_len(ncol(y.aq))) { + model <- randomForest(y~.,data = cbind(full_df, y = factor(y.aq[,i])), ntree = 100, importance = TRUE) + imp <- importance(model)[,"MeanDecreaseAccuracy"] #mean decrease accuracy + aq.fullmodel <- rbind(aq.fullmodel, imp) +} +rownames(aq.fullmodel) <- colnames(y.aq) +names(aq.fullmodel) <- names(full_df) + +# Same procedure for the non-aqueous minerals. +for (i in seq_len(ncol(y.nonaq))) { + model <- randomForest(y~.,data = cbind(full_df, y = factor(y.nonaq[,i])), ntree = 100, importance = TRUE) + imp <- importance(model)[,"MeanDecreaseAccuracy"] #mean decrease accuracy + nonaq.fullmodel <- rbind(nonaq.fullmodel, imp) +} 
+rownames(nonaq.fullmodel) <- colnames(y.nonaq) +names(nonaq.fullmodel) <- names(full_df) + +# Unscaled heatmaps (minerals x PIXL features), drawn side by side. +plot1 <- as.ggplot(pheatmap(aq.fullmodel, scale = "none", treeheight_row = 0, treeheight_col = 0, legend = FALSE, angle_col = 90)) +plot2 <- as.ggplot(pheatmap(nonaq.fullmodel, scale = "none",treeheight_row = 0, treeheight_col = 0, legend = FALSE, angle_col = 90)) + +# Title corrected: the plotted measure is mean decrease in accuracy, not Gini, and the parenthesis is now closed. +grid.arrange(plot1, plot2, ncol = 2, top = "Random Forest Mean Decrease Accuracy (AQ [left] vs. NonAQ [right])") + +``` +## Analysis: Question 2 (Provide short name) + +### Can we connect LIBS to PIXL and SHERLOC using the libs_typed dataset? + +_Provide in natural language a statement of what question you're trying to answer_ + + +### Data Preparation + +_Provide in natural language a description of the data you are using for this analysis_ + +_Include a step-by-step description of how you prepare your data for analysis_ + +_LIBS is not a fully accurate prediction of wt% on Mars' surface, it is exactly that, a prediction using multivariate models. Training and validation data is from terrestrial datasets of different spectra and standard protocols. Calibration targets are used in multivariate models where the calibrations are used to tune models._ + +```{r} + +#connect pixl to libs in a way that makes sense. Calibration targets are used as a proxy for measuring the health and status of the machine but are also used to calibrate the quantitative models used to predict wt% of LIBS oxides. Specific calibration targets do not directly have any meaning into the mineral composition of that sample. 
+libs.pixl.combined <- readRDS("~/DAR-Mars-F24/StudentData/PIXL_LIBS_Combined.Rds") #connect the type and maybe abrasion types to prediction models as well for categorizing LIBS data on avg_sol dataset + +#sd of each unique sol +sd_cols <- names(supercam_libs_moc_loc)[grepl("stdev",names(supercam_libs_moc_loc))] +avg_sd_sol <- supercam_libs_moc_loc %>% + select(sol, lat, lon, all_of(sd_cols)) %>% + group_by(sol) %>% + summarise(SiO2_stdev = mean(SiO2_stdev), TiO2_stdev = mean(TiO2_stdev), Al2O3_stdev = mean(Al2O3_stdev), FeOT_stdev = mean(FeOT_stdev), MgO_stdev = mean(MgO_stdev), CaO_stdev = mean(CaO_stdev), Na2O_stdev = mean(Na2O_stdev), K2O_stdev = mean(K2O_stdev)) + +# Print the largest per-sol mean stdev for each oxide. +apply(avg_sd_sol[,2:ncol(avg_sd_sol)], 2, max) + +#average value of oxides for each sol +avg_sol <- supercam_libs_moc_loc %>% + group_by(sol) %>% #by sol just because overlapping sols with different lat-lon have the same averages + summarise(SiO2 = mean(SiO2), TiO2 = mean(TiO2), Al2O3 = mean(Al2O3), FeOT = mean(FeOT), MgO = mean(MgO), + CaO = mean(CaO), Na2O = mean(Na2O), K2O = mean(K2O), avg_total = mean(Total),n = n()) + +# Sanity check (printed): one summary row per distinct sol. +dim(avg_sol)[1] == length(unique(supercam_libs_moc_loc$sol)) + +#run through models to +names(libs_typed) +avg_earthsample <- libs_typed[which(libs_typed$`earthsample?` == 1),] %>% + group_by(sol, `earthsample?`, type) %>% + summarise(SiO2 = mean(SiO2), TiO2 = mean(TiO2), Al2O3 = mean(Al2O3), FeOT = mean(FeOT), MgO = mean(MgO), + CaO = mean(CaO), Na2O = mean(Na2O), K2O = mean(K2O), n = n()) + + +#are # of rows equal == the sum of all equal rows sols equal to the # of rows +# NOTE(review): this check is only printed, not enforced; the mutate() below copies columns by row position and relies on it being TRUE. +( nrow(libs_typed) == nrow(libs.pixl.combined) ) & (sum(libs_typed$sol == libs.pixl.combined$sol.x) == nrow(libs_typed)) + + +#use this +libs_typed <- libs_typed %>% + mutate(type = libs.pixl.combined$type, campaign = libs.pixl.combined$campaign, name = libs.pixl.combined$name, abrasion = libs.pixl.combined$abrasion) + +# Drop the first row of pixl_sol once, here, and convert the coordinates to numeric; everything below uses pixl_sol as-is. +pixl_sol <- pixl_sol[-1,] %>% + mutate(lat = as.numeric(Lat), lon = as.numeric(Long)) %>% + 
select(-c(Lat,Long)) + + +# The overlay previously used pixl_sol[-1,], which removed a second row after the drop above and so left one PIXL sample off the plot. +ggplot(libs_typed, aes(x = lat,y = lon, color = type)) + + geom_point() + + geom_point(data = pixl_sol, aes(x = lat, y= lon), colour = "black", shape = 17, size = 3) + + theme_minimal() + + labs(caption = "Black Triangles are PIXL samples") +#igneous and sedimentary samples are distinctly separated by bridge of 0 waypoints + +# Rename the PIXL oxide columns to the LIBS spellings and keep the eight shared oxides plus type and campaign (the duplicated Na2O entries were removed; the result is unchanged). +pixl <- pixl_sol %>% + mutate(Na2O = Na20, MgO = Mgo, CaO = Cao, Al2O3 = Al203, TiO2 = Ti02, FeOT = `FeO-T`, SiO2 = Si02, K2O = K20) %>% + select(Na2O, MgO, CaO, Al2O3, TiO2, FeOT, SiO2,K2O,type, campaign ) + +libs <- libs_typed %>% + select(Na2O, MgO, CaO, Al2O3, TiO2, FeOT, SiO2, K2O,type, campaign) %>% + replace(is.na(.), 0) + +#one hot encode and remove type0, campaign0, etc. Also removed abrasion from all dataframes because i think it will overfit +train <- data.frame(predict(dummyVars("~ .", data = pixl), newdata = pixl)) #has dups btw +test <- data.frame(predict(dummyVars("~ .", data = libs), newdata = libs)) %>% + select(-c(type0, campaign0)) + +#pca +type <- c(pixl_sol$type, libs_typed$type) +camp <- c(pixl_sol$campaign, libs_typed$campaign) +# Centered, unscaled PCA on the stacked PIXL (train) and LIBS (test) rows; scale. is spelled out instead of relying on partial matching of `scale`. +pca.fit <- prcomp(rbind(train, test) , center = TRUE, scale. = FALSE) +summary(pca.fit) #93% within PC3 + +#colored by type +ggbiplot::ggbiplot(pca.fit, + groups= type, circle = TRUE) + + theme_minimal() #reaffirms the importance of those sedimentary samples of high CaO content likely. 
Glad to see they are mostly identified by rock type +#by campaign plots the same highlighted points + +library(tidymodels) +#predict using randomForest +pred.matrix <- lithology.matrix[,-c(10,13,24,26,28)] #remove carbonates, perchloartes, Na-perch, hydrated carbonates, hydrated iron oxides +#i don't want to remove carbonates because it shows that every single sample contains carbonates except for Roubion +minerals <- colnames(pred.matrix) #perchlorates, Na-perch, hydrated carbonates +# One prediction column per mineral is appended after the placeholder `sample` column. +libs_predictions <- data.frame(sample = seq_len(nrow(test))) +for (i in seq_along(minerals)) { + # pred.matrix[-1, i] drops the first lithology row. NOTE(review): this presumably mirrors the first row dropped from pixl_sol and assumes both are in the same sample order -- confirm. + data <- cbind(train, y = factor(pred.matrix[-1,i])) + # randomForest's argument is `ntree`; the previous `ntrees= 100` did not match it and was silently ignored, so the default number of trees was grown instead of 100. + model <- randomForest(y~., data = data, ntree = 100) + pred <- predict(model, test) + libs_predictions[,i+1] <- pred #bc sample is first +} + +libs_predictions <- libs_predictions[,-1] #drops first sample column, be careful! +names(libs_predictions) <- minerals + + +#model on PIXL shared features and use model to predict average_sol mineral targets. + +``` + + + + +## Analysis: Question 3 (Provide short name) + +### Question being asked + +_Provide in natural language a statement of what question you're trying to answer_ + +### Data Preparation + +_Provide in natural language a description of the data you are using for this analysis_ + +_Include a step-by-step description of how you prepare your data for analysis_ + +_If you're re-using dataframes prepared in another section, simply re-state what data you're using_ + +```{r} +#Plot pyroxene, fix feldspar-plagioclase graph (might have to calcualte molar fractions manually), maybe another ternary graph but im not sure +#add geom segments onto + +mineral_percents <- read.csv("mineral_wts.csv") +dim(mineral_percents) +imagepath <- "~/DAR-Mars-F24/Resources/Ternary-phase-diagram-of-feldspar-Endmember-and-solids-solution-not-necessarily-stable.png" +#anorthite, albite, orthoclase - +#uses: library(ggimage) + +# Ternary plot: rows 17 onward of mineral_percents are drawn as the "LIBS" points and rows 2:16 as triangles colored by pixl_sol$type. The y axis label was "O"; it is now "Or" to match the Xor column and the KAlSi3O8 note. +ggtern::ggtern(data = mineral_percents[17:nrow(mineral_percents),], aes(x = Xab, y = 
Xor, z = Xan)) + + geom_point(aes(fill = "LIBS"),pch = 21, color = "purple") + + theme_rgbw()+ + labs(x="Ab", #NaAlSi3O8 + y="Or", #KAlSi3O8 + z="An", #CaAl2Si2O8 + title = "Feldspar - Plagioclase Ternary Graph", color = "Type") + + annotate("segment", x = 100, y = 0, z= 11, xend = 90, yend = 11, zend = 12) + + annotate("segment", x = 57, y = 32, z = 11, xend = 0, yend = 90, zend = 11) + + annotate("segment", x = 0, y = 10, z = 90, xend = 63, yend = 10, zend = 28) + + annotate("segment", x = 70, y = 0, z = 30, xend = 63, yend = 10, zend = 28) + + annotate("segment", x = 45, y = 10, z = 50, xend = 48, yend = 0, zend = 52) + + annotate("segment", x = 27, y = 10, z = 63, xend = 30, yend = 0, zend = 70 ) + + annotate("segment", x = 9, y = 10, z = 80, xend = 10, yend = 0, zend = 90) + + annotate("segment", x = 80, y = 10, z = 10, xend = 90, yend = 10, zend = 0) + + annotate("segment", x = 63, y = 38, z = 0, xend = 57, yend = 32, zend = 11) + + annotate("segment", x = 80, y = 10, z = 10, xend = 66, yend = 14, zend = 20) + + geom_point(data = mineral_percents[2:16,], aes(x = Xab, y = Xor, z = Xan, color = pixl_sol$type), shape = 17, size = 3) + + +pixl_sol + + + +#create and then color LIBS and PIXL +``` + + + + + +## junk + + + +log_pixl <- cbind(as.data.frame(log(pixl.df[,2:14] + 1)))#added one to all values to offset Inf replaced as zeroes. 
log(pixl.df) == 0 != pixl.df == 0 +log_pixlShare <- cbind(log_pixl[,c(4,10,3,13,2,9,1,8)], label = pixl.df$type) +log_pixlLone <- cbind(log_pixl[,c(5,6,7,11,12)], label = pixl.df$type) +log_libs <- as.data.frame(log(libs.df[,6:13] + 1)) + +share_long <- log_pixlShare %>% + pivot_longer(cols = names(log_pixlShare)[-ncol(log_pixlShare)], values_to = "Log(Values)", names_to = "Variable") + +lone_long <- log_pixlLone %>% + pivot_longer(cols = names(log_pixlLone)[-ncol(log_pixlLone)], values_to = "Log(Values)", names_to = "Variable") + +libs_long <- log_libs %>% + pivot_longer(cols = names(log_libs), values_to = "Log(Values)", names_to = "Variable") + +ps <- ggplot(data = share_long, aes(x = Variable, y = `Log(Values)`)) + + coord_flip() + + geom_boxplot(aes(fill = label)) + +pl <- ggplot(data = lone_long, aes(x = Variable, y = `Log(Values)`)) + + coord_flip() + + geom_boxplot(aes(fill = label)) + +l <- ggplot(data = libs_long, aes(x = Variable, y = `Log(Values)`)) + + coord_flip() + + geom_boxplot() + +grid.arrange(ps, l, ncol = 2, top = "PIXL vs. LIBS shared features") + +pheatmap(cor(log_pixlShare[,-ncol(log_pixlShare)])) +