diff --git a/StudentData/PIXL_LIBS_Combined.Rds b/StudentData/PIXL_LIBS_Combined.Rds index c7bf055..88b47ca 100644 Binary files a/StudentData/PIXL_LIBS_Combined.Rds and b/StudentData/PIXL_LIBS_Combined.Rds differ diff --git a/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd b/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd index 7ac4ad6..d42d184 100644 --- a/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd +++ b/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd @@ -90,11 +90,8 @@ libs.df[,6:13] <- sapply(libs.df[,6:13],as.numeric) libs.df<-libs.df%>% filter(!(grepl("scct", target))) -#add a column to indicate the nearest pixl -libs.df<-cbind(nearestpixl=0,libs.df) - #make a dataframe of just the LIBS Lat/Long and target name and remove duplicates -libstargets.df<-libs.df[,c(1,3,4,5)] +libstargets.df<-cbind("nearestpixl"=0,libs.df[,1:4]) libstargets.df<-distinct(libstargets.df) ``` @@ -120,33 +117,24 @@ pixl.df<-pixl.df[c(2,4,6,8,10,12,14,16),] ### Combining datasets ```{r} -distance=100 -distCosine(pixl.df[1,c(1,2)],libstargets.df[1,c(2,3)], r=3393169) -for(i in 1:nrow(pixl.df)) { - libstargets.df<-libstargets.df%>%mutate(nearestpixl = ifelse(distHaversine(pixl.df[i,c(1,2)],c(lat,lon),r=3393169),pixl.df[i,5], nearestpixl)) - -} -``` - -```{r} +# Create a new dataframe with the LIBS metatdata as well as features corresponding to each PIXL Abrasion libstargets.df<-cbind(libstargets.df,"Distance"=0,"Bellegrade"=0,"Dourbes"=0,"Quartier"=0,"Alfalfa"=0,"ThorntonGap"=0,"BerryHollow"=0,"Novarupta"=0,"UganikIsland"=0) ``` ```{r} -#DistCosine(pixl.df[1,c(1,2)],libstargets.df[1,c(2,3)], r=3393169) - +#Calculate the distance between each LIBS target and each PIXL abrasion, then fill in the minimum distances for(i in 1:nrow(libstargets.df)) { - libstargets.df[i,c(6:13)]<-c(distHaversine(pixl.df[1,c(1,2)],libstargets.df[i,c(2,3)],r=3393169), - distHaversine(pixl.df[2,c(1,2)],libstargets.df[i,c(2,3)],r=3393169), - 
distHaversine(pixl.df[3,c(1,2)],libstargets.df[i,c(2,3)],r=3393169), - distHaversine(pixl.df[4,c(1,2)],libstargets.df[i,c(2,3)],r=3393169), - distHaversine(pixl.df[5,c(1,2)],libstargets.df[i,c(2,3)],r=3393169), - distHaversine(pixl.df[6,c(1,2)],libstargets.df[i,c(2,3)],r=3393169), - distHaversine(pixl.df[7,c(1,2)],libstargets.df[i,c(2,3)],r=3393169), - distHaversine(pixl.df[8,c(1,2)],libstargets.df[i,c(2,3)],r=3393169)) + libstargets.df[i,c(7:14)]<-c(distHaversine(pixl.df[1,c(1,2)],libstargets.df[i,c(3,4)],r=3393169), + distHaversine(pixl.df[2,c(1,2)],libstargets.df[i,c(3,4)],r=3393169), + distHaversine(pixl.df[3,c(1,2)],libstargets.df[i,c(3,4)],r=3393169), + distHaversine(pixl.df[4,c(1,2)],libstargets.df[i,c(3,4)],r=3393169), + distHaversine(pixl.df[5,c(1,2)],libstargets.df[i,c(3,4)],r=3393169), + distHaversine(pixl.df[6,c(1,2)],libstargets.df[i,c(3,4)],r=3393169), + distHaversine(pixl.df[7,c(1,2)],libstargets.df[i,c(3,4)],r=3393169), + distHaversine(pixl.df[8,c(1,2)],libstargets.df[i,c(3,4)],r=3393169)) - libstargets.df[i,1]<-which.min(libstargets.df[i,c(6:13)]) - libstargets.df[i,5]<-min(libstargets.df[i,c(6:13)]) + libstargets.df[i,1]<-which.min(libstargets.df[i,c(7:14)]) + libstargets.df[i,6]<-min(libstargets.df[i,c(7:14)]) } libstargets.df$nearestpixl<-as.factor(libstargets.df$nearestpixl) @@ -156,6 +144,7 @@ levels(libstargets.df$nearestpixl)<-(c("Bellegrade","Dourbes","Quartier","Alfalf ```{r} +#Create vectors of LIBS targets corresponding to each PIXL Abrasion Bellegrade<-libstargets.df[libstargets.df$nearestpixl=="Bellegrade",]$target Dourbes<-libstargets.df[libstargets.df$nearestpixl=="Dourbes",]$target Quartier<-libstargets.df[libstargets.df$nearestpixl=="Quartier",]$target @@ -166,26 +155,11 @@ Novarupta<-libstargets.df[libstargets.df$nearestpixl=="Novarupta",]$target UganikIsland<-libstargets.df[libstargets.df$nearestpixl=="UganikIsland",]$target ``` - -### Ternary Diagram ```{r} -meters=100 - -included.libs<-(libstargets.df%>% - filter(Distance% - 
filter(target %in% included.libs) - -libs.matrix <- libs.matrix[,c(5,7:14)] - -libs.tern <- as.data.frame(libs.matrix) %>% - mutate(x=(SiO2+Al2O3)/100,y=(FeOT+MgO)/100,z=(CaO+Na2O+K2O)/100) %>% - select(-c(SiO2,Al2O3,FeOT,MgO,CaO,Na2O,K2O,TiO2)) +#Add the nearest Abrasion to the LIBS data +libs.pixl.merged <- cbind("Abrasion"=0,libs.df) -libs.tern<-cbind("Abrasion"=0,libs.tern) - -libs.tern<-libs.tern%>% +libs.pixl.merged<-libs.pixl.merged%>% mutate(Abrasion = ifelse(target%in%Alfalfa,"Alfalfa", ifelse(target %in% Bellegrade, "Belegrade", ifelse(target %in% BerryHollow, "BerryHollow", @@ -193,55 +167,77 @@ libs.tern<-libs.tern%>% ifelse(target %in% Novarupta, "Novarupta", ifelse(target %in% Quartier, "Quartier", ifelse(target %in% ThorntonGap, "ThorntonGap", - ifelse(target %in% UganikIsland, "UganikIsland",Abrasion))))))))) + ifelse(target %in% UganikIsland, "Uganik Island",Abrasion))))))))) + +targetdistance<-libstargets.df[,2:6] +libs.pixl.merged<-merge(libs.pixl.merged,targetdistance,by=c("target","sol","lat","lon"),all.x=T) +``` + +```{r} +#Add back some pixl features +pix<-pixl.df[,c(1,2,4,5)] +libs.pixl <- merge(libs.pixl.merged, pix, by.x="Abrasion",by.y="abrasion",all.x=TRUE) + +#rename and reorder columns +libs.pixl<-cbind("LIBS.Target"=libs.pixl$target,libs.pixl[,4:5],"LIBS.Sol"=libs.pixl$sol,"LIBS.Point"=libs.pixl$point,"Distance"=libs.pixl$Distance,"PIXL.Abrasion"=libs.pixl$Abrasion,libs.pixl[,16:18],libs.pixl[,7:14]) +colnames(libs.pixl)<-c("LIBS.Target","LIBS.Lat","LIBS.Lon","LIBS.Sol","LIBS.Point", "Distance","PIXL.Abrasion","PIXL.Lat","PIXL.Lon","PIXL.Campaign","LIBS.SiO2","LIBS.TiO2","LIBS.Al2O3","LIBS.FeOT","LIBS.MgO","LIBS.CaO","LIBS.Na2O","LIBS.K2O") +``` -kabledf<-rbind("Distance (m)"=meters,"Targets"=length(included.libs),"Points"=nrow(libs.tern)) +```{r} +#setwd("~/DAR-Mars-F24/StudentData") +#saveRDS(libs.pixl,"PIXL_LIBS_Combined.Rds") +``` + +The libs.pixl dataframe is now saved to PIXL_LIBS_Combined.Rds, which can be found in the 
StudentData folder. + +### Ternary Diagram + +Example of creating a ternary plot filtering the combined LIBS and PIXL data +```{r} +libs.tern <- libs.pixl %>% + mutate(x=(LIBS.SiO2+LIBS.Al2O3)/100,y=(LIBS.FeOT+LIBS.MgO)/100,z=(LIBS.CaO+LIBS.Na2O+LIBS.K2O)/100) -kable(kabledf) +libs.tern<-libs.tern[,c(6,7,19:21)] ``` ```{r} +meters=7 + ggtern(libs.tern, ggtern::aes(x=x,y=y,z=z)) + - geom_point(data=libs.tern,aes(color=Abrasion,alpha=0.5)) + + geom_point(data=subset(libs.tern,Distance<=meters),aes(color=PIXL.Abrasion,alpha=0.5)) + theme_rgbw() + - labs(title=paste("Mars LIBS Data Within",distance,"meters of PIXL",sep=" "), + labs(title=paste("Mars LIBS Data Within",meters,"meters of PIXL",sep=" "), x="Si+Al", y="Fe+Mg", z="Ca+Na+K")+theme(legend.position="right") + guides(alpha="none") -``` - -### Line plot - -```{r} meters=100 -included.libs<-(libstargets.df%>% - filter(Distance% - filter(target %in% included.libs) +ggtern(libs.tern, ggtern::aes(x=x,y=y,z=z)) + + geom_point(data=subset(libs.tern,Distance<=meters),aes(color=PIXL.Abrasion,alpha=0.5)) + + theme_rgbw() + + labs(title=paste("Mars LIBS Data Within",meters,"meters of PIXL",sep=" "), + x="Si+Al", + y="Fe+Mg", + z="Ca+Na+K")+theme(legend.position="right") + + guides(alpha="none") +``` -libs.matrix <- libs.matrix[,c(5,7:14)] -libs.matrix<-libs.matrix[,c(1:2,4:9,3)] +### Line plot -libs.matrix<-cbind("Abrasion"=0,libs.matrix) +Example of how to add the pixl data as extra rows to the LIBS data, so that it can be graphed on the same plot -libs.matrix<-libs.matrix%>% - mutate(Abrasion = ifelse(target%in%Alfalfa,"Alfalfa", - ifelse(target %in% Bellegrade, "Belegrade", - ifelse(target %in% BerryHollow, "BerryHollow", - ifelse(target %in% Dourbes, "Dourbes", - ifelse(target %in% Novarupta, "Novarupta", - ifelse(target %in% Quartier, "Quartier", - ifelse(target %in% ThorntonGap, "ThorntonGap", - ifelse(target %in% UganikIsland, "UganikIsland",Abrasion))))))))) +```{r} +#remove certain features from the combined pixl 
libs dataset +libs.matrix <- libs.pixl[,c(7,1,11,13:18,12)] +#rename columns libs.matrix<-cbind(libsorpixl=1,libs.matrix) + +colnames(libs.matrix)<-c("libsorpixl","Abrasion","targets+names","SiO2","Al2O3","FeOT","MgO","CaO","Na2O","K2O","TiO2") ``` @@ -262,9 +258,7 @@ pixl.df<-cbind(libsorpixl=0,pixl.df) ``` ```{r} +#rename pixl columns and combine dataframes colnames(pixl.df)<-colnames(libs.matrix) pixllibs.df<-rbind(pixl.df,libs.matrix) ``` - - - diff --git a/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.Rmd b/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.Rmd new file mode 100755 index 0000000..bb273e5 --- /dev/null +++ b/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.Rmd @@ -0,0 +1,487 @@ +--- +title: "Data Analytics Research Individual Final Project Report" +author: "Margo VanEsselstyn" +date: "`r Sys.Date()`" +output: + html_document: + toc: yes + toc_depth: 3 + toc_float: yes + number_sections: no + theme: united + html_notebook: default + pdf_document: + toc: yes + toc_depth: '3' +--- +# DAR Project and Group Members + +* Project name: Mars +* Project team members: Charlotte Peterson, Doña Roberts, Xuanting Wang, David Walczyk, Charlotte Newman, Dante Mwatibo, Nicolas Morawski, CJ Marino, Aadi Lahiri, Ashton Compton + +# 0.0 Preliminaries. + +This report is generated from an R Markdown file that includes all the R code necessary to produce the results described and embedded in the report. Code blocks can be surpressed from output for readability using the command code `{R, echo=show}` in the code block header. If `show <- FALSE` the code block will be surpressed; if `show <- TRUE` then the code will be show. 
+ +```{r} +# Set to TRUE to expand R code blocks; set to FALSE to collapse R code blocks +show <- TRUE +``` + +Executing this R notebook requires some subset of the following packages: + +* `ggplot2` +* `tidyverse` +* `ggtern` +* `knitr` +* `pheatmap` + +These will be installed and loaded as necessary (code suppressed). + + +```{r, include=FALSE} +# This code will install required packages if they are not already installed +# ALWAYS INSTALL YOUR PACKAGES LIKE THIS! +if (!require("ggplot2")) { + install.packages("ggplot2") + library(ggplot2) +} +if (!require("tidyverse")) { + install.packages("tidyverse") + library(tidyverse) +} +if (!require("ggtern")) { + install.packages("ggtern") + library(ggtern) +} +if(!require("knitr")) { + install.packages("knitr") + library(knitr) +} +if(!require("pheatmap")){ + install.packages("pheatmap") + library(pheatmap) +} +``` + +# 1.0 Project Introduction + +This project outlines my analysis of the Mars LIBS and PIXL data. It largely revolves around data processing and organization + +# 2.0 Organization of Report + +This report is organized as follows: + +* Section 3.0. Finding 1: Here we discuss the LIBS scct targets and the importance of differentiating them in future analysis + +* Section 4.0: Finding 2: Here we discuss the connection between the LIBS and PIXL data + +* Section 5.0 Overall conclusions and suggestions + +# 3.0 Finding 1: Understanding LIBS Targets + +I researched the meaning behind the LIBS target names, and categorized the LIBS data into a few major categories. I created a new Rds file that includes a column labeling each LIBS sample with its category. + +## 3.1 Data, Code, and Resources + +Here is a list data sets, codes, that are used in your work. Along with brief description and URL where they are located. + +1. 
supercam_libs_moc_loc.Rds is the Rds file containing the LIBS data [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds) + +2. libs_typed.Rds is the Rds file containing the LIBS data as well as a type column categorizing each sample [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/StudentData/libs_typed.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/StudentData/libs_typed.Rds) + +3. SupercamCalibrationTargets.pdf is a pdf containing information about the calibration targets used in the LIBS data. [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentNotebooks/Assignment07_DraftFinalProjectNotebook/SupercamCalibrationTargets.pdf](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentNotebooks/Assignment07_DraftFinalProjectNotebook/SupercamCalibrationTargets.pdf) + +4. v1_libs.Rds is the Rds file containing the LIBS data as well as my categorization that Doña put into a standardized format [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds) + + +I used the libs dataset with the standard deviation features, distances, and totals removed. I made sure that certain categories were numeric and added a new "type" column. + +Then, I added a description to each scct (calibration) target in the type column, which names the earth reference based on the pdf linked in this section. 
For example, the scct target containing "PMIFA0306" was typed "Olivine". + +I also labeled the targets with "aegis" in their names as AEGIS. These samples can be used in the same way as the other Mars LIBS samples, but note that the measurement was taken using AEGIS, the rover's AI: instead of the target being chosen intentionally by a scientist, it is chosen by the rover when it has extra resources to take a sample. + +According to the analyst's notebook, targets with "scam" in their names correspond to targets of other measurements. I went through the analyst's notebook for these samples and added the other measurements that were taken at the same target to the type column. For example, the "villeplane_scam" target also had ZCam measurements taken at the same target, so I typed it "ZCAM-SCAM". + +I labeled two targets ("sei_________________" and "naakih______________") with further descriptions because the analyst's notebook had clear descriptions of what each target was intended to sample. + +All remaining samples are typed "other". 
+ +```{r} +libs.df <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds") + +#Drop the standard deviation features, the sum of the percentages, +#the distance, and the total frequencies +libs.df <- libs.df %>% + select(!(c(distance_mm,Tot.Em.,SiO2_stdev,TiO2_stdev,Al2O3_stdev,FeOT_stdev, + MgO_stdev,Na2O_stdev,CaO_stdev,K2O_stdev,Total))) + +# Convert the points to numeric +libs.df$point <- as.numeric(libs.df$point) +typedlibs<-cbind(libs.df[,1:4],"type"=0,libs.df[5:13]) +``` + +```{r} +targetlist<-c("tsrich0404","LCMB0006","LCA530106","PMIFS0505","TAPAG0206","PMIOR0507", + "PMIDN0302","PMIFA0306","PMIAN0106","PMIEN0602","TSERP0102","LBHVO20406", + "LJSC10304","LANKE0101","LSIDE0101","LJMN10106","NTE010301","NTE020106", + "NTE030106","NTE040106","NTE050301","SHERG02","TITANIUM","aegis", + "buzzard_rocks_scam","alfalfa_378_scam","chiniak_565_scam", + "garde_210_scam","guillaumes_168_scam","montpezat_350_scam","naltsos_scam", + "ouzel_falls_792_scam","pollock_knob_501_sca","rose_river_falls_sca", + "roubion_168_scam","villeplane_scam","atmo_mountain_637_sc", + "crosswind_lake_641_s") + +typelist<-c("BHVO-2 basalt and K sulfate mixture","Chert","Calcite","Ferrosilite", + "Fluoro-Chloro-Hydro Apatite","Orthoclase","Diopside","Olivine","Andesine", + "Enstatite","Serpentine/Talc","BHVO-2 standard basalt","Mars soil analog", + "Ankerite","Siderite","JMN-1 standard Mn nodule", + "Basalt dopped in minor elements - Cu, Zn", + "Basalt dopped in minor elements - Mn, Ba, Cr", + "Basalt dopped in minor elements - Zn", + "Basalt dopped in minor elements - Li, Sr", + "Basalt dopped in minor elements - Ni","Shergottite","Titanium","AEGIS", + "PIXL-SCAM","VISIR-Ramanx2-ZCAM-SCAM","AT-SCAM","AT-SCAM","PIXL-SCAM", + "PIXL-SCAM","PIXL-SCAM","AT-SCAM","ZCAM-SCAM","?-SCAM","ZCAM-PIXL-SCAM", + "ZCAM-SCAM","ZCAMMS-SCAM","ZCAM-SCAM") + +targettyped<-as.data.frame(cbind(targetlist,typelist),rownames=c(1)) + +for(i in 1:23){ + typedlibs<-typedlibs %>% + mutate(type 
= ifelse(grepl(targettyped[i,1],target,ignore.case=T), + targettyped[i,2], type)) +} + +for(i in 24:nrow(targettyped)){ + typedlibs<-typedlibs %>% + mutate(type= ifelse(grepl(targettyped[i,1], target,ignore.case=T) & type=="0", + targettyped[i,2],type)) +} + +typedlibs<-typedlibs %>% + mutate(type=ifelse(type=="0","other",type)) %>% + mutate(type= ifelse(target=="sei_________________", "other - fine soil",type)) %>% + mutate(type= ifelse(target=="naakih______________", "other - coarse soil",type)) + +kable(targettyped) +``` + +## 3.2 Contribution + +This section was sole work, except for Doña's standardization of my file into the v1_libs.Rds at the end. Later, in my ternary diagrams, I used the same seed and number of clusters as Aadi, so that my clustering would match his. + +## 3.3 Methods Description + +Now, we plot the average of each calibration/scct target, against the clustered mars LIBS data. The Mars data is separated from the reference data, we plot the reference data over the Mars data with labeled points corresponding to the table of reference types. + +```{r} +libs.matrix <- as.matrix(libs.df[,6:13]) + +libs.tern <- as.data.frame(libs.matrix) %>% + mutate(x=(SiO2+Al2O3)/100,y=(FeOT+MgO)/100,z=(CaO+Na2O+K2O)/100) %>% + select(-c(SiO2,Al2O3,FeOT,MgO,CaO,Na2O,K2O,TiO2)) + + +libs.tern<-cbind(libs.tern, "type"=typedlibs$type, "target"=typedlibs$target, + "shape"=typedlibs$type) + +libs.tern<-libs.tern %>% mutate(shape = ifelse(grepl("SCAM", type, ignore.case=T), + "other", shape)) %>% + mutate(shape = ifelse(grepl("other", type, ignore.case=T), + "other", shape)) %>% + mutate(shape = ifelse(grepl("scct", target, ignore.case=T), "scct", shape)) + +libs.tern$shape<-as.factor(libs.tern$shape) +``` + + +This is not specific analysis, moreso a recommendation that for future work, the scct values should be separated from the actual mars data. 
Previously, we had been analysing these targets as if they were mars data, when in fact they should be treated as reference or calibration data. + +When graphing the reference points on the ternary plot, ggrepel has a conflict with ggtern, so I had to manually add where the labels should go. If this issue with ggtern is fixed in the future, this can be simplified to use ggrepel for the labels. + +## 3.4 Result and Discussion + +```{r} +set.seed(1234) +km<-kmeans(libs.tern[,1:3],4) + +libs.tern<-as.data.frame(cbind(libs.tern,"cluster"=as.factor(km$cluster))) +``` + +```{r} +libs.tern.other<-libs.tern[libs.tern$shape=="other",] +libs.tern.scct<-libs.tern[libs.tern$shape=="scct",] + +#libs.scct.avg<-libs.tern.scct[, lapply(.SD, average), by= target] +libs.scct.avg<-aggregate(cbind(x,y,z) ~ type, data = libs.tern.scct, FUN = "mean") +libs.tern.other<-libs.tern.other[,c(1,2,3,7)] +libs.tern.other<-cbind(libs.tern.other,"type"=0) +libs.scct.avg<-cbind(libs.scct.avg[,2:4],"cluster"=libs.scct.avg$type,"type"=1) +libs.tern<-rbind(libs.scct.avg,libs.tern.other) +``` + +```{r} +libs.tern<-cbind(libs.tern,"num"=rownames(libs.tern),"legend"=0) +``` + +```{r} +libs.tern<-libs.tern %>% + mutate(legend=paste(num,cluster,sep=" ")) +``` + + +```{r} +libstern<-cbind(libs.tern,xend=0,yend=0,zend=0) +libstern<-libstern%>% + mutate(xend= ifelse(type=="1", x,xend)) %>% + mutate(yend= ifelse(type=="1", y,yend)) %>% + mutate(zend= ifelse(type=="1", z,zend)) +``` + +```{r} +for(i in c(1,10,11,13,17)){ + libstern[i,8:10]<-c(libstern[i,1]+0.06,libstern[i,2],libstern[i,3]-0.06) +} + +for(i in c(7)){ + libstern[i,8:10]<-c(libstern[i,1]+0.09,libstern[i,2],libstern[i,3]-0.09) +} + +for(i in c(15,20)){ + libstern[i,8:10]<-c(libstern[i,1],libstern[i,2]+0.05,libstern[i,3]-0.05) +} + +for(i in c(3)){ + libstern[i,8:10]<-c(libstern[i,1]+0.02,libstern[i,2]+0.06,libstern[i,3]-0.08) +} + +for(i in c(14)){ + libstern[i,8:10]<-c(libstern[i,1],libstern[i,2]-0.05,libstern[i,3]+0.05) +} + +for(i in 
c(4,16,19)){ + libstern[i,8:10]<-c(libstern[i,1]-0.08,libstern[i,2]+0.02,libstern[i,3]+0.06) +} + +for(i in c(2,9)){ + libstern[i,8:10]<-c(libstern[i,1]+0.03,libstern[i,2]-0.05,libstern[i,3]+0.03) +} + +for(i in c(6,8,12,18,22)){ + libstern[i,8:10]<-c(libstern[i,1]-0.06,libstern[i,2],libstern[i,3]+0.06) +} + +for(i in c(5)){ + libstern[i,8:10]<-c(libstern[i,1],libstern[i,2]-0.09,libstern[i,3]+0.09) +} + +for(i in c(21)){ + libstern[i,8:10]<-c(libstern[i,1]-0.03,libstern[i,2]+0.07,libstern[i,3]-0.03) +} +``` + +```{r} +nv = -0.00 #Vertical Adjustment +pn = position_nudge_tern(y=nv,x=-nv,z=nv) + +ggtern(libstern, ggtern::aes(x=x,y=y,z=z)) + + geom_point(data=subset(libstern,type==0),aes(color=cluster),alpha=0.5) + + geom_point(data=subset(libstern,type==1),aes())+ + theme_rgbw() + + labs(title="Mars LIBS Data With Reference Samples Highlighted", + x="Si+Al", + y="Fe+Mg", + z="Ca+Na+K") + + theme(legend.position="bottom") + + geom_text(position=pn,data=subset(libstern,type==1), + aes(x=xend,y=yend,z=zend,label=num),check_overlap=T)+ + geom_segment(aes(x=x,xend = xend, y = y, yend=yend, z=z, zend=zend),size=0.3, + data = subset(libstern,type==1))+ + theme_nomask() +``` +This plot with the table of reference samples below allows us to examine our k-means clusters in the context of the earth reference samples. + +One of the primary results of looking at this graph is that many of the points that seemed to be outliers from the rest of the data are actually calibration targets. There are much fewer points in cluster 3, our smallest and most distinct cluster, when you consider that some of those original points are averaged into the reference points for calcite and Flouro-Chloro-Hydro Apatite. + +Also, it is interesting that much of cluster 2 has no earth references over it. This could indicate that samples like these are not common on earth, or it could indicate that the scientists deciding what earth references to include thought samples like those were unimportant. 
+ +```{r} +kablelibstern<-cbind(point=libs.tern$num,"Description"=libs.tern$cluster) +kablelibstern<-kablelibstern[1:22,] + +kable(kablelibstern) +``` + +In the future, it would be helpful to be able to select only certain references, and to split these references into igneous and sedimentary categories so that when examining a specific igneous or sedimentary sample it is easy to see what references to compare it to. This is especially useful when examining these earth references in conjunction with the PIXL data, which can be plotted on a ternary plot as well. + +# 4.0 Finding 2: Matching LIBS and PIXL Data + +Matching the LIBS and PIXL data using their longitude and latitudes. + +## 4.1 Data, Code, and Resources + +Here is a list data sets, codes, that are used in your work. Along with brief description and URL where they are located. + +1. MatchingLIBSandPIXL.Rmd outlines a lot of the work behind matching the LIBS targets to PIXL samples. [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd) + +2. supercam_libs_moc_loc.Rds is the Rds file containing the LIBS data [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds) + +3. samples_pixl_wide.Rds is the Rds file containing all of the PIXL data [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/Data/samples_pixl_wide.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/Data/samples_pixl_wide.Rds) + +2. 
pixl_sol_coordinates.Rds contains the pixl data with the coordinates and sol metadata added from the analyst's notebook [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/main/StudentData/pixl_sol_coordinates.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/main/StudentData/pixl_sol_coordinates.Rds). + +5. PIXL_LIBS_Combined.Rds is the final product of combining the LIBS and PIXL data created by Charlotte and me. [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentData/PIXL_LIBS_Combined.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentData/PIXL_LIBS_Combined.Rds) + +6. lahira-finalProjectF24.Rmd is the final notebook of Aadi Lahiri, from which we get our earth scaling method [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/4d7fc8b60026364dbb4c571d7b3d6318a08f68bb/StudentNotebooks/Assignment08_FinalProjectNotebook/lahira-finalProjectF24.Rmd](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/4d7fc8b60026364dbb4c571d7b3d6318a08f68bb/StudentNotebooks/Assignment08_FinalProjectNotebook/lahira-finalProjectF24.Rmd) + +## 4.2 Contribution + +I worked with Charlotte Peterson on this section; together we matched the LIBS and PIXL data. Then I graphed the ternary plot of the LIBS data colored by its closest PIXL sample. The earth scaling technique on my final heatmap was from Aadi Lahiri's notebook. + + +## 4.3 Methods Description + +First, columns with the PIXL coordinates and sol were added to the PIXL data. This information was taken from the analyst's notebook. + +The final iteration of combined data is created in the MatchingLIBSandPIXL.Rmd file. The result of that file is PIXL_LIBS_Combined.Rds, which contains every Mars LIBS sample with the scct/earth reference samples removed. Each LIBS sample is listed with its closest PIXL sample, ignoring the atmospheric sample, along with the distance between them in meters. 
+ +The scct/earth reference samples were removed because their location has nothing to do with their data, the rover is taking a LIBS measurement of reference materials it carries with it. + +Even though some of the distances are farther than we would consider close or useful, ultimately, we chose not to remove any data points so that in the future people can make their own cutoff of what distance they consider to be significant or relevant. The recommendation we would make is to consider paired PIXL and LIBS samples that are within 7 meters of each other, as that is close to the range where the LIBS laser can reach. + +```{r} +pixllibs<-readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/PIXL_LIBS_Combined.Rds") +``` + +```{r} +distancetable<-pixllibs[,c(1:4,6,7)] +distancetable<-distinct(distancetable) + +ggplot(data=distancetable, aes(x=Distance, group=PIXL.Abrasion, fill=PIXL.Abrasion)) + + ggtitle("Distances of LIBS Data to Nearest PIXL Abrasion")+ + geom_density(adjust=1.5, alpha=.4) +``` +Three of the PIXL Abrasions have a lot of points that are very close, but many others have data that is very spread. 
+ +## 4.4 Result and Discussion + +Since the work of matching the LIBS and PIXL data has already been done, it is easy to import the combined RDS and convert that data into a dataframe to be used for a ternary diagram + +We can easily change what distance we would like to view + +```{r} +libs.tern <- as.data.frame(pixllibs) %>% + mutate(x=(LIBS.SiO2+LIBS.Al2O3)/100,y=(LIBS.FeOT+LIBS.MgO)/100,z=(LIBS.CaO+LIBS.Na2O+LIBS.K2O)/100) + +libs.tern<-libs.tern[,c(6,7,19,20,21)] +``` + + +```{r} +meters<- 100 + +ggtern(libs.tern, ggtern::aes(x=x,y=y,z=z)) + + geom_point(data=subset(libs.tern, Distance<=meters),aes(color=PIXL.Abrasion,alpha=0.5)) + + theme_rgbw() + + labs(title=paste("Mars LIBS Data Within",meters,"meters of PIXL Abrasion",sep=" "), + x="Si+Al", y="Fe+Mg",z="Ca+Na+K") + + theme(legend.position="right") + + guides(alpha="none",color=guide_legend(title="PIXL Abrasion")) + +meters<- 7 + +ggtern(libs.tern, ggtern::aes(x=x,y=y,z=z)) + + geom_point(data=subset(libs.tern,Distance<=meters),aes(color=PIXL.Abrasion,alpha=0.5)) + + theme_rgbw() + + labs(title=paste("Mars LIBS Data Within",meters,"meters of PIXL Abrasion",sep=" "), + x="Si+Al",y="Fe+Mg",z="Ca+Na+K") + + theme(legend.position="right") + + guides(alpha="none",color=guide_legend(title="PIXL Abrasion")) +``` + +We can see here that when interpreting the LIBS data within 7 meters of each PIXL abrasion, the data is not tightly clustered. For the most part, the data is just as spread as the entire data. 
+ +```{r} +libs.heatmap<-pixllibs + +libs.heatmap<-libs.heatmap[libs.heatmap$Distance <= 7, ] + +libs.heatmap.mean<-aggregate(cbind(LIBS.SiO2,LIBS.TiO2,LIBS.Al2O3,LIBS.FeOT,LIBS.MgO,LIBS.CaO, + LIBS.Na2O,LIBS.K2O) ~ PIXL.Abrasion, data = libs.heatmap, FUN = "mean") +libs.heatmap.med<-aggregate(cbind(LIBS.SiO2,LIBS.TiO2,LIBS.Al2O3,LIBS.FeOT,LIBS.MgO, + LIBS.CaO,LIBS.Na2O,LIBS.K2O) ~ PIXL.Abrasion, data = libs.heatmap, FUN = "median") + +libs.heatmap.mean<-cbind(libs.heatmap.mean,"x"="mean") +libs.heatmap.mean$PIXL.Abrasion<-as.character(libs.heatmap.mean$PIXL.Abrasion) +libs.heatmap.mean<-libs.heatmap.mean %>% + mutate(x=paste(PIXL.Abrasion,x,sep=" ")) +rownames(libs.heatmap.mean)<-libs.heatmap.mean$x + +libs.heatmap.med<-cbind(libs.heatmap.med,"x"="median") +libs.heatmap.med$PIXL.Abrasion<-as.character(libs.heatmap.med$PIXL.Abrasion) +libs.heatmap.med<-libs.heatmap.med %>% + mutate(x=paste(PIXL.Abrasion,x,sep=" ")) + +rownames(libs.heatmap.med)<-libs.heatmap.med$x + +libs.heatmap<-rbind(libs.heatmap.mean,libs.heatmap.med) +libs.heatmap<-libs.heatmap[c(1,7,2,8,3,9,4,10,5,11,6,12),] + +pheatmap(libs.heatmap[,2:9],scale="column",cluster_rows=F,cluster_cols=F, + main="Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Column-Scaled") +pheatmap(libs.heatmap[,2:9],scale="none",cluster_rows=F,cluster_cols=F, + main="Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Unscaled") +``` +When looking at the heatmap, we can see more differences between the abrasions than are visible on the ternary plot. 
+ +```{r} +libs_earth <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/LIBS_training_set_quartiles.Rds") +earthheatmap <- libs.heatmap %>% select(c(LIBS.SiO2, LIBS.TiO2, LIBS.Al2O3, LIBS.FeOT, LIBS.MgO, + LIBS.CaO, LIBS.Na2O, LIBS.K2O,x)) %>% + rowwise() %>% mutate("Si"= (LIBS.SiO2-libs_earth[3,2])/(libs_earth[4,2] - libs_earth[2,2]), + "Ti"= (LIBS.TiO2-libs_earth[3,3])/(libs_earth[4,3] - libs_earth[2,3]), + "Al"= (LIBS.Al2O3-libs_earth[3,4])/(libs_earth[4,4] - libs_earth[2,4]), + "Fe"= (LIBS.FeOT-libs_earth[3,5])/(libs_earth[4,5] - libs_earth[2,5]), + "Mg"= (LIBS.MgO-libs_earth[3,6])/(libs_earth[4,6] - libs_earth[2,6]), + "Ca"= (LIBS.CaO-libs_earth[3,7])/(libs_earth[4,7] - libs_earth[2,7]), + "Na"= (LIBS.Na2O-libs_earth[3,8])/(libs_earth[4,8] - libs_earth[2,8]), + "K"= (LIBS.K2O-libs_earth[3,9])/(libs_earth[4,9] - libs_earth[2,9])) %>% + select(!c(LIBS.SiO2, LIBS.TiO2, LIBS.Al2O3, LIBS.FeOT, LIBS.MgO, LIBS.CaO, LIBS.Na2O, LIBS.K2O)) + +earthheatmap<-as.matrix(earthheatmap) +earthheatmap<-as.data.frame(earthheatmap) + +rownames(earthheatmap)<-earthheatmap$x + +#earthheatmap<-earthheatmap[,2:9] +earthheatmap[,2:9]<-sapply(earthheatmap[,2:9],as.numeric) + +pheatmap(earthheatmap[,2:9],scale="none",cluster_rows=F,cluster_cols=F, + main="Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Earth Scaled") +``` +Using Aadi's earth scaling technique, we can compare our two earlier heatmaps with an earth scaled heatmap. The main consistency between these heatmaps is high variation in the Mg or MgO columns. + +## 5.5 Conclusions, Limitations, and Future Work. + +More analysis of the LIBS data grouped by PIXL needs to be done. I think a Principle Component analysis could be interesting to help us see what factors break up the different groups. I also think a similarity analysis within the groups could be interesting. 
+ +One potential issue with the combined LIBS and PIXL dataset, is that the LIBS data has some duplicate points with different latitude and longitudes. For example, the LIBS target "aegis_0907a_________" contains duplicate points with the same data with differing longitude and latitude values. This could indicate that there is more error than we think with the latitude and longitude of LIBS measurements. We are not 100% sure what the LIBS latitude and longitude is referring to (the laser or the rover), but this could indicate we understand it even less. This notebook runs on the assumption that the latitude and longitude refers to the location of the rover. + +# Bibliography +Provide a listing of references and other sources. + +* [Cousin21] Cousin, A., Sautter, V., Fabre, C., Dromart, G., Montagnac, G., Drouet, C., Meslin, P. Y., Gasnault, O., Beyssac, O., Bernard, S., Cloutis, E., Forni, O., Beck, P., Fouchet, T., Johnson, J. R., Lasue, J., Ollila, A. M., De Parseval, P., Gouy, S., & Caron, B. (2021). SuperCam calibration targets on board the perseverance rover: Fabrication and quantitative characterization. Spectrochimica Acta Part B: Atomic Spectroscopy, 106341. https://doi.org/10.1016/j.sab.2021.106341 + +* [Hamilton18] Hamilton NE, Ferry M (2018). “ggtern: Ternary Diagrams Using ggplot2.” _Journal of Statistical Software, Code Snippets_, *87*(3), + 1-17. doi:10.18637/jss.v087.c03 + +* [Hijmans24] Hijmans R (2024). _geosphere: Spherical Trigonometry_. 
R package version 1.5-20, + +```{r} +#citation("geosphere") +#citation("ggtern") +``` + + + diff --git a/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.html b/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.html new file mode 100644 index 0000000..63a52a9 --- /dev/null +++ b/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.html @@ -0,0 +1,2330 @@ + + + + + + + + + + + + + + + +Data Analytics Research Individual Final Project Report + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + +
+

DAR Project and Group Members

+
    +
  • Project name: Mars
  • +
  • Project team members: Charlotte Peterson, Doña Roberts, Xuanting +Wang, David Walczyk, Charlotte Newman, Dante Mwatibo, Nicolas Morawski, +CJ Marino, Aadi Lahiri, Ashton Compton
  • +
+
+
+

0.0 Preliminaries.

+

This report is generated from an R Markdown file that includes all +the R code necessary to produce the results described and embedded in +the report. Code blocks can be suppressed from output for readability +using the command code {R, echo=show} in the code block +header. If show <- FALSE the code block will be +suppressed; if show <- TRUE then the code will be +shown.

+
# Set to TRUE to expand R code blocks; set to FALSE to collapse R code blocks 
+# `show` is the value used by chunk headers written as {R, echo=show}.
+show <- TRUE
+

Executing this R notebook requires some subset of the following +packages:

+
    +
  • ggplot2
  • +
  • tidyverse
  • +
  • ggtern
  • +
  • knitr
  • +
  • pheatmap
  • +
+

These will be installed and loaded as necessary (code +suppressed).

+ +
+
+

1.0 Project Introduction

+

This project outlines my analysis of the Mars LIBS and PIXL data. It +largely revolves around data processing and organization

+
+
+

2.0 Organization of Report

+

This report is organized as follows:

+
    +
  • Section 3.0. Finding 1: Here we discuss the LIBS scct targets and +the importance of differentiating them in future analysis

  • +
  • Section 4.0: Finding 2: Here we discuss the connection between +the LIBS and PIXL data

  • +
  • Section 5.0 Overall conclusions and suggestions

  • +
+
+
+

3.0 Finding 1: Understanding LIBS Targets

+

I researched the meaning behind the LIBS target names, and +categorized the LIBS data into a few major categories. I created a new +Rds file that includes a column labeling each LIBS sample with its +category.

+
+

3.1 Data, Code, and Resources

+

Here is a list of the data sets and code used in this work, along +with a brief description and the URL where each is located.

+
    +
  1. supercam_libs_moc_loc.Rds is the Rds file containing the LIBS +data https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds

  2. +
  3. libs_typed.Rds is the Rds file containing the LIBS data as well +as a type column categorizing each sample https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/StudentData/libs_typed.Rds

  4. +
  5. SupercamCalibrationTargets.pdf is a pdf containing information +about the calibration targets used in the LIBS data. https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentNotebooks/Assignment07_DraftFinalProjectNotebook/SupercamCalibrationTargets.pdf

  6. +
  7. v1_libs.Rds is the Rds file containing the LIBS data as well as +my categorization that Doña put into a standardized format https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds

  8. +
+

I used the libs dataset with the standard deviation features, +distances, and totals removed. I made sure that certain categories were +numeric and added a new “type” column.

+

Then, I added a description to each scct (calibration) target in the +type column, which names the earth reference based on the pdf linked in +this section. For example, the scct target containing “PMIFA0306” was +typed “Olivine”

+

I also labeled the targets with “aegis” in their names with AEGIS, +these samples can be used the same as the other Mars LIBS samples, but +it is noted that the measurement is taken using AEGIS, the rover’s AI. +So instead of the target being chosen intentionally by a scientist, it +is chosen by the rover when it has extra resources to take a sample.

+

From the analysts notebook, targets with “scam” in their names +correspond to targets of other measurements, I went through the analysts +notebook for these samples and added the other measurements that were +taken at the same target into the type column. For example, the +“villeplane_scam” target also had ZCam measurements taken at the same +target, so I typed it “ZCAM-SCAM”

+

I labeled two targets (“sei_________________” and +“naakih______________”), with further descriptions because the analysts +notebook had clear descriptions of what the target was intended to be +sampling.

+

All remaining samples are typed “other”.

+
# Load the SuperCam LIBS measurements.
+libs.df <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds")
+
+# Remove the columns this analysis does not use: the per-oxide standard
+# deviations, the percentage total, the distance and the total emission.
+libs.df <- select(
+  libs.df,
+  -c(distance_mm, Tot.Em., SiO2_stdev, TiO2_stdev, Al2O3_stdev, FeOT_stdev,
+     MgO_stdev, Na2O_stdev, CaO_stdev, K2O_stdev, Total)
+)
+
+# Store the point number as a numeric value.
+libs.df <- mutate(libs.df, point = as.numeric(point))
+
+# Copy of the data with a placeholder `type` column (0 = not yet typed)
+# inserted between columns 4 and 5.
+typedlibs <- cbind(libs.df[, 1:4], type = 0, libs.df[, 5:13])
+
# Target-name patterns, in the same order as the descriptions in `typelist`.
+targetlist<-c("tsrich0404","LCMB0006","LCA530106","PMIFS0505","TAPAG0206","PMIOR0507",
+              "PMIDN0302","PMIFA0306","PMIAN0106","PMIEN0602","TSERP0102","LBHVO20406",
+              "LJSC10304","LANKE0101","LSIDE0101","LJMN10106","NTE010301","NTE020106",
+              "NTE030106","NTE040106","NTE050301","SHERG02","TITANIUM","aegis",
+              "buzzard_rocks_scam","alfalfa_378_scam","chiniak_565_scam",
+              "garde_210_scam","guillaumes_168_scam","montpezat_350_scam","naltsos_scam",
+              "ouzel_falls_792_scam","pollock_knob_501_sca","rose_river_falls_sca",
+              "roubion_168_scam","villeplane_scam","atmo_mountain_637_sc",
+              "crosswind_lake_641_s")
+
+# Description assigned to the rows whose target name matches each pattern.
+typelist<-c("BHVO-2 basalt and K sulfate mixture","Chert","Calcite","Ferrosilite",
+            "Fluoro-Chloro-Hydro Apatite","Orthoclase","Diopside","Olivine","Andesine",
+            "Enstatite","Serpentine/Talc","BHVO-2 standard basalt","Mars soil analog",
+            "Ankerite","Siderite","JMN-1 standard Mn nodule",
+            "Basalt dopped in minor elements - Cu, Zn",
+            "Basalt dopped in minor elements - Mn, Ba, Cr",
+            "Basalt dopped in minor elements - Zn",
+            "Basalt dopped in minor elements - Li, Sr",
+            "Basalt dopped in minor elements - Ni","Shergottite","Titanium","AEGIS",
+            "PIXL-SCAM","VISIR-Ramanx2-ZCAM-SCAM","AT-SCAM","AT-SCAM","PIXL-SCAM",
+            "PIXL-SCAM","PIXL-SCAM","AT-SCAM","ZCAM-SCAM","?-SCAM","ZCAM-PIXL-SCAM",
+            "ZCAM-SCAM","ZCAMMS-SCAM","ZCAM-SCAM")
+
+# Lookup table: one row per pattern, columns `targetlist` and `typelist`.
+stopifnot(length(targetlist) == length(typelist))
+targettyped <- data.frame(targetlist, typelist, stringsAsFactors = FALSE)
+
+# Every pattern listed before "aegis" is treated as a calibration pattern;
+# derive that count from the list instead of hard-coding it.
+n.calibration <- match("aegis", targetlist) - 1
+
+# Calibration patterns always set the type, whatever was there before.
+for (i in seq_len(n.calibration)) {
+  typedlibs <- typedlibs %>%
+    mutate(type = ifelse(grepl(targettyped[i, 1], target, ignore.case = TRUE),
+                         targettyped[i, 2], type))
+}
+
+# The remaining patterns (AEGIS and the *_scam targets) only label rows that
+# are still untyped, i.e. whose type is still the placeholder "0".
+for (i in (n.calibration + 1):nrow(targettyped)) {
+  typedlibs <- typedlibs %>%
+    mutate(type = ifelse(grepl(targettyped[i, 1], target, ignore.case = TRUE) & type == "0",
+                         targettyped[i, 2], type))
+}
+
+# Anything still untyped becomes "other"; two targets get a more specific
+# description.
+typedlibs <- typedlibs %>%
+  mutate(type = ifelse(type == "0", "other", type)) %>%
+  mutate(type = ifelse(target == "sei_________________", "other - fine soil", type)) %>%
+  mutate(type = ifelse(target == "naakih______________", "other - coarse soil", type))
+
+kable(targettyped)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
targetlisttypelist
tsrich0404BHVO-2 basalt and K sulfate mixture
LCMB0006Chert
LCA530106Calcite
PMIFS0505Ferrosilite
TAPAG0206Fluoro-Chloro-Hydro Apatite
PMIOR0507Orthoclase
PMIDN0302Diopside
PMIFA0306Olivine
PMIAN0106Andesine
PMIEN0602Enstatite
TSERP0102Serpentine/Talc
LBHVO20406BHVO-2 standard basalt
LJSC10304Mars soil analog
LANKE0101Ankerite
LSIDE0101Siderite
LJMN10106JMN-1 standard Mn nodule
NTE010301Basalt dopped in minor elements - Cu, Zn
NTE020106Basalt dopped in minor elements - Mn, Ba, Cr
NTE030106Basalt dopped in minor elements - Zn
NTE040106Basalt dopped in minor elements - Li, Sr
NTE050301Basalt dopped in minor elements - Ni
SHERG02Shergottite
TITANIUMTitanium
aegisAEGIS
buzzard_rocks_scamPIXL-SCAM
alfalfa_378_scamVISIR-Ramanx2-ZCAM-SCAM
chiniak_565_scamAT-SCAM
garde_210_scamAT-SCAM
guillaumes_168_scamPIXL-SCAM
montpezat_350_scamPIXL-SCAM
naltsos_scamPIXL-SCAM
ouzel_falls_792_scamAT-SCAM
pollock_knob_501_scaZCAM-SCAM
rose_river_falls_sca?-SCAM
roubion_168_scamZCAM-PIXL-SCAM
villeplane_scamZCAM-SCAM
atmo_mountain_637_scZCAMMS-SCAM
crosswind_lake_641_sZCAM-SCAM
+
+
+

3.2 Contribution

+

This section was sole work, except for Doña’s standardization of my +file into the v1_libs.Rds at the end. Later, in my ternary diagrams, I +used the same seed and number of clusters as Aadi, so that my clustering +would match his.

+
+
+

3.3 Methods Description

+

Now, we plot the average of each calibration/scct target, against the +clustered mars LIBS data. The Mars data is separated from the reference +data, we plot the reference data over the Mars data with labeled points +corresponding to the table of reference types.

+
# Columns 6:13 of libs.df as a matrix (the names used below show these are the
# eight oxide columns).
+libs.matrix <- as.matrix(libs.df[, 6:13])
+
+# Ternary coordinates: x = Si+Al, y = Fe+Mg, z = Ca+Na+K, each divided by 100.
+# The individual oxide columns are dropped afterwards.
+libs.tern <- as.data.frame(libs.matrix) %>%
+  mutate(x = (SiO2 + Al2O3) / 100,
+         y = (FeOT + MgO) / 100,
+         z = (CaO + Na2O + K2O) / 100) %>%
+  select(-c(SiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O, TiO2))
+
+
+# Attach the sample type and target name; `shape` starts as a copy of `type`.
+libs.tern <- cbind(libs.tern, "type" = typedlibs$type, "target" = typedlibs$target,
+                   "shape" = typedlibs$type)
+
+# Collapse `shape` into plotting groups: any type containing "SCAM" or "other"
+# becomes "other", and any target containing "scct" becomes "scct" (applied
+# last, so it wins).
+# NOTE(review): rows typed "AEGIS" match neither rule and keep shape "AEGIS",
+# so code that selects only shape "other"/"scct" will leave them out - confirm
+# that this is intended.
+libs.tern <- libs.tern %>%
+  mutate(shape = ifelse(grepl("SCAM", type, ignore.case = TRUE) |
+                          grepl("other", type, ignore.case = TRUE),
+                        "other", shape),
+         shape = ifelse(grepl("scct", target, ignore.case = TRUE), "scct", shape))
+
+libs.tern$shape <- as.factor(libs.tern$shape)
+

This is not a specific analysis so much as a recommendation that, in +future work, the scct values should be separated from the actual Mars +data. Previously, we had been analysing these targets as if they were +Mars data, when in fact they should be treated as reference or +calibration data.

+

When graphing the reference points on the ternary plot, ggrepel has a +conflict with ggtern, so I had to manually add where the labels should +go. If this issue with ggtern is fixed in the future, this can be +simplified to use ggrepel for the labels.

+
+
+

3.4 Result and Discussion

+
set.seed(1234)
+# k-means with 4 centers on the three ternary coordinates (columns 1:3).
+km <- kmeans(libs.tern[, 1:3], centers = 4)
+
+libs.tern <- as.data.frame(cbind(libs.tern, cluster = factor(km$cluster)))
+
# Split the data by plotting group.
+libs.tern.scct <- libs.tern[libs.tern$shape == "scct", ]
+libs.tern.other <- libs.tern[libs.tern$shape == "other", ]
+
+# "other" rows: keep x, y, z and cluster (columns 1:3 and 7), flag as type 0.
+libs.tern.other <- libs.tern.other[, c(1:3, 7)]
+libs.tern.other <- cbind(libs.tern.other, type = 0)
+
+# "scct" rows: average x, y, z per reference type. The reference name is
+# stored in the `cluster` column and the rows are flagged as type 1.
+libs.scct.avg <- aggregate(cbind(x, y, z) ~ type, data = libs.tern.scct, FUN = "mean")
+libs.scct.avg <- cbind(libs.scct.avg[, 2:4], cluster = libs.scct.avg$type, type = 1)
+
+# Reference averages first, then the remaining samples.
+libs.tern <- rbind(libs.scct.avg, libs.tern.other)
+
# `num` is the row name of the combined table.
+libs.tern <- cbind(libs.tern, num = rownames(libs.tern))
+
# `legend` combines the number and the cluster / reference name.
+libs.tern$legend <- paste(libs.tern$num, libs.tern$cluster, sep = "  ")
+
# Label anchor points: start at the point itself for reference rows (type 1)
+# and at 0 for every other row.
+libstern <- cbind(libs.tern, xend = 0, yend = 0, zend = 0)
+libstern <- mutate(libstern,
+                   xend = ifelse(type == "1", x, xend),
+                   yend = ifelse(type == "1", y, yend),
+                   zend = ifelse(type == "1", z, zend))
+
# Hand-tuned label positions for the reference points. Each entry lists the
# rows of `libstern` it applies to and the (x, y, z) offset added to the
# point's own coordinates; the result is written to columns 8:10
# (xend, yend, zend).
+label.offsets <- list(
+  list(rows = c(1, 10, 11, 13, 17), delta = c(0.06, 0, -0.06)),
+  list(rows = c(7), delta = c(0.09, 0, -0.09)),
+  list(rows = c(15, 20), delta = c(0, 0.05, -0.05)),
+  list(rows = c(3), delta = c(0.02, 0.06, -0.08)),
+  list(rows = c(14), delta = c(0, -0.05, 0.05)),
+  list(rows = c(4, 16, 19), delta = c(-0.08, 0.02, 0.06)),
+  list(rows = c(2, 9), delta = c(0.03, -0.05, 0.03)),
+  list(rows = c(6, 8, 12, 18, 22), delta = c(-0.06, 0, 0.06)),
+  list(rows = c(5), delta = c(0, -0.09, 0.09)),
+  list(rows = c(21), delta = c(-0.03, 0.07, -0.03))
+)
+
+for (spec in label.offsets) {
+  for (i in spec$rows) {
+    libstern[i, 8:10] <- c(libstern[i, 1], libstern[i, 2], libstern[i, 3]) + spec$delta
+  }
+}
+
nv <- -0.00  # Vertical adjustment applied to the text labels
+pn <- position_nudge_tern(y = nv, x = -nv, z = nv)
+
+# Mars samples (type 0) are coloured by k-means cluster; reference averages
+# (type 1) are drawn on top, each joined by a segment to its numbered label at
+# (xend, yend, zend).
+ggtern(libstern, ggtern::aes(x = x, y = y, z = z)) +
+  geom_point(data = subset(libstern, type == 0), aes(color = cluster), alpha = 0.5) +
+  geom_point(data = subset(libstern, type == 1)) +
+  theme_rgbw() +
+  labs(title = "Mars LIBS Data With Reference Samples Highlighted",
+       x = "Si+Al",
+       y = "Fe+Mg",
+       z = "Ca+Na+K") +
+  theme(legend.position = "bottom") +
+  geom_text(position = pn, data = subset(libstern, type == 1),
+            aes(x = xend, y = yend, z = zend, label = num), check_overlap = TRUE) +
+  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
+  geom_segment(aes(x = x, xend = xend, y = y, yend = yend, z = z, zend = zend),
+               linewidth = 0.3,
+               data = subset(libstern, type == 1)) +
+  theme_nomask()
+
## Warning in geom_text(position = pn, data = subset(libstern, type == 1), :
+## Ignoring unknown aesthetics: z
+
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
+## ℹ Please use `linewidth` instead.
+## This warning is displayed once every 8 hours.
+## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
+## generated.
+
## Warning in geom_segment(aes(x = x, xend = xend, y = y, yend = yend, z = z, :
+## Ignoring unknown aesthetics: z and zend
+
## Warning in acomp(data[, self$required_aes]): Negative values in composition are
+## used as detection limits
+

+This plot with the table of reference samples below allows us to examine +our k-means clusters in the context of the earth reference samples.

+

One of the primary results of looking at this graph is that many of +the points that seemed to be outliers from the rest of the data are +actually calibration targets. There are many fewer points in cluster 3, +our smallest and most distinct cluster, when you consider that some of +those original points are averaged into the reference points for calcite +and Fluoro-Chloro-Hydro Apatite.

+

Also, it is interesting that much of cluster 2 has no earth +references over it. This could indicate that samples like these are not +common on earth, or it could indicate that the scientists deciding what +earth references to include thought samples like those were +unimportant.

+
# Table of the reference samples: plot number and description. Reference rows
+# are the ones flagged type == 1, so they are selected by that flag rather
+# than by a fixed row count.
+is.reference <- libs.tern$type == 1
+kablelibstern <- cbind(point = libs.tern$num[is.reference],
+                       "Description" = libs.tern$cluster[is.reference])
+
+kable(kablelibstern)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pointDescription
1Andesine
2Ankerite
3Basalt dopped in minor elements - Cu, Zn
4Basalt dopped in minor elements - Li, Sr
5Basalt dopped in minor elements - Mn, Ba, Cr
6Basalt dopped in minor elements - Ni
7Basalt dopped in minor elements - Zn
8BHVO-2 basalt and K sulfate mixture
9BHVO-2 standard basalt
10Calcite
11Chert
12Diopside
13Enstatite
14Ferrosilite
15Fluoro-Chloro-Hydro Apatite
16JMN-1 standard Mn nodule
17Mars soil analog
18Olivine
19Orthoclase
20Serpentine/Talc
21Shergottite
22Siderite
+

In the future, it would be helpful to be able to select only certain +references, and to split these references into igneous and sedimentary +categories so that when examining a specific igneous or sedimentary +sample it is easy to see what references to compare it to. This is +especially useful when examining these earth references in conjunction +with the PIXL data, which can be plotted on a ternary plot as well.

+
+
+
+

4.0 Finding 2: Matching LIBS and PIXL Data

+

Matching the LIBS and PIXL data using their longitude and +latitudes.

+
+

4.1 Data, Code, and Resources

+

Here is a list of the data sets and code used in this work, along +with a brief description and the URL where each is located.

+
    +
  1. MatchingLIBSandPIXL.Rmd outlines a lot of the work behind +matching the LIBS targets to PIXL samples. https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd

  2. +
  3. supercam_libs_moc_loc.Rds is the Rds file containing the LIBS +data https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds

  4. +
  5. samples_pixl_wide.Rds is the Rds file containing all of the PIXL +data https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/Data/samples_pixl_wide.Rds

  6. +
  7. pixl_sol_coordinates.Rds contains the pixl data with the +coordinates and sol metadata added from the analysts notebook https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/main/StudentData/pixl_sol_coordinates.Rds.

  8. +
  9. PIXL_LIBS_Combined.Rds is the final product of combining the LIBS +and PIXL data created by Charlotte and I. https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentData/PIXL_LIBS_Combined.Rds

  10. +
  11. lahira-finalProjectF24.Rmd is the final notebook of Aadi Lahiri, +which we get our earth scaling method from https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/4d7fc8b60026364dbb4c571d7b3d6318a08f68bb/StudentNotebooks/Assignment08_FinalProjectNotebook/lahira-finalProjectF24.Rmd

  12. +
+
+
+

4.2 Contribution

+

I worked with Charlotte Peterson on this section, we matched the LIBS +and PIXL data together. Then I graphed the ternary plot of the LIBS data +colored by its closest PIXL sample. The earth scaling technique on my +final heatmap was from Aadi Lahiri’s notebook.

+
+
+

4.3 Methods Description

+

First, columns with the PIXL coordinates and sol were added to the +PIXL data. This information was found from the analyst’s notebook.

+

The final iteration of combined data is created in the +MatchingLIBSandPIXL.Rmd file. The result of that file is the +PIXL_LIBS_Combined.Rds, which contains every mars LIBS sample, so the +scct/earth reference samples are removed. Each LIBS sample is listed +with its closest PIXL sample, ignoring the atmospheric sample, and their +distance in meters.

+

The scct/earth reference samples were removed because their location +has nothing to do with their data, the rover is taking a LIBS +measurement of reference materials it carries with it.

+

Even though some of the distances are farther than we would consider +close or useful, ultimately, we chose not to remove any data points so +that in the future people can make their own cutoff of what distance +they consider to be significant or relevant. The recommendation we would +make is to consider paired PIXL and LIBS samples that are within 7 +meters of each other, as that is close to the range where the LIBS laser +can reach.

+
# Combined LIBS/PIXL table produced by MatchingLIBSandPIXL.Rmd.
+pixllibs <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/PIXL_LIBS_Combined.Rds")
+
+# Distinct rows of columns 1:4, 6 and 7 (these include Distance and
+# PIXL.Abrasion, which the plot below uses).
+distancetable <- distinct(pixllibs[, c(1:4, 6, 7)])
+
+# Density of the distances, one curve per PIXL abrasion.
+ggplot(distancetable, aes(x = Distance, group = PIXL.Abrasion, fill = PIXL.Abrasion)) +
+  geom_density(adjust = 1.5, alpha = 0.4) +
+  labs(title = "Distances of LIBS Data to Nearest PIXL Abrasion")
+

+Three of the PIXL Abrasions have a lot of points that are very close, +but many others have data that is very spread.

+
+
+

4.4 Result and Discussion

+

Since the work of matching the LIBS and PIXL data has already been +done, it is easy to import the combined RDS and convert that data into a +dataframe to be used for a ternary diagram

+

We can easily change what distance we would like to view

+
# Ternary coordinates for the combined LIBS/PIXL table:
+# x = Si+Al, y = Fe+Mg, z = Ca+Na+K, each divided by 100.
+libs.tern <- as.data.frame(pixllibs)
+libs.tern$x <- (libs.tern$LIBS.SiO2 + libs.tern$LIBS.Al2O3) / 100
+libs.tern$y <- (libs.tern$LIBS.FeOT + libs.tern$LIBS.MgO) / 100
+libs.tern$z <- (libs.tern$LIBS.CaO + libs.tern$LIBS.Na2O + libs.tern$LIBS.K2O) / 100
+
+# Keep columns 6, 7 and 19:21 only.
+# NOTE(review): presumably PIXL.Abrasion, Distance and the new x, y, z -
+# confirm against the column order of PIXL_LIBS_Combined.Rds.
+libs.tern <- libs.tern[, c(6:7, 19:21)]
+
# Largest LIBS-to-PIXL distance (in meters) to include in the plot.
+meters <- 100
+
+# alpha is a fixed value, so it is set on the layer rather than mapped inside
+# aes(); mapping a constant creates an alpha scale and legend that then has
+# to be hidden.
+ggtern(libs.tern, ggtern::aes(x = x, y = y, z = z)) +
+  geom_point(data = subset(libs.tern, Distance <= meters),
+             aes(color = PIXL.Abrasion), alpha = 0.5) +
+  theme_rgbw() +
+  labs(title = paste("Mars LIBS Data Within", meters, "meters of PIXL Abrasion", sep = " "),
+       x = "Si+Al", y = "Fe+Mg", z = "Ca+Na+K") +
+  theme(legend.position = "right") +
+  guides(color = guide_legend(title = "PIXL Abrasion"))
+

+
# Largest LIBS-to-PIXL distance (in meters) to include in the plot.
+meters <- 7
+
+# alpha is a fixed value, so it is set on the layer rather than mapped inside
+# aes(); mapping a constant creates an alpha scale and legend that then has
+# to be hidden.
+ggtern(libs.tern, ggtern::aes(x = x, y = y, z = z)) +
+  geom_point(data = subset(libs.tern, Distance <= meters),
+             aes(color = PIXL.Abrasion), alpha = 0.5) +
+  theme_rgbw() +
+  labs(title = paste("Mars LIBS Data Within", meters, "meters of PIXL Abrasion", sep = " "),
+       x = "Si+Al", y = "Fe+Mg", z = "Ca+Na+K") +
+  theme(legend.position = "right") +
+  guides(color = guide_legend(title = "PIXL Abrasion"))
+

+

We can see here that when interpreting the LIBS data within 7 meters +of each PIXL abrasion, the data is not tightly clustered. For the most +part, the data is just as spread as the entire data.

+
# Keep only the LIBS samples that lie within 7 m of their nearest PIXL abrasion.
+libs.heatmap <- pixllibs
+libs.heatmap <- libs.heatmap[libs.heatmap$Distance <= 7, ]
+
+# Mean and median of the eight LIBS oxide columns for each PIXL abrasion.
+oxide.formula <- cbind(LIBS.SiO2, LIBS.TiO2, LIBS.Al2O3, LIBS.FeOT, LIBS.MgO,
+                       LIBS.CaO, LIBS.Na2O, LIBS.K2O) ~ PIXL.Abrasion
+libs.heatmap.mean <- aggregate(oxide.formula, data = libs.heatmap, FUN = "mean")
+libs.heatmap.med <- aggregate(oxide.formula, data = libs.heatmap, FUN = "median")
+
+# Label every row "<abrasion> mean" / "<abrasion> median". The label is kept in
+# column `x` and is also used as the row name shown on the heatmap.
+libs.heatmap.mean$PIXL.Abrasion <- as.character(libs.heatmap.mean$PIXL.Abrasion)
+libs.heatmap.mean$x <- paste(libs.heatmap.mean$PIXL.Abrasion, "mean", sep = " ")
+rownames(libs.heatmap.mean) <- libs.heatmap.mean$x
+
+libs.heatmap.med$PIXL.Abrasion <- as.character(libs.heatmap.med$PIXL.Abrasion)
+libs.heatmap.med$x <- paste(libs.heatmap.med$PIXL.Abrasion, "median", sep = " ")
+rownames(libs.heatmap.med) <- libs.heatmap.med$x
+
+# Interleave the rows so each abrasion's mean is directly followed by its
+# median. The order is derived from the number of abrasions instead of being
+# hard-coded for exactly six of them.
+n.abrasions <- nrow(libs.heatmap.mean)
+row.order <- as.vector(rbind(seq_len(n.abrasions),
+                             n.abrasions + seq_len(n.abrasions)))
+libs.heatmap <- rbind(libs.heatmap.mean, libs.heatmap.med)
+libs.heatmap <- libs.heatmap[row.order, ]
+
+# Columns 2:9 are the eight oxide summaries (column 1 is PIXL.Abrasion).
+pheatmap(libs.heatmap[, 2:9], scale = "column", cluster_rows = FALSE, cluster_cols = FALSE,
+    main = "Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Column-Scaled")
+

+
# Same heatmap without column scaling; clustering is disabled so the rows and
+# columns keep their given order.
+pheatmap(libs.heatmap[, 2:9], scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
+    main = "Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Unscaled")
+

+When looking at the heatmap, we can see more differences between the +abrasions than are visible on the ternary plot.

+
# Reference table used for the "Earth scaling" below.
+# NOTE(review): rows 2, 3 and 4 are presumably Q1, median and Q3 of the Earth
+# training set (going by the file name) - confirm against the file.
+libs_earth <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/LIBS_training_set_quartiles.Rds")
+
+# Scale every oxide summary as (value - row 3) / (row 4 - row 2) of the
+# matching libs_earth column, then drop the unscaled oxide columns.
+earthheatmap <- libs.heatmap %>%
+  select(c(LIBS.SiO2, LIBS.TiO2, LIBS.Al2O3, LIBS.FeOT, LIBS.MgO,
+           LIBS.CaO, LIBS.Na2O, LIBS.K2O, x)) %>%
+  rowwise() %>%
+  mutate("Si" = (LIBS.SiO2 - libs_earth[3, 2]) / (libs_earth[4, 2] - libs_earth[2, 2]),
+         "Ti" = (LIBS.TiO2 - libs_earth[3, 3]) / (libs_earth[4, 3] - libs_earth[2, 3]),
+         "Al" = (LIBS.Al2O3 - libs_earth[3, 4]) / (libs_earth[4, 4] - libs_earth[2, 4]),
+         "Fe" = (LIBS.FeOT - libs_earth[3, 5]) / (libs_earth[4, 5] - libs_earth[2, 5]),
+         "Mg" = (LIBS.MgO - libs_earth[3, 6]) / (libs_earth[4, 6] - libs_earth[2, 6]),
+         "Ca" = (LIBS.CaO - libs_earth[3, 7]) / (libs_earth[4, 7] - libs_earth[2, 7]),
+         "Na" = (LIBS.Na2O - libs_earth[3, 8]) / (libs_earth[4, 8] - libs_earth[2, 8]),
+         "K" = (LIBS.K2O - libs_earth[3, 9]) / (libs_earth[4, 9] - libs_earth[2, 9])) %>%
+  select(!c(LIBS.SiO2, LIBS.TiO2, LIBS.Al2O3, LIBS.FeOT, LIBS.MgO, LIBS.CaO, LIBS.Na2O, LIBS.K2O))
+
+# Flatten the row-wise result into a plain data frame.
+# NOTE(review): because column `x` is character, as.matrix() turns the numeric
+# columns into formatted strings, so the values converted back below may be
+# rounded to the default number of significant digits - confirm this is fine.
+earthheatmap <- as.matrix(earthheatmap)
+earthheatmap <- as.data.frame(earthheatmap)
+
+rownames(earthheatmap) <- earthheatmap$x
+
+# Columns 2:9 hold the scaled values as text; convert them back to numeric.
+earthheatmap[, 2:9] <- lapply(earthheatmap[, 2:9], as.numeric)
+
+pheatmap(earthheatmap[, 2:9], scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
+    main = "Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Earth Scaled")
+

+Using Aadi’s earth scaling technique, we can compare our two earlier +heatmaps with an earth scaled heatmap. The main consistency between +these heatmaps is high variation in the Mg or MgO columns.

+
+
+

5.5 Conclusions, Limitations, and Future Work.

+

More analysis of the LIBS data grouped by PIXL needs to be done. I +think a Principal Component Analysis could be interesting to help us see +what factors break up the different groups. I also think a similarity +analysis within the groups could be interesting.

+

One potential issue with the combined LIBS and PIXL dataset is that +the LIBS data has some duplicate points with different latitudes and +longitudes. For example, the LIBS target “aegis_0907a_________” contains +duplicate points with the same data but differing longitude and +latitude values. This could indicate that there is more error than we +think in the latitude and longitude of LIBS measurements. We are not +100% sure what the LIBS latitude and longitude refer to (the +laser or the rover), but this could indicate we understand it even less. +This notebook runs on the assumption that the latitude and longitude +refer to the location of the rover.

+
+
+
+

Bibliography

+

Provide a listing of references and other sources.

+
    +
  • [Cousin21] Cousin, A., Sautter, V., Fabre, C., Dromart, G., +Montagnac, G., Drouet, C., Meslin, P. Y., Gasnault, O., Beyssac, O., +Bernard, S., Cloutis, E., Forni, O., Beck, P., Fouchet, T., Johnson, J. +R., Lasue, J., Ollila, A. M., De Parseval, P., Gouy, S., & Caron, B. +(2021). SuperCam calibration targets on board the perseverance rover: +Fabrication and quantitative characterization. Spectrochimica Acta Part +B: Atomic Spectroscopy, 106341. https://doi.org/10.1016/j.sab.2021.106341

  • +
  • [Hamilton18] Hamilton NE, Ferry M (2018). “ggtern: Ternary +Diagrams Using ggplot2.” Journal of Statistical Software, Code +Snippets, 87(3), 1-17. doi:10.18637/jss.v087.c03 https://doi.org/10.18637/jss.v087.c03

  • +
  • [Hijmans24] Hijmans R (2024). geosphere: Spherical +Trigonometry. R package version 1.5-20, https://CRAN.R-project.org/package=geosphere

  • +
+
#citation("geosphere")
+#citation("ggtern")
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.nb.html b/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.nb.html new file mode 100644 index 0000000..a5f7e3d --- /dev/null +++ b/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.nb.html @@ -0,0 +1,2609 @@ + + + + + + + + + + + + + + + +Data Analytics Research Individual Final Project Report + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + +
+

DAR Project and Group Members

+
    +
  • Project name: Mars
  • +
  • Project team members: Charlotte Peterson, Doña Roberts, Xuanting +Wang, David Walczyk, Charlotte Newman, Dante Mwatibo, Nicolas Morawski, +CJ Marino, Aadi Lahiri, Ashton Compton
  • +
+
+
+

0.0 Preliminaries.

+

This report is generated from an R Markdown file that includes all +the R code necessary to produce the results described and embedded in +the report. Code blocks can be suppressed from output for readability +using the command code {R, echo=show} in the code block +header. If show <- FALSE the code block will be +suppressed; if show <- TRUE then the code will be +shown.

+ + + +
# Set to TRUE to expand R code blocks; set to FALSE to collapse R code blocks 
+# `show` is the value used by chunk headers written as {R, echo=show}.
+show <- TRUE
+ + + +

Executing this R notebook requires some subset of the following +packages:

+
    +
  • ggplot2
  • +
  • tidyverse
  • +
  • ggtern
  • +
  • knitr
  • +
  • pheatmap
  • +
+

These will be installed and loaded as necessary (code +suppressed).

+ + + +
+
+

1.0 Project Introduction

+

This project outlines my analysis of the Mars LIBS and PIXL data. It +largely revolves around data processing and organization

+
+
+

2.0 Organization of Report

+

This report is organized as follows:

+
    +
  • Section 3.0. Finding 1: Here we discuss the LIBS scct targets and +the importance of differentiating them in future analysis

  • +
  • Section 4.0: Finding 2: Here we discuss the connection between +the LIBS and PIXL data

  • +
  • Section 5.0 Overall conclusions and suggestions

  • +
+
+
+

3.0 Finding 1: Understanding LIBS Targets

+

I researched the meaning behind the LIBS target names, and +categorized the LIBS data into a few major categories. I created a new +Rds file that includes a column labeling each LIBS sample with its +category.

+
+

3.1 Data, Code, and Resources

+

Here is a list of the data sets and code used in this work, along +with a brief description and the URL where each is located.

+
    +
  1. supercam_libs_moc_loc.Rds is the Rds file containing the LIBS +data https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds

  2. +
  3. libs_typed.Rds is the Rds file containing the LIBS data as well +as a type column categorizing each sample https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/StudentData/libs_typed.Rds

  4. +
  5. SupercamCalibrationTargets.pdf is a pdf containing information +about the calibration targets used in the LIBS data. https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentNotebooks/Assignment07_DraftFinalProjectNotebook/SupercamCalibrationTargets.pdf

  6. +
  7. v1_libs.Rds is the Rds file containing the LIBS data as well as +my categorization that Doña put into a standardized format https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds

  8. +
+

I used the libs dataset with the standard deviation features, +distances, and totals removed. I made sure that certain categories were +numeric and added a new “type” column.

+

Then, I added a description to each scct (calibration) target in the +type column, which names the earth reference based on the pdf linked in +this section. For example, the scct target containing “PMIFA0306” was +typed “Olivine”

+

I also labeled the targets with “aegis” in their names with AEGIS, +these samples can be used the same as the other Mars LIBS samples, but +it is noted that the measurement is taken using AEGIS, the rover’s AI. +So instead of the target being chosen intentionally by a scientist, it +is chosen by the rover when it has extra resources to take a sample.

+

From the analysts notebook, targets with “scam” in their names +correspond to targets of other measurements, I went through the analysts +notebook for these samples and added the other measurements that were +taken at the same target into the type column. For example, the +“villeplane_scam” target also had ZCam measurements taken at the same +target, so I typed it “ZCAM-SCAM”

+

I labeled two targets (“sei_________________” and +“naakih______________”), with further descriptions because the analysts +notebook had clear descriptions of what the target was intended to be +sampling.

+

All remaining samples are typed “other”.

+ + + +
libs.df <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds")
+
+#Drop the standard deviation features, the sum of the percentages, 
+#the distance, and the total frequencies
+libs.df <- libs.df %>% 
+  select(!(c(distance_mm,Tot.Em.,SiO2_stdev,TiO2_stdev,Al2O3_stdev,FeOT_stdev,
+             MgO_stdev,Na2O_stdev,CaO_stdev,K2O_stdev,Total)))
+
+# Convert the points to numeric
+libs.df$point <- as.numeric(libs.df$point)
+typedlibs<-cbind(libs.df[,1:4],"type"=0,libs.df[5:13])
+ + + + + + +
targetlist<-c("tsrich0404","LCMB0006","LCA530106","PMIFS0505","TAPAG0206","PMIOR0507",
+              "PMIDN0302","PMIFA0306","PMIAN0106","PMIEN0602","TSERP0102","LBHVO20406",
+              "LJSC10304","LANKE0101","LSIDE0101","LJMN10106","NTE010301","NTE020106",
+              "NTE030106","NTE040106","NTE050301","SHERG02","TITANIUM","aegis",
+              "buzzard_rocks_scam","alfalfa_378_scam","chiniak_565_scam",
+              "garde_210_scam","guillaumes_168_scam","montpezat_350_scam","naltsos_scam",
+              "ouzel_falls_792_scam","pollock_knob_501_sca","rose_river_falls_sca",
+              "roubion_168_scam","villeplane_scam","atmo_mountain_637_sc",
+              "crosswind_lake_641_s")
+
+typelist<-c("BHVO-2 basalt and K sulfate mixture","Chert","Calcite","Ferrosilite",
+            "Fluoro-Chloro-Hydro Apatite","Orthoclase","Diopside","Olivine","Andesine",
+            "Enstatite","Serpentine/Talc","BHVO-2 standard basalt","Mars soil analog",
+            "Ankerite","Siderite","JMN-1 standard Mn nodule",
+            "Basalt dopped in minor elements - Cu, Zn",
+            "Basalt dopped in minor elements - Mn, Ba, Cr",
+            "Basalt dopped in minor elements - Zn",
+            "Basalt dopped in minor elements - Li, Sr",
+            "Basalt dopped in minor elements - Ni","Shergottite","Titanium","AEGIS",
+            "PIXL-SCAM","VISIR-Ramanx2-ZCAM-SCAM","AT-SCAM","AT-SCAM","PIXL-SCAM",
+            "PIXL-SCAM","PIXL-SCAM","AT-SCAM","ZCAM-SCAM","?-SCAM","ZCAM-PIXL-SCAM",
+            "ZCAM-SCAM","ZCAMMS-SCAM","ZCAM-SCAM")
+
+targettyped<-as.data.frame(cbind(targetlist,typelist),rownames=c(1))
+
+for(i in 1:23){
+  typedlibs<-typedlibs %>%
+    mutate(type = ifelse(grepl(targettyped[i,1],target,ignore.case=T),
+                       targettyped[i,2], type))
+}
+
+for(i in 24:nrow(targettyped)){
+  typedlibs<-typedlibs %>%
+    mutate(type= ifelse(grepl(targettyped[i,1], target,ignore.case=T) & type=="0",
+                      targettyped[i,2],type))
+}
+
+typedlibs<-typedlibs %>%
+  mutate(type=ifelse(type=="0","other",type)) %>%
+  mutate(type= ifelse(target=="sei_________________", "other - fine soil",type)) %>%
+  mutate(type= ifelse(target=="naakih______________", "other - coarse soil",type))
+
+kable(targettyped)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
targetlisttypelist
tsrich0404BHVO-2 basalt and K sulfate mixture
LCMB0006Chert
LCA530106Calcite
PMIFS0505Ferrosilite
TAPAG0206Fluoro-Chloro-Hydro Apatite
PMIOR0507Orthoclase
PMIDN0302Diopside
PMIFA0306Olivine
PMIAN0106Andesine
PMIEN0602Enstatite
TSERP0102Serpentine/Talc
LBHVO20406BHVO-2 standard basalt
LJSC10304Mars soil analog
LANKE0101Ankerite
LSIDE0101Siderite
LJMN10106JMN-1 standard Mn nodule
NTE010301Basalt dopped in minor elements - Cu, Zn
NTE020106Basalt dopped in minor elements - Mn, Ba, Cr
NTE030106Basalt dopped in minor elements - Zn
NTE040106Basalt dopped in minor elements - Li, Sr
NTE050301Basalt dopped in minor elements - Ni
SHERG02Shergottite
TITANIUMTitanium
aegisAEGIS
buzzard_rocks_scamPIXL-SCAM
alfalfa_378_scamVISIR-Ramanx2-ZCAM-SCAM
chiniak_565_scamAT-SCAM
garde_210_scamAT-SCAM
guillaumes_168_scamPIXL-SCAM
montpezat_350_scamPIXL-SCAM
naltsos_scamPIXL-SCAM
ouzel_falls_792_scamAT-SCAM
pollock_knob_501_scaZCAM-SCAM
rose_river_falls_sca?-SCAM
roubion_168_scamZCAM-PIXL-SCAM
villeplane_scamZCAM-SCAM
atmo_mountain_637_scZCAMMS-SCAM
crosswind_lake_641_sZCAM-SCAM
+ + + + + + +
+
+

3.2 Contribution

+

This section was sole work, except for Doña’s standardization of my +file into the v1_libs.Rds at the end. Later, in my ternary diagrams, I +used the same seed and number of clusters as Aadi, so that my clustering +would match his.

+
+
+

3.3 Methods Description

+

Now, we plot the average of each calibration/scct target, against the +clustered mars LIBS data. The Mars data is separated from the reference +data, we plot the reference data over the Mars data with labeled points +corresponding to the table of reference types.

+ + + +
libs.matrix <- as.matrix(libs.df[,6:13]) 
+
+libs.tern <- as.data.frame(libs.matrix) %>%
+  mutate(x=(SiO2+Al2O3)/100,y=(FeOT+MgO)/100,z=(CaO+Na2O+K2O)/100) %>%
+  select(-c(SiO2,Al2O3,FeOT,MgO,CaO,Na2O,K2O,TiO2))
+
+
+libs.tern<-cbind(libs.tern, "type"=typedlibs$type, "target"=typedlibs$target, 
+                 "shape"=typedlibs$type)
+
+libs.tern<-libs.tern %>% mutate(shape = ifelse(grepl("SCAM", type, ignore.case=T),
+                       "other", shape)) %>%
+  mutate(shape = ifelse(grepl("other", type, ignore.case=T),
+                       "other", shape)) %>%
+  mutate(shape = ifelse(grepl("scct", target, ignore.case=T), "scct", shape))
+
+libs.tern$shape<-as.factor(libs.tern$shape)
+ + + +

This is not specific analysis, moreso a recommendation that for +future work, the scct values should be separated from the actual mars +data. Previously, we had been analysing these targets as if they were +mars data, when in fact they should be treated as reference or +calibration data.

+

When graphing the reference points on the ternary plot, ggrepel has a +conflict with ggtern, so I had to manually add where the labels should +go. If this issue with ggtern is fixed in the future, this can be +simplified to use ggrepel for the labels.

+
+
+

3.4 Result and Discussion

+ + + +
set.seed(1234)
+km<-kmeans(libs.tern[,1:3],4)
+
+libs.tern<-as.data.frame(cbind(libs.tern,"cluster"=as.factor(km$cluster)))
+ + + + + + +
libs.tern.other<-libs.tern[libs.tern$shape=="other",]
+libs.tern.scct<-libs.tern[libs.tern$shape=="scct",]
+
+#libs.scct.avg<-libs.tern.scct[, lapply(.SD, average), by= target]
+libs.scct.avg<-aggregate(cbind(x,y,z) ~ type, data = libs.tern.scct, FUN = "mean")
+libs.tern.other<-libs.tern.other[,c(1,2,3,7)]
+libs.tern.other<-cbind(libs.tern.other,"type"=0)
+libs.scct.avg<-cbind(libs.scct.avg[,2:4],"cluster"=libs.scct.avg$type,"type"=1)
+libs.tern<-rbind(libs.scct.avg,libs.tern.other)
+ + + + + + +
libs.tern<-cbind(libs.tern,"num"=rownames(libs.tern),"legend"=0)
+ + + + + + +
libs.tern<-libs.tern %>% 
+  mutate(legend=paste(num,cluster,sep="  "))
+ + + + + + +
libstern<-cbind(libs.tern,xend=0,yend=0,zend=0)
+libstern<-libstern%>% 
+  mutate(xend= ifelse(type=="1", x,xend)) %>%
+  mutate(yend= ifelse(type=="1", y,yend)) %>%
+  mutate(zend= ifelse(type=="1", z,zend))
+ + + + + + +
for(i in c(1,10,11,13,17)){
+  libstern[i,8:10]<-c(libstern[i,1]+0.06,libstern[i,2],libstern[i,3]-0.06)
+}
+
+for(i in c(7)){
+  libstern[i,8:10]<-c(libstern[i,1]+0.09,libstern[i,2],libstern[i,3]-0.09)
+}
+
+for(i in c(15,20)){
+  libstern[i,8:10]<-c(libstern[i,1],libstern[i,2]+0.05,libstern[i,3]-0.05)
+}
+
+for(i in c(3)){
+  libstern[i,8:10]<-c(libstern[i,1]+0.02,libstern[i,2]+0.06,libstern[i,3]-0.08)
+}
+
+for(i in c(14)){
+  libstern[i,8:10]<-c(libstern[i,1],libstern[i,2]-0.05,libstern[i,3]+0.05)
+}
+
+for(i in c(4,16,19)){
+  libstern[i,8:10]<-c(libstern[i,1]-0.08,libstern[i,2]+0.02,libstern[i,3]+0.06)
+}
+
+for(i in c(2,9)){
+  libstern[i,8:10]<-c(libstern[i,1]+0.03,libstern[i,2]-0.05,libstern[i,3]+0.03)
+}
+
+for(i in c(6,8,12,18,22)){
+  libstern[i,8:10]<-c(libstern[i,1]-0.06,libstern[i,2],libstern[i,3]+0.06)
+}
+
+for(i in c(5)){
+  libstern[i,8:10]<-c(libstern[i,1],libstern[i,2]-0.09,libstern[i,3]+0.09)
+}
+
+for(i in c(21)){
+  libstern[i,8:10]<-c(libstern[i,1]-0.03,libstern[i,2]+0.07,libstern[i,3]-0.03)
+}
+ + + + + + +
nv = -0.00  #Vertical Adjustment
+pn = position_nudge_tern(y=nv,x=-nv,z=nv)
+
+ggtern(libstern, ggtern::aes(x=x,y=y,z=z)) +
+  geom_point(data=subset(libstern,type==0),aes(color=cluster),alpha=0.5) + 
+  geom_point(data=subset(libstern,type==1),aes())+
+  theme_rgbw() + 
+  labs(title="Mars LIBS Data With Reference Samples Highlighted",
+       x="Si+Al",
+       y="Fe+Mg",
+       z="Ca+Na+K") + 
+  theme(legend.position="bottom") +
+  geom_text(position=pn,data=subset(libstern,type==1),
+            aes(x=xend,y=yend,z=zend,label=num),check_overlap=T)+
+  geom_segment(aes(x=x,xend = xend, y = y, yend=yend, z=z, zend=zend),size=0.3,
+               data = subset(libstern,type==1))+
+  theme_nomask()
+ + +
Warning in geom_text(position = pn, data = subset(libstern, type == 1),  :
+  Ignoring unknown aesthetics: z
+Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
+ℹ Please use `linewidth` instead.
+This warning is displayed once every 8 hours.
+Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
+Warning in geom_segment(aes(x = x, xend = xend, y = y, yend = yend, z = z,  :
+  Ignoring unknown aesthetics: z and zend
+Warning in acomp(data[, self$required_aes]) :
+  Negative values in composition are used as detection limits
+ + +

+ + + +

This plot with the table of reference samples below allows us to +examine our k-means clusters in the context of the earth reference +samples.

+

One of the primary results of looking at this graph is that many of +the points that seemed to be outliers from the rest of the data are +actually calibration targets. There are much fewer points in cluster 3, +our smallest and most distinct cluster, when you consider that some of +those original points are averaged into the reference points for calcite +and Flouro-Chloro-Hydro Apatite.

+

Also, it is interesting that much of cluster 2 has no earth +references over it. This could indicate that samples like these are not +common on earth, or it could indicate that the scientists deciding what +earth references to include thought samples like those were +unimportant.

+ + + +
kablelibstern<-cbind(point=libs.tern$num,"Description"=libs.tern$cluster)
+kablelibstern<-kablelibstern[1:22,]
+
+kable(kablelibstern)
+ + + +

In the future, it would be helpful to be able to select only certain +references, and to split these references into igneous and sedimentary +categories so that when examining a specific igneous or sedimentary +sample it is easy to see what references to compare it to. This is +especially useful when examining these earth references in conjunction +with the PIXL data, which can be plotted on a ternary plot as well.

+
+
+
+

4.0 Finding 2: Matching LIBS and PIXL Data

+

Matching the LIBS and PIXL data using their longitude and +latitudes.

+
+

4.1 Data, Code, and Resources

+

Here is a list data sets, codes, that are used in your work. Along +with brief description and URL where they are located.

+
    +
  1. MatchingLIBSandPIXL.Rmd outlines a lot of the work behind +matching the LIBS targets to PIXL samples. https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd

  2. +
  3. supercam_libs_moc_loc.Rds is the Rds file containing the LIBS +data https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds

  4. +
  5. samples_pixl_wide.Rds is the Rds file containing all of the PIXL +data https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/Data/samples_pixl_wide.Rds

  6. +
  7. pixl_sol_coordinates.Rds contains the pixl data with the +coordinates and sol metadata added from the analysts notebook https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/main/StudentData/pixl_sol_coordinates.Rds.

  8. +
  9. PIXL_LIBS_Combined.Rds is the final product of combining the LIBS +and PIXL data created by Charlotte and I. https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentData/PIXL_LIBS_Combined.Rds

  10. +
  11. lahira-finalProjectF24.Rmd is the final notebook of Aadi Lahiri, +which we get our earth scaling method from https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/4d7fc8b60026364dbb4c571d7b3d6318a08f68bb/StudentNotebooks/Assignment08_FinalProjectNotebook/lahira-finalProjectF24.Rmd

  12. +
+
+
+

4.2 Contribution

+

I worked with Charlotte Peterson on this section, we matched the LIBS +and PIXL data together. Then I graphed the ternary plot of the LIBS data +colored by its closest PIXL sample. The earth scaling technique on my +final heatmap was from Aadi Lahiri’s notebook.

+
+
+

4.3 Methods Description

+

First, columns with the PIXL coordinates and sol were added to the +PIXL data. This information was found from the analyst’s notebook.

+

The final iteration of combined data is created in the +MatchingLIBSandPIXL.Rmd file. The result of that file is the +PIXL_LIBS_Combined.Rds, which contains every mars LIBS sample, so the +scct/earth reference samples are removed. Each LIBS sample is listed +with its closest PIXL sample, ignoring the atmospheric sample, and their +distance in meters.

+

The scct/earth reference samples were removed because their location +has nothing to do with their data, the rover is taking a LIBS +measurement of reference materials it carries with it.

+

Even though some of the distances are farther than we would consider +close or useful, ultimately, we chose not to remove any data points so +that in the future people can make their own cutoff of what distance +they consider to be significant or relevant. The recommendation we would +make is to consider paired PIXL and LIBS samples that are within 7 +meters of each other, as that is close to the range where the LIBS laser +can reach.

+ + + +
pixllibs<-readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/PIXL_LIBS_Combined.Rds")
+ + + + + + +
distancetable<-pixllibs[,c(1:4,6,7)]
+distancetable<-distinct(distancetable)
+
+ggplot(data=distancetable, aes(x=Distance, group=PIXL.Abrasion, fill=PIXL.Abrasion)) +
+  ggtitle("Distances of LIBS Data to Nearest PIXL Abrasion")+
+    geom_density(adjust=1.5, alpha=.4)
+ + +

+ + + +

Three of the PIXL Abrasions have a lot of points that are very close, +but many others have data that is very spread.

+
+
+

4.4 Result and Discussion

+

Since the work of matching the LIBS and PIXL data has already been +done, it is easy to import the combined RDS and convert that data into a +dataframe to be used for a ternary diagram

+

We can easily change what distance we would like to view

+ + + +
libs.tern <- as.data.frame(pixllibs) %>%
+  mutate(x=(LIBS.SiO2+LIBS.Al2O3)/100,y=(LIBS.FeOT+LIBS.MgO)/100,z=(LIBS.CaO+LIBS.Na2O+LIBS.K2O)/100)
+
+libs.tern<-libs.tern[,c(6,7,19,20,21)]
+ + + + + + +
meters<- 100
+
+ggtern(libs.tern, ggtern::aes(x=x,y=y,z=z)) +
+  geom_point(data=subset(libs.tern, Distance<=meters),aes(color=PIXL.Abrasion,alpha=0.5)) + 
+  theme_rgbw() + 
+  labs(title=paste("Mars LIBS Data Within",meters,"meters of PIXL Abrasion",sep=" "),
+       x="Si+Al", y="Fe+Mg",z="Ca+Na+K") +
+  theme(legend.position="right") + 
+  guides(alpha="none",color=guide_legend(title="PIXL Abrasion"))
+
+meters<- 7
+
+ggtern(libs.tern, ggtern::aes(x=x,y=y,z=z)) +
+  geom_point(data=subset(libs.tern,Distance<=meters),aes(color=PIXL.Abrasion,alpha=0.5)) + 
+  theme_rgbw() + 
+  labs(title=paste("Mars LIBS Data Within",meters,"meters of PIXL Abrasion",sep=" "),
+       x="Si+Al",y="Fe+Mg",z="Ca+Na+K") +
+  theme(legend.position="right") + 
+  guides(alpha="none",color=guide_legend(title="PIXL Abrasion"))
+ + + +

We can see here that when interpreting the LIBS data within 7 meters +of each PIXL abrasion, the data is not tightly clustered. For the most +part, the data is just as spread as the entire data.

+ + + +
libs.heatmap<-pixllibs
+
+libs.heatmap<-libs.heatmap[libs.heatmap$Distance <= 7, ]
+
+libs.heatmap.mean<-aggregate(cbind(LIBS.SiO2,LIBS.TiO2,LIBS.Al2O3,LIBS.FeOT,LIBS.MgO,LIBS.CaO,
+    LIBS.Na2O,LIBS.K2O) ~ PIXL.Abrasion, data = libs.heatmap, FUN = "mean")
+libs.heatmap.med<-aggregate(cbind(LIBS.SiO2,LIBS.TiO2,LIBS.Al2O3,LIBS.FeOT,LIBS.MgO,
+    LIBS.CaO,LIBS.Na2O,LIBS.K2O) ~ PIXL.Abrasion, data = libs.heatmap, FUN = "median")
+
+libs.heatmap.mean<-cbind(libs.heatmap.mean,"x"="mean")
+libs.heatmap.mean$PIXL.Abrasion<-as.character(libs.heatmap.mean$PIXL.Abrasion)
+libs.heatmap.mean<-libs.heatmap.mean %>%
+  mutate(x=paste(PIXL.Abrasion,x,sep=" "))
+rownames(libs.heatmap.mean)<-libs.heatmap.mean$x
+
+libs.heatmap.med<-cbind(libs.heatmap.med,"x"="median")
+libs.heatmap.med$PIXL.Abrasion<-as.character(libs.heatmap.med$PIXL.Abrasion)
+libs.heatmap.med<-libs.heatmap.med %>%
+  mutate(x=paste(PIXL.Abrasion,x,sep=" "))
+
+rownames(libs.heatmap.med)<-libs.heatmap.med$x
+
+libs.heatmap<-rbind(libs.heatmap.mean,libs.heatmap.med)
+libs.heatmap<-libs.heatmap[c(1,7,2,8,3,9,4,10,5,11,6,12),]
+
+pheatmap(libs.heatmap[,2:9],scale="column",cluster_rows=F,cluster_cols=F,
+    main="Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Column-Scaled")
+pheatmap(libs.heatmap[,2:9],scale="none",cluster_rows=F,cluster_cols=F,
+    main="Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Unscaled")
+ + + +

When looking at the heatmap, we can see more differences between the +abrasions than are visible on the ternary plot.

+ + + +
libs_earth <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/LIBS_training_set_quartiles.Rds")
+earthheatmap <- libs.heatmap %>% select(c(LIBS.SiO2, LIBS.TiO2, LIBS.Al2O3, LIBS.FeOT, LIBS.MgO, 
+    LIBS.CaO, LIBS.Na2O, LIBS.K2O,x)) %>% 
+  rowwise() %>% mutate("Si"= (LIBS.SiO2-libs_earth[3,2])/(libs_earth[4,2] - libs_earth[2,2]), 
+                       "Ti"= (LIBS.TiO2-libs_earth[3,3])/(libs_earth[4,3] - libs_earth[2,3]),
+                       "Al"= (LIBS.Al2O3-libs_earth[3,4])/(libs_earth[4,4] - libs_earth[2,4]),
+                       "Fe"= (LIBS.FeOT-libs_earth[3,5])/(libs_earth[4,5] - libs_earth[2,5]),
+                       "Mg"= (LIBS.MgO-libs_earth[3,6])/(libs_earth[4,6] - libs_earth[2,6]),
+                       "Ca"= (LIBS.CaO-libs_earth[3,7])/(libs_earth[4,7] - libs_earth[2,7]),
+                       "Na"= (LIBS.Na2O-libs_earth[3,8])/(libs_earth[4,8] - libs_earth[2,8]),
+                       "K"= (LIBS.K2O-libs_earth[3,9])/(libs_earth[4,9] - libs_earth[2,9])) %>%
+  select(!c(LIBS.SiO2, LIBS.TiO2, LIBS.Al2O3, LIBS.FeOT, LIBS.MgO, LIBS.CaO, LIBS.Na2O, LIBS.K2O))
+
+earthheatmap<-as.matrix(earthheatmap)
+earthheatmap<-as.data.frame(earthheatmap)
+
+rownames(earthheatmap)<-earthheatmap$x
+
+#earthheatmap<-earthheatmap[,2:9]
+earthheatmap[,2:9]<-sapply(earthheatmap[,2:9],as.numeric)
+
+pheatmap(earthheatmap[,2:9],scale="none",cluster_rows=F,cluster_cols=F,
+    main="Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Earth Scaled")
+ + + +

Using Aadi’s earth scaling technique, we can compare our two earlier +heatmaps with an earth scaled heatmap. The main consistency between +these heatmaps is high variation in the Mg or MgO columns.

+
+
+

5.5 Conclusions, Limitations, and Future Work.

+

More analysis of the LIBS data grouped by PIXL needs to be done. I +think a Principle Component analysis could be interesting to help us see +what factors break up the different groups. I also think a similarity +analysis within the groups could be interesting.

+

One potential issue with the combined LIBS and PIXL dataset, is that +the LIBS data has some duplicate points with different latitude and +longitudes. For example, the LIBS target “aegis_0907a_________” contains +duplicate points with the same data with differing longitude and +latitude values. This could indicate that there is more error than we +think with the latitude and longitude of LIBS measurements. We are not +100% sure what the LIBS latitude and longitude is referring to (the +laser or the rover), but this could indicate we understand it even less. +This notebook runs on the assumption that the latitude and longitude +refers to the location of the rover.

+
+
+
+

Bibliography

+

Provide a listing of references and other sources.

+
    +
  • [Cousin21] Cousin, A., Sautter, V., Fabre, C., Dromart, G., +Montagnac, G., Drouet, C., Meslin, P. Y., Gasnault, O., Beyssac, O., +Bernard, S., Cloutis, E., Forni, O., Beck, P., Fouchet, T., Johnson, J. +R., Lasue, J., Ollila, A. M., De Parseval, P., Gouy, S., & Caron, B. +(2021). SuperCam calibration targets on board the perseverance rover: +Fabrication and quantitative characterization. Spectrochimica Acta Part +B: Atomic Spectroscopy, 106341. https://doi.org/10.1016/j.sab.2021.106341

  • +
  • [Hamilton18] Hamilton NE, Ferry M (2018). “ggtern: Ternary +Diagrams Using ggplot2.” Journal of Statistical Software, Code +Snippets, 87(3), 1-17. doi:10.18637/jss.v087.c03 https://doi.org/10.18637/jss.v087.c03

  • +
  • [Hijmans24] Hijmans R (2024). geosphere: Spherical +Trigonometry. R package version 1.5-20, https://CRAN.R-project.org/package=geosphere

  • +
+ + + +
#citation("geosphere")
+#citation("ggtern")
+ + + + +
+ +
---
title: "Data Analytics Research Individual Final Project Report"
author: "Margo VanEsselstyn"
date: "`r Sys.Date()`"
output:
  html_document:
    toc: yes
    toc_depth: 3
    toc_float: yes
    number_sections: no
    theme: united
  html_notebook: default
  pdf_document:
    toc: yes
    toc_depth: '3'
---
# DAR Project and Group Members

* Project name: Mars
* Project team members: Charlotte Peterson, Doña Roberts, Xuanting Wang, David Walczyk, Charlotte Newman, Dante Mwatibo, Nicolas Morawski, CJ Marino, Aadi Lahiri, Ashton Compton

# 0.0 Preliminaries.

This report is generated from an R Markdown file that includes all the R code necessary to produce the results described and embedded in the report.  Code blocks can be suppressed from output for readability using the command code `{R,  echo=show}` in the code block header. If `show <- FALSE` the code block will be suppressed; if `show <- TRUE` then the code will be shown. 

```{r}
# Flag intended for the `echo=show` chunk option described above:
# TRUE prints the R code in the rendered report, FALSE hides it.
# NOTE(review): the chunk headers visible in this part of the notebook are plain
# `{r}` and do not reference `show` -- confirm whether the flag is still used.
show <- TRUE
```

Executing this R notebook requires some subset of the following packages:

* `ggplot2`
* `tidyverse`
* `ggtern`
* `knitr`
* `pheatmap`

These will be installed and loaded as necessary (code suppressed). 

<!-- The `include=FALSE` option prevents your code from being shown at all -->
```{r, include=FALSE}
# Attach every package the notebook needs, installing any that are missing.
# The order is kept as ggplot2 -> tidyverse -> ggtern -> knitr -> pheatmap so
# that package masking is the same as before.
required_packages <- c("ggplot2", "tidyverse", "ggtern", "knitr", "pheatmap")

for (pkg in required_packages) {
  # require() returns FALSE (instead of erroring) when the package is absent,
  # which is what lets us fall back to installing it.
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}
```

# 1.0 Project Introduction

This project outlines my analysis of the Mars LIBS and PIXL data. It largely revolves around data processing and organization

# 2.0 Organization of Report

This report is organized as follows: 

* Section 3.0.  Finding 1: Here we discuss the LIBS scct targets and the importance of differentiating them in future analysis 

* Section 4.0: Finding 2: Here we discuss the connection between the LIBS and PIXL data

* Section 5.0 Overall conclusions and suggestions 

# 3.0 Finding 1: Understanding LIBS Targets

I researched the meaning behind the LIBS target names, and categorized the LIBS data into a few major categories. I created a new Rds file that includes a column labeling each LIBS sample with its category. 

## 3.1 Data, Code, and Resources

Here is a list of the data sets and code used in this work, along with a brief description of each and the URL where it is located.

1. supercam_libs_moc_loc.Rds is the Rds file containing the LIBS data [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds)

2. libs_typed.Rds is the Rds file containing the LIBS data as well as a type column categorizing each sample [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/StudentData/libs_typed.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/StudentData/libs_typed.Rds)

3. SupercamCalibrationTargets.pdf is a pdf containing information about the calibration targets used in the LIBS data. [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentNotebooks/Assignment07_DraftFinalProjectNotebook/SupercamCalibrationTargets.pdf](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentNotebooks/Assignment07_DraftFinalProjectNotebook/SupercamCalibrationTargets.pdf)

4. v1_libs.Rds is the Rds file containing the LIBS data as well as my categorization that Doña put into a standardized format [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds)


I used the libs dataset with the standard deviation features, distances, and totals removed. I made sure that certain categories were numeric and added a new "type" column.

Then, I added a description to each scct (calibration) target in the type column, which names the earth reference based on the pdf linked in this section. For example, the scct target containing "PMIFA0306" was typed "Olivine"

I also labeled the targets with "aegis" in their names as AEGIS. These samples can be used in the same way as the other Mars LIBS samples, but it is noted that the measurement was taken using AEGIS, the rover's AI. So instead of the target being chosen intentionally by a scientist, it is chosen by the rover when it has extra resources to take a sample.

From the analyst's notebook, targets with "scam" in their names correspond to targets of other measurements. I went through the analyst's notebook for these samples and added the other measurements that were taken at the same target into the type column. For example, the "villeplane_scam" target also had ZCam measurements taken at the same target, so I typed it "ZCAM-SCAM".

I labeled two targets ("sei_________________" and "naakih______________") with further descriptions because the analyst's notebook had clear descriptions of what each target was intended to sample. 

All remaining samples are typed "other".

```{r}
# Location of the raw SuperCam LIBS table.
libs_path <- "/academics/MATP-4910-F24/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds"

# Columns not used in this analysis: the per-oxide standard deviations, the
# percentage total, the distance, and the total emission.
dropped_cols <- c(
  "distance_mm", "Tot.Em.",
  "SiO2_stdev", "TiO2_stdev", "Al2O3_stdev", "FeOT_stdev",
  "MgO_stdev", "Na2O_stdev", "CaO_stdev", "K2O_stdev",
  "Total"
)

# Read the data and discard the unused columns in a single pipeline.
libs.df <- readRDS(libs_path) %>%
  select(!all_of(dropped_cols))

# The point identifier must be numeric.
libs.df$point <- as.numeric(libs.df$point)

# Copy of the table with a placeholder `type` column (0 = not yet typed)
# inserted between the 4th and 5th columns; it is filled in further below.
typedlibs <- cbind(libs.df[, 1:4], "type" = 0, libs.df[5:13])
```

```{r}
# Lookup table: each entry pairs a substring of a LIBS target name (matched
# case-insensitively) with the human-readable type written to `typedlibs$type`.

# Calibration (scct) target identifiers and the earth reference material each
# one corresponds to.
scct_targets <- c(
  "tsrich0404", "LCMB0006", "LCA530106", "PMIFS0505", "TAPAG0206", "PMIOR0507",
  "PMIDN0302", "PMIFA0306", "PMIAN0106", "PMIEN0602", "TSERP0102", "LBHVO20406",
  "LJSC10304", "LANKE0101", "LSIDE0101", "LJMN10106", "NTE010301", "NTE020106",
  "NTE030106", "NTE040106", "NTE050301", "SHERG02", "TITANIUM"
)
scct_types <- c(
  "BHVO-2 basalt and K sulfate mixture", "Chert", "Calcite", "Ferrosilite",
  "Fluoro-Chloro-Hydro Apatite", "Orthoclase", "Diopside", "Olivine", "Andesine",
  "Enstatite", "Serpentine/Talc", "BHVO-2 standard basalt", "Mars soil analog",
  "Ankerite", "Siderite", "JMN-1 standard Mn nodule",
  "Basalt dopped in minor elements - Cu, Zn",
  "Basalt dopped in minor elements - Mn, Ba, Cr",
  "Basalt dopped in minor elements - Zn",
  "Basalt dopped in minor elements - Li, Sr",
  "Basalt dopped in minor elements - Ni", "Shergottite", "Titanium"
)

# Mars targets: AEGIS (rover-selected) targets and the "_scam" targets, typed
# by the other measurements taken at the same target.
mars_targets <- c(
  "aegis",
  "buzzard_rocks_scam", "alfalfa_378_scam", "chiniak_565_scam",
  "garde_210_scam", "guillaumes_168_scam", "montpezat_350_scam", "naltsos_scam",
  "ouzel_falls_792_scam", "pollock_knob_501_sca", "rose_river_falls_sca",
  "roubion_168_scam", "villeplane_scam", "atmo_mountain_637_sc",
  "crosswind_lake_641_s"
)
mars_types <- c(
  "AEGIS",
  "PIXL-SCAM", "VISIR-Ramanx2-ZCAM-SCAM", "AT-SCAM", "AT-SCAM", "PIXL-SCAM",
  "PIXL-SCAM", "PIXL-SCAM", "AT-SCAM", "ZCAM-SCAM", "?-SCAM", "ZCAM-PIXL-SCAM",
  "ZCAM-SCAM", "ZCAMMS-SCAM", "ZCAM-SCAM"
)

# Fail early if a name is added without its type (or vice versa); otherwise
# the pairs would silently shift against each other.
stopifnot(
  length(scct_targets) == length(scct_types),
  length(mars_targets) == length(mars_types)
)

targetlist <- c(scct_targets, mars_targets)
typelist <- c(scct_types, mars_types)

targettyped <- data.frame(targetlist, typelist, stringsAsFactors = FALSE)

# `type` starts out as the placeholder 0; keep it as text so the "0" sentinel
# comparisons below do not depend on implicit numeric-to-character coercion.
typedlibs$type <- as.character(typedlibs$type)

# Apply the lookup in order. Calibration entries always overwrite; the Mars
# entries that follow only fill rows that are still untyped ("0"), so they can
# never replace a calibration label. The split point is derived from the
# lookup itself rather than hard-coded.
n_scct <- length(scct_targets)
for (i in seq_len(nrow(targettyped))) {
  hits <- grepl(targettyped$targetlist[i], typedlibs$target, ignore.case = TRUE)
  if (i > n_scct) {
    hits <- hits & typedlibs$type == "0"
  }
  typedlibs$type[hits] <- targettyped$typelist[i]
}

# Everything still untyped becomes "other"; two targets get a more specific
# description taken from the analyst's notebook.
typedlibs <- typedlibs %>%
  mutate(type = ifelse(type == "0", "other", type)) %>%
  mutate(type = ifelse(target == "sei_________________", "other - fine soil", type)) %>%
  mutate(type = ifelse(target == "naakih______________", "other - coarse soil", type))

kable(targettyped)
```

## 3.2 Contribution

This section was solely my own work, except for Doña's standardization of my file into v1_libs.Rds at the end. Later, in my ternary diagrams, I used the same seed and number of clusters as Aadi so that my clustering would match his. 

## 3.3 Methods Description 

Now we plot the average of each calibration/scct target against the clustered Mars LIBS data. The Mars data is separated from the reference data, and the reference data is plotted over the Mars data with labeled points corresponding to the table of reference types. 

```{r}
# The eight oxide columns (SiO2, TiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O).
libs.matrix <- as.matrix(libs.df[, 6:13])

# Collapse the oxides into the three ternary axes and keep only those axes:
#   x = Si + Al,  y = Fe + Mg,  z = Ca + Na + K   (each divided by 100)
libs.tern <- as.data.frame(libs.matrix) %>%
  mutate(
    x = (SiO2 + Al2O3) / 100,
    y = (FeOT + MgO) / 100,
    z = (CaO + Na2O + K2O) / 100
  ) %>%
  select(-c(SiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O, TiO2))

# Attach the sample type and target name; `shape` starts as a copy of `type`
# and is reduced to a coarse group below.
libs.tern <- cbind(libs.tern,
                   "type" = typedlibs$type,
                   "target" = typedlibs$target,
                   "shape" = typedlibs$type)

# Coarse grouping used to split reference data from Mars data:
#   * any target whose name contains "scct" is calibration data ("scct");
#   * SCAM-typed, "other"-typed and AEGIS-typed rows are Mars data ("other").
# AEGIS rows previously kept shape "AEGIS", so they matched neither group and
# were silently dropped when the data is later subset on shape == "other" /
# shape == "scct". They are regular Mars samples, so they now join "other".
# The scct rule is listed first because case_when() uses the first match and
# the scct label must take precedence.
libs.tern <- libs.tern %>%
  mutate(shape = case_when(
    grepl("scct", target, ignore.case = TRUE) ~ "scct",
    grepl("SCAM|other|AEGIS", type, ignore.case = TRUE) ~ "other",
    TRUE ~ shape
  ))

libs.tern$shape <- as.factor(libs.tern$shape)
```


This is not a specific analysis so much as a recommendation that, in future work, the scct values should be separated from the actual Mars data. Previously, we had been analyzing these targets as if they were Mars data, when in fact they should be treated as reference or calibration data. 

When graphing the reference points on the ternary plot, ggrepel has a conflict with ggtern, so I had to manually add where the labels should go. If this issue with ggtern is fixed in the future, this can be simplified to use ggrepel for the labels. 

## 3.4 Result and Discussion 

```{r}
# Fixed seed so the k-means assignment is reproducible from run to run.
set.seed(1234)

# Cluster every sample into 4 groups using only the three ternary coordinates.
km <- kmeans(libs.tern[, c("x", "y", "z")], centers = 4)

# Record each sample's cluster as a factor column.
libs.tern$cluster <- as.factor(km$cluster)
```

```{r}
# Split the samples by their coarse group.
is_scct <- libs.tern$shape == "scct"
is_other <- libs.tern$shape == "other"
libs.tern.scct <- libs.tern[is_scct, ]

# Mars samples: keep the coordinates and k-means cluster, flagged with type = 0.
libs.tern.other <- cbind(
  libs.tern[is_other, c("x", "y", "z", "cluster")],
  "type" = 0
)

# Calibration samples: one averaged point per reference type, flagged with
# type = 1. The `cluster` column is reused here to carry the reference-type
# name, so both tables share the same columns and can be stacked.
scct_means <- aggregate(cbind(x, y, z) ~ type, data = libs.tern.scct, FUN = mean)
libs.scct.avg <- cbind(
  scct_means[, c("x", "y", "z")],
  "cluster" = scct_means$type,
  "type" = 1
)

# Reference averages come first, so they occupy the leading rows of the result.
libs.tern <- rbind(libs.scct.avg, libs.tern.other)
```

```{r}
# Add a "num" column holding each row's name (used as the point label) and a
# placeholder "legend" column.
libs.tern <- cbind(
  libs.tern,
  "num" = rownames(libs.tern),
  "legend" = 0
)
```

```{r}
# Legend text: the row label and the cluster/type name, two spaces apart.
libs.tern$legend <- with(libs.tern, paste(num, cluster, sep = "  "))
```


```{r}
# Label end-point coordinates: default to 0, and for the rows flagged
# type == "1" start them at the point's own (x, y, z).
libstern <- cbind(libs.tern, xend = 0, yend = 0, zend = 0) %>%
  mutate(
    xend = ifelse(type == "1", x, xend),
    yend = ifelse(type == "1", y, yend),
    zend = ifelse(type == "1", z, zend)
  )
```

```{r}
# Hand-placed label positions for rows 1-22. Each entry lists the rows that
# share one (x, y, z) shift; the shifted point is written to columns 8:10
# (xend, yend, zend) and is where the label and segment end are drawn.
label_offsets <- list(
  list(rows = c(1, 10, 11, 13, 17),   shift = c(0.06, 0, -0.06)),
  list(rows = c(7),                   shift = c(0.09, 0, -0.09)),
  list(rows = c(15, 20),              shift = c(0, 0.05, -0.05)),
  list(rows = c(3),                   shift = c(0.02, 0.06, -0.08)),
  list(rows = c(14),                  shift = c(0, -0.05, 0.05)),
  list(rows = c(4, 16, 19),           shift = c(-0.08, 0.02, 0.06)),
  list(rows = c(2, 9),                shift = c(0.03, -0.05, 0.03)),
  list(rows = c(6, 8, 12, 18, 22),    shift = c(-0.06, 0, 0.06)),
  list(rows = c(5),                   shift = c(0, -0.09, 0.09)),
  list(rows = c(21),                  shift = c(-0.03, 0.07, -0.03))
)

for (offset in label_offsets) {
  for (i in offset$rows) {
    libstern[i, 8:10] <- c(
      libstern[i, 1] + offset$shift[1],
      libstern[i, 2] + offset$shift[2],
      libstern[i, 3] + offset$shift[3]
    )
  }
}
```

```{r}
# Ternary plot of the clustered Mars data (type == 0) with the averaged
# reference samples (type == 1) drawn on top. Each reference point gets a
# numeric label placed at (xend, yend, zend) and a thin segment back to the
# point itself.
nv <- -0.00  # Vertical adjustment of the labels (currently no nudge)
pn <- position_nudge_tern(y = nv, x = -nv, z = nv)

ggtern(libstern, ggtern::aes(x = x, y = y, z = z)) +
  geom_point(
    data = subset(libstern, type == 0),
    aes(color = cluster),
    alpha = 0.5
  ) +
  geom_point(data = subset(libstern, type == 1)) +
  theme_rgbw() +
  labs(
    title = "Mars LIBS Data With Reference Samples Highlighted",
    x = "Si+Al",
    y = "Fe+Mg",
    z = "Ca+Na+K"
  ) +
  theme(legend.position = "bottom") +
  geom_text(
    position = pn,
    data = subset(libstern, type == 1),
    aes(x = xend, y = yend, z = zend, label = num),
    check_overlap = TRUE
  ) +
  geom_segment(
    aes(x = x, xend = xend, y = y, yend = yend, z = z, zend = zend),
    size = 0.3,
    data = subset(libstern, type == 1)
  ) +
  theme_nomask()
```
This plot with the table of reference samples below allows us to examine our k-means clusters in the context of the earth reference samples. 

One of the primary results of looking at this graph is that many of the points that seemed to be outliers from the rest of the data are actually calibration targets. There are many fewer points in cluster 3, our smallest and most distinct cluster, when you consider that some of those original points are averaged into the reference points for calcite and Fluoro-Chloro-Hydro Apatite.

Also, it is interesting that much of cluster 2 has no earth references over it. This could indicate that samples like these are not common on earth, or it could indicate that the scientists deciding what earth references to include thought samples like those were unimportant. 

```{r}
# Lookup table for the numbered reference points on the ternary plot.
# Reference rows are the ones flagged type == 1, so they are selected by that
# flag rather than by a fixed row count.
reference_rows <- which(libs.tern$type == 1)

# cbind() on plain vectors builds a matrix and would replace a factor by its
# integer codes, so the description is converted to character first.
kablelibstern <- cbind(
  point = libs.tern$num[reference_rows],
  "Description" = as.character(libs.tern$cluster[reference_rows])
)

kable(kablelibstern)
```

In the future, it would be helpful to be able to select only certain references, and to split these references into igneous and sedimentary categories so that when examining a specific igneous or sedimentary sample it is easy to see what references to compare it to. This is especially useful when examining these earth references in conjunction with the PIXL data, which can be plotted on a ternary plot as well.

# 4.0 Finding 2: Matching LIBS and PIXL Data

Matching the LIBS and PIXL data using their longitude and latitudes. 

## 4.1 Data, Code, and Resources

Here is a list of the data sets and code used in this work, along with a brief description of each and the URL where it is located.

1. MatchingLIBSandPIXL.Rmd outlines a lot of the work behind matching the LIBS targets to PIXL samples.  [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/StudentNotebooks/Assignment05/MatchingLIBSandPIXL.Rmd)

2.  supercam_libs_moc_loc.Rds is the Rds file containing the LIBS data [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/2fbb9b7988d536656bb118a0d8e0b644392ca09a/Data/supercam_libs_moc_loc.Rds)

3. samples_pixl_wide.Rds  is the Rds file containing all of the PIXL data [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/Data/samples_pixl_wide.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/18a7440e2b4de50a2be8223adc9319f33f082f09/Data/samples_pixl_wide.Rds)

4. pixl_sol_coordinates.Rds contains the PIXL data with the coordinates and sol metadata added from the Analyst's Notebook [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/main/StudentData/pixl_sol_coordinates.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/main/StudentData/pixl_sol_coordinates.Rds).

5. PIXL_LIBS_Combined.Rds is the final product of combining the LIBS and PIXL data, created by Charlotte and me. [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentData/PIXL_LIBS_Combined.Rds](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/e02a301198e2ec47e168448602eace6a6f7e3eaf/StudentData/PIXL_LIBS_Combined.Rds)

6. lahira-finalProjectF24.Rmd is the final notebook of Aadi Lahiri, which we get our earth scaling method from [https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/4d7fc8b60026364dbb4c571d7b3d6318a08f68bb/StudentNotebooks/Assignment08_FinalProjectNotebook/lahira-finalProjectF24.Rmd](https://github.rpi.edu/DataINCITE/DAR-Mars-F24/blob/4d7fc8b60026364dbb4c571d7b3d6318a08f68bb/StudentNotebooks/Assignment08_FinalProjectNotebook/lahira-finalProjectF24.Rmd)

## 4.2 Contribution

I worked with Charlotte Peterson on this section; together we matched the LIBS and PIXL data. I then graphed the ternary plot of the LIBS data colored by its closest PIXL sample. The earth scaling technique on my final heatmap was from Aadi Lahiri's notebook.


## 4.3 Methods Description 

First, columns with the PIXL coordinates and sol were added to the PIXL data. This information was found from the analyst's notebook. 

The final iteration of combined data is created in the MatchingLIBSandPIXL.Rmd file. The result of that file is the PIXL_LIBS_Combined.Rds, which contains every mars LIBS sample, so the scct/earth reference samples are removed. Each LIBS sample is listed with its closest PIXL sample, ignoring the atmospheric sample, and their distance in meters. 

The scct/earth reference samples were removed because their location has nothing to do with their data, the rover is taking a LIBS measurement of reference materials it carries with it. 

Even though some of the distances are farther than we would consider close or useful, ultimately, we chose not to remove any data points so that in the future people can make their own cutoff of what distance they consider to be significant or relevant. The recommendation we would make is to consider paired PIXL and LIBS samples that are within 7 meters of each other, as that is close to the range where the LIBS laser can reach. 

```{r}
# Load the combined PIXL/LIBS dataset.
pixllibs <- readRDS(
  "/academics/MATP-4910-F24/DAR-Mars-F24/StudentData/PIXL_LIBS_Combined.Rds"
)
```

```{r}
# Keep columns 1-4, 6 and 7 of the combined data and drop duplicated rows so
# each distinct combination is counted once in the density estimate.
distancetable <- pixllibs[, c(1:4, 6, 7)] %>%
  distinct()

# One density curve of distance per PIXL abrasion.
ggplot(
  data = distancetable,
  aes(x = Distance, group = PIXL.Abrasion, fill = PIXL.Abrasion)
) +
  geom_density(adjust = 1.5, alpha = 0.4) +
  ggtitle("Distances of LIBS Data to Nearest PIXL Abrasion")
```
Three of the PIXL Abrasions have a lot of points that are very close, but many others have data that is very spread. 

## 4.4 Result and Discussion 

Since the work of matching the LIBS and PIXL data has already been done, it is easy to import the combined RDS and convert that data into a dataframe to be used for a ternary diagram.

We can easily change what distance we would like to view

```{r}
# Ternary coordinates for the combined PIXL/LIBS data:
# x = Si + Al, y = Fe + Mg, z = Ca + Na + K, each divided by 100.
libs.tern <- as.data.frame(pixllibs) %>%
  mutate(
    x = (LIBS.SiO2 + LIBS.Al2O3) / 100,
    y = (LIBS.FeOT + LIBS.MgO) / 100,
    z = (LIBS.CaO + LIBS.Na2O + LIBS.K2O) / 100
  )

# Keep only columns 6, 7 and 19:21.
# NOTE(review): presumably Distance, PIXL.Abrasion and the new x, y, z --
# confirm against the column order of PIXL_LIBS_Combined.Rds.
libs.tern <- libs.tern[, c(6, 7, 19, 20, 21)]
```


```{r}
# Draw a ternary diagram of the LIBS points whose Distance is at most
# `meters`, coloured by PIXL abrasion.
#
# data   : data frame with columns x, y, z, Distance and PIXL.Abrasion.
# meters : distance cutoff; also interpolated into the plot title.
#
# Returns the ggtern plot object (auto-printed when called at top level).
plot_libs_within <- function(data, meters) {
  # which() drops rows with a missing Distance, matching subset() semantics.
  nearby <- data[which(data$Distance <= meters), ]

  ggtern(data, ggtern::aes(x = x, y = y, z = z)) +
    # alpha is a fixed setting, so it belongs outside aes(); inside aes() it
    # is mapped to an alpha scale instead of being used as the literal 0.5.
    geom_point(data = nearby, aes(color = PIXL.Abrasion), alpha = 0.5) +
    theme_rgbw() +
    labs(
      title = paste("Mars LIBS Data Within", meters, "meters of PIXL Abrasion", sep = " "),
      x = "Si+Al", y = "Fe+Mg", z = "Ca+Na+K"
    ) +
    theme(legend.position = "right") +
    guides(color = guide_legend(title = "PIXL Abrasion"))
}

meters <- 100
plot_libs_within(libs.tern, meters)

meters <- 7
plot_libs_within(libs.tern, meters)
```

We can see here that when interpreting the LIBS data within 7 meters of each PIXL abrasion, the data is not tightly clustered. For the most part, the data is just as spread as the entire data. 

```{r}
# Summarise the eight LIBS oxide columns per PIXL abrasion.
#
# data  : data frame with the LIBS.* oxide columns and PIXL.Abrasion.
# fun   : summary function applied to every oxide within each abrasion.
# label : text appended to the abrasion name to form the row label.
#
# Returns a data frame with PIXL.Abrasion (character), the eight summarised
# oxide columns, and a label column "x" that is also used as the row names.
summarise_by_abrasion <- function(data, fun, label) {
  out <- aggregate(
    cbind(LIBS.SiO2, LIBS.TiO2, LIBS.Al2O3, LIBS.FeOT, LIBS.MgO, LIBS.CaO,
          LIBS.Na2O, LIBS.K2O) ~ PIXL.Abrasion,
    data = data, FUN = fun
  )
  out$PIXL.Abrasion <- as.character(out$PIXL.Abrasion)
  out$x <- paste(out$PIXL.Abrasion, label, sep = " ")
  rownames(out) <- out$x
  out
}

# Only LIBS points within 7 m of their nearest PIXL abrasion are summarised.
libs.heatmap <- pixllibs[pixllibs$Distance <= 7, ]

libs.heatmap.mean <- summarise_by_abrasion(libs.heatmap, mean, "mean")
libs.heatmap.med <- summarise_by_abrasion(libs.heatmap, median, "median")

# Interleave the rows so each abrasion's mean is followed by its median.
# The index is derived from the number of abrasions instead of assuming six.
n_abrasions <- nrow(libs.heatmap.mean)
stopifnot(nrow(libs.heatmap.med) == n_abrasions)
interleaved <- as.vector(
  rbind(seq_len(n_abrasions), n_abrasions + seq_len(n_abrasions))
)
libs.heatmap <- rbind(libs.heatmap.mean, libs.heatmap.med)[interleaved, ]

# Columns 2:9 are the eight oxide summaries (column 1 is PIXL.Abrasion and
# the last column is the row label).
pheatmap(libs.heatmap[, 2:9], scale = "column", cluster_rows = FALSE, cluster_cols = FALSE,
    main = "Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Column-Scaled")
pheatmap(libs.heatmap[, 2:9], scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
    main = "Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Unscaled")
```
When looking at the heatmap, we can see more differences between the abrasions than are visible on the ternary plot. 

```{r}
# Earth-scaled version of the mean/median heatmap.
# Each oxide value is rescaled as (value - row 3) / (row 4 - row 2) using the
# matching column of libs_earth (columns 2:9, in the oxide order below).
# NOTE(review): rows 2, 3 and 4 of libs_earth presumably hold the first
# quartile, median and third quartile of the training set -- confirm.
libs_earth <- readRDS("/academics/MATP-4910-F24/DAR-Mars-F24/Data/LIBS_training_set_quartiles.Rds")

# Output column name -> source column in libs.heatmap; position k in this
# vector pairs with column k + 1 of libs_earth.
oxide_cols <- c(
  Si = "LIBS.SiO2", Ti = "LIBS.TiO2", Al = "LIBS.Al2O3", Fe = "LIBS.FeOT",
  Mg = "LIBS.MgO", Ca = "LIBS.CaO", Na = "LIBS.Na2O", K = "LIBS.K2O"
)

# Column 1 is the row label "x"; columns 2:9 are the scaled values.
earthheatmap <- data.frame(x = libs.heatmap$x, stringsAsFactors = FALSE)
for (k in seq_along(oxide_cols)) {
  earth_col <- k + 1L
  # [[i, j]] extracts a bare scalar from a data.frame, tibble or matrix, so
  # the arithmetic stays vectorised and numeric. This avoids rowwise() and
  # the lossy numeric -> character -> numeric round trip through as.matrix().
  centre <- libs_earth[[3, earth_col]]
  spread <- libs_earth[[4, earth_col]] - libs_earth[[2, earth_col]]
  earthheatmap[[names(oxide_cols)[k]]] <-
    (libs.heatmap[[oxide_cols[[k]]]] - centre) / spread
}

rownames(earthheatmap) <- earthheatmap$x

pheatmap(earthheatmap[, 2:9], scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
    main = "Means and Medians of LIBS data within 7m of PIXL Abrasion, \n Earth Scaled")
```
Using Aadi's earth scaling technique, we can compare our two earlier heatmaps with an earth scaled heatmap. The main consistency between these heatmaps is high variation in the Mg or MgO columns.

## 4.5 Conclusions, Limitations, and Future Work

More analysis of the LIBS data grouped by PIXL abrasion needs to be done. I think a Principal Component Analysis could be interesting to help us see what factors break up the different groups. I also think a similarity analysis within the groups could be interesting.

One potential issue with the combined LIBS and PIXL dataset, is that the LIBS data has some duplicate points with different latitude and longitudes. For example, the LIBS target "aegis_0907a_________" contains duplicate points with the same data with differing longitude and latitude values. This could indicate that there is more error than we think with the latitude and longitude of LIBS measurements. We are not 100% sure what the LIBS latitude and longitude is referring to (the laser or the rover), but this could indicate we understand it even less. This notebook runs on the assumption that the latitude and longitude refers to the location of the rover. 

# Bibliography
Provide a listing of references and other sources.

* [Cousin21] Cousin, A., Sautter, V., Fabre, C., Dromart, G., Montagnac, G., Drouet, C., Meslin, P. Y., Gasnault, O., Beyssac, O., Bernard, S., Cloutis, E., Forni, O., Beck, P., Fouchet, T., Johnson, J. R., Lasue, J., Ollila, A. M., De Parseval, P., Gouy, S., & Caron, B. (2021). SuperCam calibration targets on board the perseverance rover: Fabrication and quantitative characterization. Spectrochimica Acta Part B: Atomic Spectroscopy, 106341. https://doi.org/10.1016/j.sab.2021.106341

* [Hamilton18] Hamilton NE, Ferry M (2018). “ggtern: Ternary Diagrams Using ggplot2.” _Journal of Statistical Software, Code Snippets_, *87*(3),
  1-17. doi:10.18637/jss.v087.c03 <https://doi.org/10.18637/jss.v087.c03>
  
* [Hijmans24] Hijmans R (2024). _geosphere: Spherical Trigonometry_. R package version 1.5-20, <https://CRAN.R-project.org/package=geosphere>

```{r}
#citation("geosphere")
#citation("ggtern")
```




+ + + +
+ + + + + + + + + + + + + + + + diff --git a/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.pdf b/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.pdf new file mode 100644 index 0000000..e6d7af4 Binary files /dev/null and b/StudentNotebooks/Assignment08_FinalProjectNotebook/vanesm_finalProjectdF24.pdf differ