Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
This notebook takes our four main data sets (PIXL, LIBS, SHERLOC, & LITHOLOGY) and gives them a consistent naming scheme for their columns.
```{r setup, include=FALSE}
# Point install.packages() at a CRAN mirror so the installs below can run
# without an interactive mirror prompt (e.g. while knitting).
local({
  r <- getOption("repos")
  r["CRAN"] <- "http://cran.r-project.org"
  options(repos = r)
})

# Attach each required package, installing it first when it is missing.
# Each branch loads the package it just installed; the tidyr and dplyr
# branches used to call library(qpcR), which left the freshly installed
# package unattached on a first run.
if (!require("tidyr")) {
  install.packages("tidyr")
  library(tidyr)
}
if (!require("dplyr")) {
  install.packages("dplyr")
  library(dplyr)
}
if (!require("qpcR")) {
  install.packages("qpcR")
  library(qpcR)
}
```
# Importing data frames
```{r}
# ---- PIXL ----
# Earlier PIXL source, kept for reference (it was missing Lat and Lon):
# pixl.df <- readRDS("~/DAR-Mars-F24/Data/samples_pixl_wide.Rds")
pixl.df <- readRDS("~/DAR-Mars-F24/StudentData/pixl_sol_coordinates.Rds")

# ---- LIBS (measurements plus the separately stored type labels) ----
libs.df <- readRDS("~/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds")
libs_type.df <- readRDS("~/DAR-Mars-F24/StudentData/libs_typed.Rds")

# ---- Lithology ----
lithology.df <- readRDS("~/DAR-Mars-F24/Data/mineral_data_static.Rds")

# ---- SHERLOC: import, clean, and reshape ----
sherloc.df <- readRDS("~/DAR-Mars-F24/Data/abrasions_sherloc_samples.Rds")

## Types: Mineral becomes a factor, every character column becomes numeric
## (all other columns are passed through untouched).
sherloc.df$Mineral <- as.factor(sherloc.df$Mineral)
sherloc.df[] <- lapply(
  sherloc.df,
  function(column) if (is.character(column)) as.numeric(column) else column
)

## Missing values are recorded as 0.
sherloc.df[is.na(sherloc.df)] <- 0

## Reshape so that each row is an abrasion ("Name") and each column is a
## mineral: go long first (one row per Mineral/Name pair), then wide again.
sherloc.df <- pivot_longer(
  sherloc.df,
  cols = !Mineral,
  names_to = "Name",
  values_to = "Presence"
)
sherloc.df$Name <- as.factor(sherloc.df$Name)
sherloc.matrix <- pivot_wider(
  sherloc.df,
  names_from = Mineral,
  values_from = Presence
)

## Append the PIXL sample column by position.
## NOTE(review): this assumes the SHERLOC rows and the PIXL rows are in the
## same order and of the same length -- confirm against the source data.
sherloc.df <- cbind(sherloc.matrix, pixl.df[,"sample"])

# ---- Combined PIXL + LIBS table (read from the working directory) ----
pixl_libs.df <- readRDS("PIXL_LIBS_Combined.Rds")
```
# Renaming Columns
```{r}
# All renames below are positional: the i-th name replaces the i-th existing
# column name, so each vector must match the current column order exactly.

# ---- PIXL ----
colnames(pixl.df) <- c(
  "Lat", "Lon", "Sol", "Sample",
  "Na2O", "MgO", "Al2O3", "SiO2", "P2O5", "SO3", "Cl",
  "K2O", "CaO", "TiO2", "Cr2O3", "MnO", "FeOT",
  "Name", "Type", "Campaign", "Location", "Abrasion"
)

# ---- LIBS ----
# The type label lives in a separate table; append it as the last column
# before renaming.
libs.df <- cbind(libs.df,libs_type.df$"type")
colnames(libs.df) <- c(
  "Sol", "Lat", "Lon", "Target", "Point",
  "SiO2", "SiO2_stdev",
  "TiO2", "TiO2_stdev",
  "Al2O3", "Al2O3_stdev",
  "FeOT", "FeOT_stdev",
  "MgO", "MgO_stdev",
  "CaO", "CaO_stdev",
  "Na2O", "Na2O_stdev",
  "K2O", "K2O_stdev",
  "Total", "distance_mm", "Tot.Em.",
  "Type"
)
# Earth-sample flag as a logical vector (stored as numeric in the source).
libs_type <- as.logical(libs_type.df$"earthsample?")

# ---- Lithology ----
colnames(lithology.df) <- c(
  "Sample", "Name", "SampleType", "Campaign", "Abrasion",
  "Feldspar", "Plagioclase", "Pyroxene", "Olivine", "Quartz",
  "Apatite", "FeTi oxides", "Iron oxide", "Sulfate", "Perchlorates",
  "Phosphate", "Ca-sulfate", "Carbonate", "Fe-Mg-clay minerals",
  "Fe-Mg carbonate",
  "Mg-sulfate", "Phyllosilicates", "Chlorite", "Halite", "Organic matter",
  "Hydrated Ca-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate",
  "Na-perchlorate", "Amorphous Silicate",
  "Hydrated Carbonates", "Disordered Silicates", "Hydrated Iron oxide",
  "Sulfate+Organic matter", "Other hydrated phases",
  "Kaolinite (hydrous Al-clay)", "Chromite", "Ilmenite",
  "Zircon/Baddeleyite", "Spinels"
)

# ---- SHERLOC ----
colnames(sherloc.df) <- c(
  "Name",
  "Plagioclase", "Sulfate", "Ca-sulfate", "Hydrated Ca-sulfate",
  "Mg-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate", "Perchlorates",
  "Na-perchlorate", "Amorphous Silicate", "Phosphate", "Pyroxene",
  "Olivine", "Carbonate", "Fe-Mg carbonate", "Hydrated Carbonates",
  "Disordered Silicates", "Feldspar", "Quartz", "Apatite",
  "FeTi oxides", "Halite", "Iron oxide", "Hydrated Iron oxide",
  "Organic matter", "Sulfate+Organic matter", "Other hydrated phases",
  "Phyllosilicates",
  "Chlorite", "Kaolinite (hydrous Al-clay)", "Chromite", "Ilmenite",
  "Zircon/Baddeleyite", "Fe-Mg-clay minerals", "Spinels",
  "Sample"
)

# ---- Combined PIXL + LIBS ----
colnames(pixl_libs.df) <- c(
  "Target.libs", "Lat.libs", "Lon.libs", "Sol.libs", "Point.libs",
  "Distance",
  "Abrasion.pixl", "Lat.pixl", "Lon.pixl", "Campaign.pixl",
  "SiO2.libs", "TiO2.libs", "Al2O3.libs", "FeOT.libs",
  "MgO.libs", "CaO.libs", "Na2O.libs", "K2O.libs"
)
```
# Creating Sample metadata data frame
```{r}
# Assemble the sample metadata table: descriptive PIXL columns side by side
# with the lithology Sample / SampleType columns. qpcR's internal cbind.na is
# used for the column bind.
sample_meta.df <- qpcR:::cbind.na(
  pixl.df[, c("Sol", "Lat", "Lon", "Type", "Campaign", "Abrasion", "Name", "Location")],
  lithology.df[, c("Sample", "SampleType")]
)

# Keep the columns in the published order (Location is not carried forward).
sample_meta.df <- sample_meta.df[, c(
  "Sample", "Name", "Sol", "Lat", "Lon",
  "Abrasion", "Campaign", "Type", "SampleType"
)]

# The first row's Type is relabelled "Atmospheric" (it was "N/A" in the source).
sample_meta.df[1, "Type"] <- "Atmospheric"
```
# Separating LIBS
Separating out earth reference Libs from normal Libs
```{r}
# Rows flagged by libs_type (the Scct / earth-reference measurements) go into
# their own data frame ...
libs_earth_references.df <- libs.df[libs_type, ]
# ... and are then removed from libs.df, leaving only the remaining rows.
libs.df <- libs.df[!libs_type, ]
```
# Adding LIBS clusters
Performs k-means and saves clusters
```{r}
# Build the three clustering coordinates for every LIBS row:
#   x = (SiO2 + Al2O3) / 100, y = (FeOT + MgO) / 100, z = (CaO + Na2O + K2O) / 100
# The dplyr/tidyr verbs are namespace-qualified because a package attached
# after dplyr can mask select() (qpcR is loaded last and is believed to attach
# MASS, which exports its own select()).
libs_ternary_all <- libs.df %>%
  dplyr::select(c(SiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O)) %>%
  dplyr::mutate(
    x = (SiO2 + Al2O3) / 100,
    y = (FeOT + MgO) / 100,
    z = (CaO + Na2O + K2O) / 100
  ) %>%
  dplyr::select(-c(SiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O))

# Remember which rows have all three coordinates, then cluster only those.
has_ternary <- complete.cases(libs_ternary_all)
libs_ternary <- libs_ternary_all %>%
  tidyr::drop_na()

# Fixed seed so the k-means assignment is reproducible.
set.seed(10)
k <- 4
tern.km <- kmeans(libs_ternary, k)

# Write each cluster label back to the row it was computed from. Rows that
# were dropped for missing values get NA instead of shifting every later label
# (previously cbind() would fail or silently recycle when drop_na() removed
# rows). With no missing values the result is the same factor as before.
Cluster <- rep(NA_integer_, nrow(libs.df))
Cluster[has_ternary] <- tern.km$cluster
Cluster <- factor(Cluster, levels = seq_len(k))
libs.df <- cbind(libs.df, Cluster)
```
# Reordering Columns and removing meta data from PIXL, Lithology, LIBS, & Sherloc
```{r}
# Column lists shared by more than one table below.

## The 35 mineral columns, in the order used for both Lithology and SHERLOC.
mineral_cols <- c(
  "Plagioclase", "Sulfate", "Ca-sulfate", "Hydrated Ca-sulfate",
  "Mg-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate", "Perchlorates",
  "Na-perchlorate", "Amorphous Silicate", "Phosphate", "Pyroxene",
  "Olivine", "Carbonate", "Fe-Mg carbonate", "Hydrated Carbonates",
  "Disordered Silicates", "Feldspar", "Quartz", "Apatite",
  "FeTi oxides", "Halite", "Iron oxide", "Hydrated Iron oxide",
  "Organic matter", "Sulfate+Organic matter", "Other hydrated phases",
  "Phyllosilicates",
  "Chlorite", "Kaolinite (hydrous Al-clay)", "Chromite", "Ilmenite",
  "Zircon/Baddeleyite", "Fe-Mg-clay minerals", "Spinels"
)

## LIBS identifying columns and measurement columns.
libs_id_cols <- c("Target", "Point", "Sol", "Lat", "Lon", "Type")
libs_measure_cols <- c(
  "SiO2", "SiO2_stdev",
  "TiO2", "TiO2_stdev",
  "Al2O3", "Al2O3_stdev",
  "FeOT", "FeOT_stdev",
  "MgO", "MgO_stdev",
  "CaO", "CaO_stdev",
  "Na2O", "Na2O_stdev",
  "K2O", "K2O_stdev",
  "Total", "distance_mm", "Tot.Em."
)

# PIXL: Sample, then the oxides that also appear in LIBS, then the ones that
# do not. The metadata columns are dropped here.
pixl.df <- pixl.df[, c(
  "Sample",
  "SiO2", "TiO2", "Al2O3", "FeOT", "MgO", "CaO", "Na2O", "K2O",
  "P2O5", "SO3", "Cl", "Cr2O3", "MnO"
)]

# LIBS: identifiers, the k-means Cluster, then the measurements.
libs.df <- libs.df[, c(libs_id_cols, "Cluster", libs_measure_cols)]

# Earth-reference LIBS: same layout, but without a Cluster column.
libs_earth_references.df <-
  libs_earth_references.df[, c(libs_id_cols, libs_measure_cols)]

# Lithology: first 16 rows only, Sample followed by the mineral columns.
lithology.df <- lithology.df[1:16, c("Sample", mineral_cols)]

# SHERLOC: Sample followed by the mineral columns (Name is dropped).
sherloc.df <- sherloc.df[, c("Sample", mineral_cols)]

# Combined PIXL + LIBS: LIBS identifiers, Distance, PIXL identifiers, then
# the LIBS oxide values.
pixl_libs.df <- pixl_libs.df[, c(
  "Target.libs", "Point.libs", "Sol.libs", "Lat.libs", "Lon.libs",
  "Distance",
  "Abrasion.pixl", "Campaign.pixl", "Lat.pixl", "Lon.pixl",
  "SiO2.libs", "TiO2.libs", "Al2O3.libs", "FeOT.libs",
  "MgO.libs", "CaO.libs", "Na2O.libs", "K2O.libs"
)]
```
# Fixing data types
Check the column types and fix them where needed (e.g., Sample, Sol, Lat, Lon -> numeric; Name -> character; Abrasion, Campaign, Type, SampleType -> factor).
```{r}
# Pixl
## Already good!
## Sample is integer and concentrations are numeric!

# Libs
libs.df$Point <- as.factor(libs.df$Point) # Was originally "character"

# Lithology
## Columns 2:36 are the mineral columns; Sample (column 1) is handled separately.
lithology.df[,2:36] <- lapply(lithology.df[,2:36],as.factor) # Was originally "character"
lithology.df$Sample <- as.integer(lithology.df$Sample) #To match Pixl

# Sherloc
## Convert only the mineral columns to factors and leave Sample out of it.
## The earlier approach turned every column (Sample included) into a factor and
## then called as.integer() on it, which returns the factor's level codes
## rather than the original sample numbers whenever those are not exactly 1..n.
sherloc_mineral_cols <- setdiff(names(sherloc.df), "Sample")
sherloc.df[sherloc_mineral_cols] <- lapply(sherloc.df[sherloc_mineral_cols], as.factor)
## Going through as.character() keeps the labels even if Sample ever arrives
## as a factor.
sherloc.df$Sample <- as.integer(as.character(sherloc.df$Sample))

# Sample Meta
## NOTE(review): if cbind.na returned any of these columns as factors, the
## as.integer()/as.numeric() calls below would yield level codes -- confirm the
## incoming types.
sample_meta.df$Sample <- as.integer(sample_meta.df$Sample)
# sample_meta.df$Name <- as.character(sample_meta.df$Name) # Already in the format!
sample_meta.df$Sol <- as.numeric(sample_meta.df$Sol)
sample_meta.df$Lat <- as.numeric(sample_meta.df$Lat)
sample_meta.df$Lon <- as.numeric(sample_meta.df$Lon)
sample_meta.df$Abrasion <- as.factor(sample_meta.df$Abrasion)
sample_meta.df$Campaign <- as.factor(sample_meta.df$Campaign)
sample_meta.df$Type <- as.factor(sample_meta.df$Type)
sample_meta.df$SampleType <- as.factor(sample_meta.df$SampleType)

# Pixl and Libs combined
## Already good!
```
# Saving New data frames
```{r}
# Write every cleaned table to the working directory as a "v1_" .Rds file.
saveRDS(object = sample_meta.df, file = "v1_sample_meta.Rds")
saveRDS(object = libs.df, file = "v1_libs.Rds")
saveRDS(object = libs_earth_references.df, file = "v1_libs_earth_references.Rds")
saveRDS(object = lithology.df, file = "v1_lithology.Rds")
saveRDS(object = sherloc.df, file = "v1_sherloc.Rds")
saveRDS(object = pixl.df, file = "v1_pixl.Rds")
# The combined PIXL + LIBS table is saved under the "libs_to_sample" name.
saveRDS(object = pixl_libs.df, file = "v1_libs_to_sample.Rds")
```