Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
DAR-Mars-F24/StudentData/v1_consistent_data_naming.Rmd
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
226 lines (198 sloc)
10.7 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This notebook takes our four main data sets (PIXL, LIBS, SHERLOC, & LITHOLOGY) and gives them a consistent naming scheme for their columns. | |
```{r setup, include=FALSE} | |
# Set the default CRAN repository so install.packages() has a mirror to use
local({
  repos <- getOption("repos")
  repos["CRAN"] <- "http://cran.r-project.org"
  options(repos = repos)
})

# Install (only when missing) and attach every package this notebook needs.
# Each package is attached under its own name: previously a missing tidyr or
# dplyr was installed but qpcR was attached in its place, leaving the package
# that had just been installed unloaded.
# library() is used instead of require() so a package that still cannot be
# loaded stops the notebook here rather than failing later.
# Order matters: qpcR attaches MASS, which exports its own select(). Attaching
# dplyr last keeps dplyr::select() on top of the search path.
for (pkg in c("qpcR", "tidyr", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
``` | |
# Importing data frames | |
```{r} | |
# ---- PIXL ----
# Older PIXL file, kept for reference only; it has no Lat and Lon columns:
# pixl.df <- readRDS("~/DAR-Mars-F24/Data/samples_pixl_wide.Rds")
pixl.df <- readRDS("~/DAR-Mars-F24/StudentData/pixl_sol_coordinates.Rds")

# ---- LIBS (measurements, plus the separately stored type labels) ----
libs.df <- readRDS("~/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds")
libs_type.df <- readRDS("~/DAR-Mars-F24/StudentData/libs_typed.Rds")

# ---- Lithology ----
lithology.df <- readRDS("~/DAR-Mars-F24/Data/mineral_data_static.Rds")

# ---- SHERLOC: import, then reshape ----
sherloc.df <- readRDS("~/DAR-Mars-F24/Data/abrasions_sherloc_samples.Rds")

# Mineral is turned into a factor first, so it is no longer character and is
# left alone by the numeric conversion of the remaining character columns.
sherloc.df$Mineral <- as.factor(sherloc.df$Mineral)
sherloc.df[] <- lapply(sherloc.df, function(column) {
  if (is.character(column)) as.numeric(column) else column
})

# Missing entries become 0
sherloc.df[is.na(sherloc.df)] <- 0

# Reshape so that each row is an abrasion and each column a mineral:
# go long (Mineral, Name, Presence) first, then wide again keyed by Name.
sherloc.df <- pivot_longer(
  sherloc.df,
  cols = !Mineral,
  names_to = "Name",
  values_to = "Presence"
)
sherloc.df$Name <- as.factor(sherloc.df$Name)
sherloc.matrix <- pivot_wider(
  sherloc.df,
  names_from = Mineral,
  values_from = Presence
)

# The PIXL sample column is attached by position, not by key.
# NOTE(review): this assumes both tables list the samples in the same row
# order - confirm against the input files.
sherloc.df <- cbind(sherloc.matrix, pixl.df[, "sample"])

# ---- Combined PIXL / LIBS table ----
# Unlike the files above, this path is relative to the working directory.
pixl_libs.df <- readRDS("PIXL_LIBS_Combined.Rds")
``` | |
# Renaming Columns | |
```{r} | |
# Assign column names by position, stopping when the number of names does not
# match the number of columns. A bare `colnames(df) <- ...` on a base data
# frame pads a too-short vector with NA names instead of failing, which would
# silently mislabel the data.
#   df        : table to rename
#   new_names : character vector, one name per column, in column order
# Returns `df` with its column names replaced.
set_colnames_checked <- function(df, new_names) {
  if (ncol(df) != length(new_names)) {
    stop(
      "Positional rename expects ", length(new_names),
      " columns but the table has ", ncol(df), ".",
      call. = FALSE
    )
  }
  colnames(df) <- new_names
  df
}

# Renaming PIXL names
pixl.df <- set_colnames_checked(pixl.df, c(
  "Lat", "Lon", "Sol", "Sample",
  "Na2O", "MgO", "Al2O3", "SiO2", "P2O5", "SO3", "Cl", "K2O", "CaO", "TiO2",
  "Cr2O3", "MnO", "FeOT",
  "Name", "Type", "Campaign", "Location", "Abrasion"
))

# Renaming LIBS
# The type label is appended by position, so both tables must describe the same
# rows; a label vector of a different length could otherwise be recycled.
stopifnot(nrow(libs_type.df) == nrow(libs.df))
libs.df <- cbind(libs.df, libs_type.df$type)
libs.df <- set_colnames_checked(libs.df, c(
  "Sol", "Lat", "Lon", "Target", "Point",
  "SiO2", "SiO2_stdev", "TiO2", "TiO2_stdev", "Al2O3", "Al2O3_stdev",
  "FeOT", "FeOT_stdev", "MgO", "MgO_stdev", "CaO", "CaO_stdev",
  "Na2O", "Na2O_stdev", "K2O", "K2O_stdev",
  "Total", "distance_mm", "Tot.Em.",
  "Type"
))
# Row flag taken from the `earthsample?` column; stored as numeric in the file,
# so it is converted to logical here.
libs_type <- as.logical(libs_type.df$`earthsample?`)

# Renaming Lithology
lithology.df <- set_colnames_checked(lithology.df, c(
  "Sample", "Name", "SampleType", "Campaign", "Abrasion",
  "Feldspar", "Plagioclase", "Pyroxene", "Olivine", "Quartz",
  "Apatite", "FeTi oxides", "Iron oxide", "Sulfate", "Perchlorates",
  "Phosphate", "Ca-sulfate", "Carbonate", "Fe-Mg-clay minerals", "Fe-Mg carbonate",
  "Mg-sulfate", "Phyllosilicates", "Chlorite", "Halite", "Organic matter",
  "Hydrated Ca-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate", "Na-perchlorate", "Amorphous Silicate",
  "Hydrated Carbonates", "Disordered Silicates", "Hydrated Iron oxide", "Sulfate+Organic matter", "Other hydrated phases",
  "Kaolinite (hydrous Al-clay)", "Chromite", "Ilmenite", "Zircon/Baddeleyite", "Spinels"
))

# Renaming Sherloc
sherloc.df <- set_colnames_checked(sherloc.df, c(
  "Name",
  "Plagioclase", "Sulfate", "Ca-sulfate", "Hydrated Ca-sulfate",
  "Mg-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate", "Perchlorates",
  "Na-perchlorate", "Amorphous Silicate", "Phosphate", "Pyroxene",
  "Olivine", "Carbonate", "Fe-Mg carbonate", "Hydrated Carbonates",
  "Disordered Silicates", "Feldspar", "Quartz", "Apatite",
  "FeTi oxides", "Halite", "Iron oxide", "Hydrated Iron oxide",
  "Organic matter", "Sulfate+Organic matter", "Other hydrated phases", "Phyllosilicates",
  "Chlorite", "Kaolinite (hydrous Al-clay)", "Chromite", "Ilmenite",
  "Zircon/Baddeleyite", "Fe-Mg-clay minerals", "Spinels", "Sample"
))

# Renaming Pixl and Libs combined data set
pixl_libs.df <- set_colnames_checked(pixl_libs.df, c(
  "Target.libs", "Lat.libs", "Lon.libs", "Sol.libs", "Point.libs",
  "Distance",
  "Abrasion.pixl", "Lat.pixl", "Lon.pixl", "Campaign.pixl",
  "SiO2.libs", "TiO2.libs", "Al2O3.libs", "FeOT.libs", "MgO.libs", "CaO.libs",
  "Na2O.libs", "K2O.libs"
))
``` | |
# Creating Sample metadata data frame | |
```{r} | |
# Build the per-sample metadata table: position/context columns come from PIXL,
# the sample id and sample type from Lithology. The two pieces are joined
# column-wise with qpcR's internal cbind.na() (reached through `:::`).
# NOTE(review): cbind.na() presumably pads the shorter input with NA rows -
# confirm, since the two inputs are matched by row position only.
sample_meta.df <- qpcR:::cbind.na(
  pixl.df[, c("Sol", "Lat", "Lon", "Type", "Campaign", "Abrasion", "Name", "Location")],
  lithology.df[, c("Sample", "SampleType")]
)

# Identifier columns go first; "Location" is not carried into the result.
sample_meta.df <- sample_meta.df[, c(
  "Sample", "Name", "Sol", "Lat", "Lon",
  "Abrasion", "Campaign", "Type", "SampleType"
)]

# Row 1 is the atmospheric sample, whose Type is "N/A" in the input; label it.
# NOTE(review): relies on the atmospheric sample always being the first row.
sample_meta.df[1, "Type"] <- "Atmospheric"
``` | |
# Separating Libs
Separating out earth reference Libs from normal Libs | |
```{r} | |
# `libs_type` is the logical row mask marking the Scct (earth reference) shots.
# It must hold exactly one non-missing value per LIBS row: an NA entry would
# produce an all-NA row in both subsets, and a mask of the wrong length (for
# example after this chunk has already been run once) would be recycled or
# padded instead of raising an error.
stopifnot(
  is.logical(libs_type),
  length(libs_type) == nrow(libs.df),
  !anyNA(libs_type)
)

# Creating a data frame with only the Scct Libs data
libs_earth_references.df <- libs.df[libs_type, ]
# Removing the Scct Libs data from libs.df
libs.df <- libs.df[!libs_type, ]
``` | |
# Adding LIBS clusters | |
Performs k-means and saves clusters | |
```{r} | |
# Collapse the seven oxide columns into three ternary coordinates:
#   x = (SiO2 + Al2O3) / 100
#   y = (FeOT + MgO) / 100
#   z = (CaO + Na2O + K2O) / 100
# select() is written as dplyr::select() because MASS (attached by qpcR) also
# exports a select(); whichever package was attached last would otherwise win.
ternary_all <- libs.df %>%
  dplyr::select(c(SiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O)) %>%
  mutate(
    x = (SiO2 + Al2O3) / 100,
    y = (FeOT + MgO) / 100,
    z = (CaO + Na2O + K2O) / 100
  ) %>%
  dplyr::select(-c(SiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O))

# Record which LIBS rows have all three coordinates before dropping the
# incomplete ones, so the cluster labels can be put back on the right rows.
has_coords <- complete.cases(ternary_all)
libs_ternary <- ternary_all %>%
  drop_na()

# k-means on the ternary coordinates; the seed fixes the random start so the
# cluster numbering is reproducible.
set.seed(10)
k <- 4
tern.km <- kmeans(libs_ternary, k)

# Map the labels back by row. Rows that were dropped for missing values get an
# NA cluster. Binding the label vector directly onto libs.df would fail, or
# recycle labels onto the wrong rows, whenever drop_na() removed anything.
cluster_id <- rep(NA_integer_, nrow(libs.df))
cluster_id[has_coords] <- tern.km$cluster
Cluster <- as.factor(cluster_id)
libs.df <- cbind(libs.df, Cluster)
``` | |
# Reordering Columns and removing meta data from PIXL, Lithology, LIBS, & Sherloc | |
```{r} | |
# Column groups that are shared by more than one table below
libs_id_cols <- c("Target", "Point", "Sol", "Lat", "Lon", "Type")
libs_chem_cols <- c(
  "SiO2", "SiO2_stdev", "TiO2", "TiO2_stdev", "Al2O3", "Al2O3_stdev",
  "FeOT", "FeOT_stdev", "MgO", "MgO_stdev", "CaO", "CaO_stdev",
  "Na2O", "Na2O_stdev", "K2O", "K2O_stdev",
  "Total", "distance_mm", "Tot.Em."
)
mineral_cols <- c(
  "Plagioclase", "Sulfate", "Ca-sulfate", "Hydrated Ca-sulfate",
  "Mg-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate", "Perchlorates",
  "Na-perchlorate", "Amorphous Silicate", "Phosphate", "Pyroxene",
  "Olivine", "Carbonate", "Fe-Mg carbonate", "Hydrated Carbonates",
  "Disordered Silicates", "Feldspar", "Quartz", "Apatite",
  "FeTi oxides", "Halite", "Iron oxide", "Hydrated Iron oxide",
  "Organic matter", "Sulfate+Organic matter", "Other hydrated phases", "Phyllosilicates",
  "Chlorite", "Kaolinite (hydrous Al-clay)", "Chromite", "Ilmenite",
  "Zircon/Baddeleyite", "Fe-Mg-clay minerals", "Spinels"
)

# PIXL: keep the sample id and the oxide concentrations only
pixl.df <- pixl.df[, c(
  "Sample",
  # oxides that LIBS reports as well
  "SiO2", "TiO2", "Al2O3", "FeOT", "MgO", "CaO", "Na2O", "K2O",
  # oxides that only PIXL reports
  "P2O5", "SO3", "Cl", "Cr2O3", "MnO"
)]

# LIBS: identifiers, then the cluster label, then the measurements
libs.df <- libs.df[, c(libs_id_cols, "Cluster", libs_chem_cols)]

# LIBS earth references: same layout, but this table has no Cluster column
libs_earth_references.df <- libs_earth_references.df[, c(libs_id_cols, libs_chem_cols)]

# Lithology: first 16 rows only, sample id followed by the mineral columns
lithology.df <- lithology.df[seq_len(16), c("Sample", mineral_cols)]

# Sherloc: sample id followed by the mineral columns, in the Lithology order
sherloc.df <- sherloc.df[, c("Sample", mineral_cols)]

# Combined PIXL / LIBS table
pixl_libs.df <- pixl_libs.df[, c(
  "Target.libs", "Point.libs", "Sol.libs", "Lat.libs", "Lon.libs",
  "Distance",
  "Abrasion.pixl", "Campaign.pixl", "Lat.pixl", "Lon.pixl",
  "SiO2.libs", "TiO2.libs", "Al2O3.libs", "FeOT.libs", "MgO.libs", "CaO.libs",
  "Na2O.libs", "K2O.libs"
)]

# The helper vectors were only needed for this chunk
rm(libs_id_cols, libs_chem_cols, mineral_cols)
``` | |
# Fixing data types | |
Check each column's type and fix it where needed (e.g. Sample, Sol, Lat, Lon -> numeric; Name -> character; Abrasion, Campaign, Type, SampleType -> factor).
```{r} | |
# Pixl
## Already good!
## Sample is integer and concentrations are numeric!

# Libs
libs.df$Point <- as.factor(libs.df$Point) # Was originally "character"

# Lithology
## Columns 2 to 36 are the mineral columns; column 1 (Sample) is handled on its own.
lithology.df[, 2:36] <- lapply(lithology.df[, 2:36], as.factor) # Was originally "character"
lithology.df$Sample <- as.integer(lithology.df$Sample) # To match Pixl

# Sherloc
## Only the mineral columns become factors. Sample is deliberately left out:
## converting it to a factor and then calling as.integer() returns the factor's
## level codes (1, 2, 3, ...), not the original sample ids, so the ids would
## only survive when they happen to be exactly 1..n.
sherloc_minerals <- setdiff(colnames(sherloc.df), "Sample")
sherloc.df[sherloc_minerals] <- lapply(sherloc.df[sherloc_minerals], as.factor)
## Going through as.character() keeps the printed id even if Sample is already
## stored as a factor.
sherloc.df$Sample <- as.integer(as.character(sherloc.df$Sample))

# Sample Meta
## NOTE(review): if any of these columns arrive as factors, as.integer() and
## as.numeric() return level codes rather than the values - check the column
## classes coming out of the metadata step.
sample_meta.df$Sample <- as.integer(sample_meta.df$Sample)
# sample_meta.df$Name <- as.character(sample_meta.df$Name) # Already in the format!
sample_meta.df$Sol <- as.numeric(sample_meta.df$Sol)
sample_meta.df$Lat <- as.numeric(sample_meta.df$Lat)
sample_meta.df$Lon <- as.numeric(sample_meta.df$Lon)
sample_meta.df$Abrasion <- as.factor(sample_meta.df$Abrasion)
sample_meta.df$Campaign <- as.factor(sample_meta.df$Campaign)
sample_meta.df$Type <- as.factor(sample_meta.df$Type)
sample_meta.df$SampleType <- as.factor(sample_meta.df$SampleType)

# Pixl and Libs combined
## Already good!
``` | |
# Saving New data frames | |
```{r} | |
# Write every cleaned table to the working directory. Each entry pairs the
# output file name with the object stored in it; files are written in the
# listed order. local() keeps the helper variables out of the workspace.
local({
  outputs <- list(
    "v1_sample_meta.Rds" = sample_meta.df,
    "v1_libs.Rds" = libs.df,
    "v1_libs_earth_references.Rds" = libs_earth_references.df,
    "v1_lithology.Rds" = lithology.df,
    "v1_sherloc.Rds" = sherloc.df,
    "v1_pixl.Rds" = pixl.df,
    "v1_libs_to_sample.Rds" = pixl_libs.df
  )
  for (file_name in names(outputs)) {
    saveRDS(outputs[[file_name]], file_name)
  }
})
``` |