Permalink
October 16, 2024 17:19
December 5, 2024 11:52
October 16, 2024 17:19
December 14, 2024 18:22
October 16, 2024 17:19
October 16, 2024 17:19
October 23, 2024 12:54
October 16, 2024 17:19
October 16, 2024 17:19
November 20, 2024 11:20
December 5, 2024 11:52
October 16, 2024 17:19
November 20, 2024 11:53
October 16, 2024 17:19
October 22, 2024 19:32
October 16, 2024 17:19
December 5, 2024 11:52
October 16, 2024 17:19
November 20, 2024 11:20
October 16, 2024 17:19
December 5, 2024 11:52
October 16, 2024 17:19
October 16, 2024 17:19
October 23, 2024 12:54
November 20, 2024 11:53
October 16, 2024 17:19
November 13, 2024 12:04
October 22, 2024 19:32
October 23, 2024 16:10
October 22, 2024 19:32
October 16, 2024 17:19
October 16, 2024 17:19
October 16, 2024 17:19
Newer
100644
226 lines (198 sloc)
10.7 KB
1
This notebook takes our four main data sets (PIXL, LIBS, SHERLOC, & LITHOLOGY) and gives them a consistent naming scheme for their columns.
2
3
```{r setup, include=FALSE}
4
# Set the default CRAN repository so install.packages() has a mirror to use
local({
  repos <- getOption("repos")
  repos["CRAN"] <- "http://cran.r-project.org"
  options(repos = repos)
})

# Attach the packages this notebook needs, installing any that are missing.
# Every package goes through the same path, so the package that gets attached
# is always the one that was checked/installed (tidyr and dplyr are attached
# under their own names, not as qpcR).
for (pkg in c("tidyr", "dplyr", "qpcR")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() errors loudly if the package still cannot be loaded
  library(pkg, character.only = TRUE)
}
22
```
23
24
# Importing data frames
25
```{r}
26
# PIXL: read the version that carries coordinates
# (previous source, missing Lat and Lon:
#  readRDS("~/DAR-Mars-F24/Data/samples_pixl_wide.Rds"))
pixl.df <- readRDS(file = "~/DAR-Mars-F24/StudentData/pixl_sol_coordinates.Rds")

# LIBS
libs.df <- readRDS(file = "~/DAR-Mars-F24/Data/supercam_libs_moc_loc.Rds")

# Lithology
lithology.df <- readRDS(file = "~/DAR-Mars-F24/Data/mineral_data_static.Rds")
36
37
# Importing and reformatting Sherloc
sherloc.df <- readRDS("~/DAR-Mars-F24/Data/abrasions_sherloc_samples.Rds")
## Clean up data types: Mineral becomes a factor; every column that is still
## character afterwards is coerced to numeric (strings that do not parse as
## numbers become NA).
sherloc.df$Mineral <- as.factor(sherloc.df$Mineral)
## vapply() always returns a logical vector, and the mask is computed once
## and reused on both sides of the assignment.
sherloc_is_chr <- vapply(sherloc.df, is.character, logical(1))
sherloc.df[sherloc_is_chr] <- lapply(sherloc.df[sherloc_is_chr], as.numeric)
## Change NA's to 0
sherloc.df <- sherloc.df %>% replace(is.na(.), 0)
## Reformat data so that rows are "abrasions" and columns list the presence of minerals.
## Do this by "pivoting" to a long format, and then back to the desired wide format.
sherloc.df <- sherloc.df %>%
  pivot_longer(!Mineral, names_to = "Name", values_to = "Presence")
## Make abrasion a factor
sherloc.df$Name <- as.factor(sherloc.df$Name)
## Back to wide: one row per Name, one column per Mineral
sherloc.matrix <- sherloc.df %>%
  pivot_wider(names_from = Mineral, values_from = Presence)
## Append the PIXL "sample" column. cbind() pairs rows purely by position, so
## this assumes sherloc.matrix and pixl.df list their rows in the same order
## -- TODO confirm.
sherloc.df <- cbind(sherloc.matrix, pixl.df[, "sample"])

# pixl and libs combined data frame
pixl_libs.df <- readRDS("PIXL_LIBS_Combined.Rds")
55
```
56
57
# Renaming Columns
58
```{r}
59
# Renaming PIXL names
# Names are assigned by position, so the order here has to match the column
# order of the imported PIXL data frame.
colnames(pixl.df) <- c(
  "Lat", "Lon", "Sol", "Sample",
  "Na2O", "MgO", "Al2O3", "SiO2", "P2O5", "SO3", "Cl",
  "K2O", "CaO", "TiO2", "Cr2O3", "MnO", "FeOT",
  "Name", "Type", "Campaign", "Location", "Abrasion"
)
63
# Renaming LIBS
65
colnames(libs.df) <- c("Sol","Lat","Lon","Target","Point",
66
"SiO2","SiO2_stdev","TiO2","TiO2_stdev","Al2O3","Al2O3_stdev","FeOT","FeOT_stdev","MgO","MgO_stdev","CaO","CaO_stdev","Na2O","Na2O_stdev","K2O","K2O_stdev",
69
# Per-row flag read from the "earthsample?" column, converted from numeric to
# logical so it can be used directly as a row mask.
libs_type <- as.logical(libs_type.df$"earthsample?")
72
# Renaming Lithology
# Positional rename: five metadata columns followed by 35 mineral columns.
colnames(lithology.df) <- c(
  "Sample", "Name", "SampleType", "Campaign", "Abrasion",
  "Feldspar", "Plagioclase", "Pyroxene", "Olivine", "Quartz",
  "Apatite", "FeTi oxides", "Iron oxide", "Sulfate", "Perchlorates",
  "Phosphate", "Ca-sulfate", "Carbonate", "Fe-Mg-clay minerals",
  "Fe-Mg carbonate",
  "Mg-sulfate", "Phyllosilicates", "Chlorite", "Halite", "Organic matter",
  "Hydrated Ca-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate",
  "Na-perchlorate", "Amorphous Silicate",
  "Hydrated Carbonates", "Disordered Silicates", "Hydrated Iron oxide",
  "Sulfate+Organic matter", "Other hydrated phases",
  "Kaolinite (hydrous Al-clay)", "Chromite", "Ilmenite",
  "Zircon/Baddeleyite", "Spinels"
)
81
# Renaming Sherloc
# Positional rename: "Name" first, 35 mineral columns, "Sample" last.
colnames(sherloc.df) <- c(
  "Name",
  "Plagioclase", "Sulfate", "Ca-sulfate", "Hydrated Ca-sulfate",
  "Mg-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate",
  "Perchlorates", "Na-perchlorate", "Amorphous Silicate", "Phosphate",
  "Pyroxene", "Olivine", "Carbonate", "Fe-Mg carbonate",
  "Hydrated Carbonates", "Disordered Silicates", "Feldspar", "Quartz",
  "Apatite", "FeTi oxides", "Halite", "Iron oxide", "Hydrated Iron oxide",
  "Organic matter", "Sulfate+Organic matter", "Other hydrated phases",
  "Phyllosilicates", "Chlorite", "Kaolinite (hydrous Al-clay)", "Chromite",
  "Ilmenite", "Zircon/Baddeleyite", "Fe-Mg-clay minerals", "Spinels",
  "Sample"
)
94
# Positional rename of the combined PIXL/LIBS data frame; the suffix records
# which data set each column belongs to.
colnames(pixl_libs.df) <- c(
  "Target.libs", "Lat.libs", "Lon.libs", "Sol.libs", "Point.libs",
  "Distance",
  "Abrasion.pixl", "Lat.pixl", "Lon.pixl", "Campaign.pixl",
  "SiO2.libs", "TiO2.libs", "Al2O3.libs", "FeOT.libs",
  "MgO.libs", "CaO.libs", "Na2O.libs", "K2O.libs"
)
98
```
99
100
# Creating Sample metadata data frame
101
```{r}
102
# Build the sample metadata table: the PIXL metadata columns side by side with
# the Lithology Sample / SampleType columns. qpcR's internal cbind.na() is
# used instead of cbind(); presumably it pads the shorter input with NA --
# confirm against the qpcR sources.
sample_meta.df <- qpcR:::cbind.na(
  pixl.df[, c("Sol", "Lat", "Lon", "Type", "Campaign", "Abrasion", "Name", "Location")],
  lithology.df[, c("Sample", "SampleType")]
)

# Put the columns in their final order ("Location" is not carried over)
sample_meta.df <- sample_meta.df[, c(
  "Sample", "Name", "Sol", "Lat", "Lon",
  "Abrasion", "Campaign", "Type", "SampleType"
)]

# Changing atmospherics type from "N/A" to "Atmospheric" (hard-coded to row 1)
sample_meta.df[1, "Type"] <- "Atmospheric"
110
```
111
112
# Separating Libs
113
Separating out earth reference Libs from normal Libs
114
```{r}
115
# Rows flagged TRUE in libs_type (the Scct Libs data) go to their own data
# frame. This has to run before libs.df is overwritten below.
libs_earth_references.df <- libs.df[libs_type, ]

# Keep only the rows that are not flagged in libs.df
libs.df <- libs.df[!libs_type, ]
120
```
121
122
# Adding LIBS clusters
123
Performs k-means and saves clusters
124
```{r}
125
# Collapse the LIBS oxide columns into three summed fractions (x, y, z) and
# drop incomplete rows. The dplyr/tidyr verbs are namespace-qualified because
# qpcR attaches MASS, and MASS::select() masks dplyr::select() whenever it is
# attached after dplyr.
libs_ternary <- libs.df %>%
  dplyr::select(c(SiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O)) %>%
  dplyr::mutate(
    x = (SiO2 + Al2O3) / 100,
    y = (FeOT + MgO) / 100,
    z = (CaO + Na2O + K2O) / 100
  ) %>%
  dplyr::select(-c(SiO2, Al2O3, FeOT, MgO, CaO, Na2O, K2O)) %>%
  tidyr::drop_na()

# Fixed seed so the k-means starting centres, and therefore the cluster
# labels, are the same on every run.
set.seed(10)
k <- 4
tern.km <- kmeans(libs_ternary, k)
# One label per row of libs_ternary, i.e. per LIBS row that survived drop_na()
Cluster <- as.factor(tern.km$cluster)
135
```
136
137
# Reordering Columns and removing meta data from PIXL, Lithology, LIBS, & Sherloc
138
```{r}
139
# Resorting PIXL columns: keep Sample plus the composition columns and drop
# the metadata columns.
pixl.df <- pixl.df[, c(
  "Sample",
  # These show up in LIBS
  "SiO2", "TiO2", "Al2O3", "FeOT", "MgO", "CaO", "Na2O", "K2O",
  # These ones don't show up in LIBS
  "P2O5", "SO3", "Cl", "Cr2O3", "MnO"
)]
144
# Resorting LIBS columns
libs.df <- libs.df[, c(
  "Target", "Point", "Sol", "Lat", "Lon", "Type", "Cluster",
  "SiO2", "SiO2_stdev", "TiO2", "TiO2_stdev", "Al2O3", "Al2O3_stdev",
  "FeOT", "FeOT_stdev", "MgO", "MgO_stdev", "CaO", "CaO_stdev",
  "Na2O", "Na2O_stdev", "K2O", "K2O_stdev",
  "Total", "distance_mm", "Tot.Em."
)]
# Resorting the earth reference LIBS columns (same order, no "Cluster")
libs_earth_references.df <- libs_earth_references.df[, c(
  "Target", "Point", "Sol", "Lat", "Lon", "Type",
  "SiO2", "SiO2_stdev", "TiO2", "TiO2_stdev", "Al2O3", "Al2O3_stdev",
  "FeOT", "FeOT_stdev", "MgO", "MgO_stdev", "CaO", "CaO_stdev",
  "Na2O", "Na2O_stdev", "K2O", "K2O_stdev",
  "Total", "distance_mm", "Tot.Em."
)]
152
# Resorting Lithology columns
154
"Plagioclase","Sulfate","Ca-sulfate","Hydrated Ca-sulfate",
155
"Mg-sulfate","Hydrated Sulfates","Hydrated Mg-Fe sulfate","Perchlorates",
156
"Na-perchlorate","Amorphous Silicate","Phosphate","Pyroxene",
157
"Olivine","Carbonate","Fe-Mg carbonate","Hydrated Carbonates",
158
"Disordered Silicates","Feldspar","Quartz","Apatite",
159
"FeTi oxides","Halite","Iron oxide","Hydrated Iron oxide",
160
"Organic matter","Sulfate+Organic matter","Other hydrated phases","Phyllosilicates",
161
"Chlorite","Kaolinite (hydrous Al-clay)","Chromite","Ilmenite",
162
"Zircon/Baddeleyite","Fe-Mg-clay minerals","Spinels")]
163
# Resorting Sherloc columns: "Sample" moves to the front and "Name" is not
# carried over.
sherloc.df <- sherloc.df[, c(
  "Sample",
  "Plagioclase", "Sulfate", "Ca-sulfate", "Hydrated Ca-sulfate",
  "Mg-sulfate", "Hydrated Sulfates", "Hydrated Mg-Fe sulfate",
  "Perchlorates", "Na-perchlorate", "Amorphous Silicate", "Phosphate",
  "Pyroxene", "Olivine", "Carbonate", "Fe-Mg carbonate",
  "Hydrated Carbonates", "Disordered Silicates", "Feldspar", "Quartz",
  "Apatite", "FeTi oxides", "Halite", "Iron oxide", "Hydrated Iron oxide",
  "Organic matter", "Sulfate+Organic matter", "Other hydrated phases",
  "Phyllosilicates", "Chlorite", "Kaolinite (hydrous Al-clay)", "Chromite",
  "Ilmenite", "Zircon/Baddeleyite", "Fe-Mg-clay minerals", "Spinels"
)]
174
175
# Resorting Pixl and Libs combined data set
pixl_libs.df <- pixl_libs.df[, c(
  "Target.libs", "Point.libs", "Sol.libs", "Lat.libs", "Lon.libs",
  "Distance",
  "Abrasion.pixl", "Campaign.pixl", "Lat.pixl", "Lon.pixl",
  "SiO2.libs", "TiO2.libs", "Al2O3.libs", "FeOT.libs",
  "MgO.libs", "CaO.libs", "Na2O.libs", "K2O.libs"
)]
181
```
182
184
Check the column types and fix them where needed (e.g., Sample, Sol, Lat, Lon -> numeric; Name -> character; Abrasion, Campaign, Type, SampleType -> factor).
185
```{r}
186
# Pixl
## Already good!
## Sample is integer and concentrations are numeric!

# Libs
libs.df$Point <- as.factor(libs.df$Point) # Was originally "character"

# Lithology
## Columns 2:36 are converted to factors; this assumes Sample is column 1 and
## the remaining 35 columns are the mineral columns -- TODO confirm.
lithology.df[, 2:36] <- lapply(lithology.df[, 2:36], as.factor) # Was originally "character"
lithology.df$Sample <- as.integer(lithology.df$Sample) # To match Pixl

# Sherloc
## Every column except Sample becomes a factor. Sample is deliberately left
## out: as.integer() on a factor returns the internal level codes (1, 2, 3, ...)
## rather than the original values, so converting Sample to a factor and back
## would silently renumber the samples.
sherloc_factor_cols <- setdiff(names(sherloc.df), "Sample")
sherloc.df[sherloc_factor_cols] <- lapply(sherloc.df[sherloc_factor_cols], as.factor)
sherloc.df$Sample <- as.integer(sherloc.df$Sample) # To match Pixl

# Sample Meta
sample_meta.df$Sample <- as.integer(sample_meta.df$Sample)
# sample_meta.df$Name <- as.character(sample_meta.df$Name) # Already in the format!
sample_meta.df$Sol <- as.numeric(sample_meta.df$Sol)
sample_meta.df$Lat <- as.numeric(sample_meta.df$Lat)
sample_meta.df$Lon <- as.numeric(sample_meta.df$Lon)
sample_meta.df$Abrasion <- as.factor(sample_meta.df$Abrasion)
sample_meta.df$Campaign <- as.factor(sample_meta.df$Campaign)
sample_meta.df$Type <- as.factor(sample_meta.df$Type)
sample_meta.df$SampleType <- as.factor(sample_meta.df$SampleType)
211
217
# Saving New data frames
218
```{r}
219
# Write the cleaned data frames to "v1_*.Rds" files (relative paths, so they
# land in the current working directory).
saveRDS(object = sample_meta.df, file = "v1_sample_meta.Rds")
saveRDS(object = libs.df, file = "v1_libs.Rds")
saveRDS(object = lithology.df, file = "v1_lithology.Rds")
saveRDS(object = sherloc.df, file = "v1_sherloc.Rds")
saveRDS(object = pixl.df, file = "v1_pixl.Rds")
226
```