From 107f82bb28bb7204d2fb670d42f52658047624f5 Mon Sep 17 00:00:00 2001
From: compta Mars 2020 Mission Data Notebook:
DAR Assignment 2 (Fall 2024)
Ashton Compton
-04 September 2024
+11 September 2024
@@ -2920,26 +2920,259 @@ 4 Analysis of Data (Part
exact same analysis. If you want to use the same clustering for your
team (which is okay but then vary rest), make sure you use the same
random seeds.
I worked on H
Describe the data set contained in the data frame and
+
Scale this data appropriately (you can choose the scaling -method or decide to not scale data): Explain why you chose a -scaling method or to not scale. (3 pts)
Cluster the data using k-means or your favorite clustering +samples? (3 pts)
There are 16 rows of data. 10 features are metadata, and they are also +duplicates. That leaves 89 features that are measurements.
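+We should scale the data before clustering (and possibly for a future PCA): k-means relies on Euclidean distance, so unscaled features with larger ranges would dominate the result. Let’s use +scale()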
+sherloc_lithology_pixl_scaled.matrix <- scale(sherloc_lithology_pixl.matrix)  # scale() with defaults: centre each column and divide by its standard deviation
+summary(sherloc_lithology_pixl_scaled.matrix)  # per-column check: every Mean prints as 0; both "Hydrated Carbonates" columns are all NA (presumably constant columns with zero SD - confirm)
+## Plagioclase Sulfate Ca-sulfate Hydrated Ca-sulfate
+## Min. :-0.4651 Min. :-1.4133 Min. :-0.7264 Min. :-0.366
+## 1st Qu.:-0.4651 1st Qu.:-1.0095 1st Qu.:-0.7264 1st Qu.:-0.366
+## Median :-0.4651 Median : 0.7403 Median :-0.7264 Median :-0.366
+## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.000
+## 3rd Qu.:-0.4651 3rd Qu.: 0.7403 3rd Qu.: 1.3867 3rd Qu.:-0.366
+## Max. : 2.0156 Max. : 0.7403 Max. : 1.3867 Max. : 2.562
+##
+## Mg-sulfate Hydrated Sulfates Hydrated Mg-Fe sulfate Perchlorates
+## Min. :-0.4651 Min. :-0.366 Min. :-0.4651 Min. :-0.25
+## 1st Qu.:-0.4651 1st Qu.:-0.366 1st Qu.:-0.4651 1st Qu.:-0.25
+## Median :-0.4651 Median :-0.366 Median :-0.4651 Median :-0.25
+## Mean : 0.0000 Mean : 0.000 Mean : 0.0000 Mean : 0.00
+## 3rd Qu.:-0.4651 3rd Qu.:-0.366 3rd Qu.:-0.4651 3rd Qu.:-0.25
+## Max. : 2.0156 Max. : 2.562 Max. : 2.0156 Max. : 3.75
+##
+## Na-perchlorate Amorphous Silicate Phosphate Pyroxene
+## Min. :-0.25 Min. :-0.7733 Min. :-0.5704 Min. :-1.4361
+## 1st Qu.:-0.25 1st Qu.:-0.7733 1st Qu.:-0.5704 1st Qu.:-1.4361
+## Median :-0.25 Median :-0.7733 Median :-0.5704 Median : 0.6528
+## Mean : 0.00 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
+## 3rd Qu.:-0.25 3rd Qu.: 0.6014 3rd Qu.: 0.3071 3rd Qu.: 0.6528
+## Max. : 3.75 Max. : 1.9761 Max. : 2.2376 Max. : 0.6528
+##
+## Olivine Carbonate Fe-Mg carbonate Hydrated Carbonates
+## Min. :-1.0830 Min. :-1.9823 Min. :-0.366 Min. : NA
+## 1st Qu.:-1.0830 1st Qu.:-0.8014 1st Qu.:-0.366 1st Qu.: NA
+## Median : 0.1911 Median : 0.7170 Median :-0.366 Median : NA
+## Mean : 0.0000 Mean : 0.0000 Mean : 0.000 Mean :NaN
+## 3rd Qu.: 0.9556 3rd Qu.: 0.7170 3rd Qu.:-0.366 3rd Qu.: NA
+## Max. : 0.9556 Max. : 0.7170 Max. : 2.562 Max. : NA
+## NA's :16
+## Disordered Silicates Feldspar Quartz Apatite
+## Min. :-0.366 Min. :-0.366 Min. :-0.366 Min. :-0.4122
+## 1st Qu.:-0.366 1st Qu.:-0.366 1st Qu.:-0.366 1st Qu.:-0.4122
+## Median :-0.366 Median :-0.366 Median :-0.366 Median :-0.4122
+## Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean : 0.0000
+## 3rd Qu.:-0.366 3rd Qu.:-0.366 3rd Qu.:-0.366 3rd Qu.:-0.4122
+## Max. : 2.562 Max. : 2.562 Max. : 2.562 Max. : 2.5188
+##
+## FeTi oxides Halite Iron oxide Hydrated Iron oxide
+## Min. :-0.4122 Min. :-0.4651 Min. :-0.7092 Min. :-0.25
+## 1st Qu.:-0.4122 1st Qu.:-0.4651 1st Qu.:-0.7092 1st Qu.:-0.25
+## Median :-0.4122 Median :-0.4651 Median :-0.7092 Median :-0.25
+## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00
+## 3rd Qu.:-0.4122 3rd Qu.:-0.4651 3rd Qu.: 0.5516 3rd Qu.:-0.25
+## Max. : 2.5188 Max. : 2.0156 Max. : 1.8123 Max. : 3.75
+##
+## Organic matter Sulfate+Organic matter Other hydrated phases
+## Min. :-1.2319 Min. :-0.55156 Min. :-0.9139
+## 1st Qu.:-1.2319 1st Qu.:-0.55156 1st Qu.:-0.9139
+## Median : 0.8429 Median :-0.55156 Median :-0.3917
+## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
+## 3rd Qu.: 0.8429 3rd Qu.: 0.07879 3rd Qu.: 1.1750
+## Max. : 0.8429 Max. : 1.96987 Max. : 1.1750
+##
+## Phyllosilicates Chlorite Kaolinite (hydrous Al-clay)
+## Min. :-0.5217 Min. :-0.366 Min. :-0.4651
+## 1st Qu.:-0.5217 1st Qu.:-0.366 1st Qu.:-0.4651
+## Median :-0.5217 Median :-0.366 Median :-0.4651
+## Mean : 0.0000 Mean : 0.000 Mean : 0.0000
+## 3rd Qu.:-0.1739 3rd Qu.:-0.366 3rd Qu.:-0.4651
+## Max. : 2.2607 Max. : 2.562 Max. : 2.0156
+##
+## Chromite Ilmenite Zircon/Baddeleyite Fe-Mg-clay minerals
+## Min. :-0.366 Min. :-0.366 Min. :-0.366 Min. :-0.4651
+## 1st Qu.:-0.366 1st Qu.:-0.366 1st Qu.:-0.366 1st Qu.:-0.4651
+## Median :-0.366 Median :-0.366 Median :-0.366 Median :-0.4651
+## Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean : 0.0000
+## 3rd Qu.:-0.366 3rd Qu.:-0.366 3rd Qu.:-0.366 3rd Qu.:-0.4651
+## Max. : 2.562 Max. : 2.562 Max. : 2.562 Max. : 2.0156
+##
+## Spinels feldspar plagioclase pyroxene
+## Min. :-0.366 Min. :-0.366 Min. :-0.4651 Min. :-1.4361
+## 1st Qu.:-0.366 1st Qu.:-0.366 1st Qu.:-0.4651 1st Qu.:-1.4361
+## Median :-0.366 Median :-0.366 Median :-0.4651 Median : 0.6528
+## Mean : 0.000 Mean : 0.000 Mean : 0.0000 Mean : 0.0000
+## 3rd Qu.:-0.366 3rd Qu.:-0.366 3rd Qu.:-0.4651 3rd Qu.: 0.6528
+## Max. : 2.562 Max. : 2.562 Max. : 2.0156 Max. : 0.6528
+##
+## olivine quartz apatite FeTi_Oxides
+## Min. :-1.25 Min. :-0.366 Min. :-0.4651 Min. :-0.4651
+## 1st Qu.:-1.25 1st Qu.:-0.366 1st Qu.:-0.4651 1st Qu.:-0.4651
+## Median : 0.75 Median :-0.366 Median :-0.4651 Median :-0.4651
+## Mean : 0.00 Mean : 0.000 Mean : 0.0000 Mean : 0.0000
+## 3rd Qu.: 0.75 3rd Qu.:-0.366 3rd Qu.:-0.4651 3rd Qu.:-0.4651
+## Max. : 0.75 Max. : 2.562 Max. : 2.0156 Max. : 2.0156
+##
+## Iron_Oxide Sulfate Perchlorates Phosphate
+## Min. :-0.8539 Min. :-1.677 Min. :-0.25 Min. :-0.6528
+## 1st Qu.:-0.8539 1st Qu.: 0.000 1st Qu.:-0.25 1st Qu.:-0.6528
+## Median :-0.8539 Median : 0.559 Median :-0.25 Median :-0.6528
+## Mean : 0.0000 Mean : 0.000 Mean : 0.00 Mean : 0.0000
+## 3rd Qu.: 1.0979 3rd Qu.: 0.559 3rd Qu.:-0.25 3rd Qu.: 1.4361
+## Max. : 1.0979 Max. : 0.559 Max. : 3.75 Max. : 1.4361
+##
+## Ca_Sulfate Carbonate Fe_Mg_clay Fe_Mg_carbonate
+## Min. :-0.75 Min. :-3.75 Min. :-0.4651 Min. :-0.366
+## 1st Qu.:-0.75 1st Qu.: 0.25 1st Qu.:-0.4651 1st Qu.:-0.366
+## Median :-0.75 Median : 0.25 Median :-0.4651 Median :-0.366
+## Mean : 0.00 Mean : 0.00 Mean : 0.0000 Mean : 0.000
+## 3rd Qu.: 1.25 3rd Qu.: 0.25 3rd Qu.:-0.4651 3rd Qu.:-0.366
+## Max. : 1.25 Max. : 0.25 Max. : 2.0156 Max. : 2.562
+##
+## Mg_sulfate Phyllosilicates Chlorite Halite
+## Min. :-0.4651 Min. :-0.559 Min. :-0.366 Min. :-0.4651
+## 1st Qu.:-0.4651 1st Qu.:-0.559 1st Qu.:-0.366 1st Qu.:-0.4651
+## Median :-0.4651 Median :-0.559 Median :-0.366 Median :-0.4651
+## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.0000
+## 3rd Qu.:-0.4651 3rd Qu.: 0.000 3rd Qu.:-0.366 3rd Qu.:-0.4651
+## Max. : 2.0156 Max. : 1.677 Max. : 2.562 Max. : 2.0156
+##
+## Organic_matter Hydrated_Ca_Sulfate Hydrated_Sulfates Hydrated_Mg_Fe_Sulfate
+## Min. :-1.4361 Min. :-0.366 Min. :-0.366 Min. :-0.4651
+## 1st Qu.:-1.4361 1st Qu.:-0.366 1st Qu.:-0.366 1st Qu.:-0.4651
+## Median : 0.6528 Median :-0.366 Median :-0.366 Median :-0.4651
+## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.0000
+## 3rd Qu.: 0.6528 3rd Qu.:-0.366 3rd Qu.:-0.366 3rd Qu.:-0.4651
+## Max. : 0.6528 Max. : 2.562 Max. : 2.562 Max. : 2.0156
+##
+## Na_Perchlorate Amorphous_Silicate Hydrated_Carbonates Disordered_Silicates
+## Min. :-0.25 Min. :-0.8539 Min. : NA Min. :-0.366
+## 1st Qu.:-0.25 1st Qu.:-0.8539 1st Qu.: NA 1st Qu.:-0.366
+## Median :-0.25 Median :-0.8539 Median : NA Median :-0.366
+## Mean : 0.00 Mean : 0.0000 Mean :NaN Mean : 0.000
+## 3rd Qu.:-0.25 3rd Qu.: 1.0979 3rd Qu.: NA 3rd Qu.:-0.366
+## Max. : 3.75 Max. : 1.0979 Max. : NA Max. : 2.562
+## NA's :16
+## Hydrated_Iron_Oxide Sulfate+Organic_Matter Other_hydrated_phases
+## Min. :-0.25 Min. :-0.6528 Min. :-0.9682
+## 1st Qu.:-0.25 1st Qu.:-0.6528 1st Qu.:-0.9682
+## Median :-0.25 Median :-0.6528 Median : 0.0000
+## Mean : 0.00 Mean : 0.0000 Mean : 0.0000
+## 3rd Qu.:-0.25 3rd Qu.: 1.4361 3rd Qu.: 0.9682
+## Max. : 3.75 Max. : 1.4361 Max. : 0.9682
+##
+## Kaolinite Chromite Ilmenite Zircon/Baddeleyite
+## Min. :-0.4651 Min. :-0.366 Min. :-0.366 Min. :-0.366
+## 1st Qu.:-0.4651 1st Qu.:-0.366 1st Qu.:-0.366 1st Qu.:-0.366
+## Median :-0.4651 Median :-0.366 Median :-0.366 Median :-0.366
+## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.000
+## 3rd Qu.:-0.4651 3rd Qu.:-0.366 3rd Qu.:-0.366 3rd Qu.:-0.366
+## Max. : 2.0156 Max. : 2.562 Max. : 2.562 Max. : 2.562
+##
+## Spinels Na20 Mgo Al203
+## Min. :-0.366 Min. :-1.1206 Min. :-1.3765 Min. :-0.8991
+## 1st Qu.:-0.366 1st Qu.:-0.5494 1st Qu.:-1.1500 1st Qu.:-0.7605
+## Median :-0.366 Median :-0.5176 Median : 0.1404 Median :-0.3631
+## Mean : 0.000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
+## 3rd Qu.:-0.366 3rd Qu.: 1.2245 3rd Qu.: 0.9322 3rd Qu.: 0.5455
+## Max. : 2.562 Max. : 1.9280 Max. : 1.3847 Max. : 1.7407
+##
+## Si02 P205 S03 Cl
+## Min. :-1.44691 Min. :-0.7945 Min. :-0.6389 Min. :-1.11146
+## 1st Qu.:-0.66513 1st Qu.:-0.5999 1st Qu.:-0.5433 1st Qu.:-0.69646
+## Median : 0.02732 Median :-0.1820 Median :-0.3957 Median :-0.08165
+## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
+## 3rd Qu.: 0.23751 3rd Qu.: 0.2720 3rd Qu.:-0.2354 3rd Qu.: 0.17964
+## Max. : 1.68204 Max. : 3.0392 Max. : 2.1333 Max. : 2.03944
+##
+## K20 Cao Ti02 Cr203
+## Min. :-0.8190 Min. :-1.1256 Min. :-1.1321 Min. :-0.5791
+## 1st Qu.:-0.5931 1st Qu.:-0.5313 1st Qu.:-0.4192 1st Qu.:-0.5383
+## Median :-0.5366 Median :-0.2920 Median :-0.2182 Median :-0.3263
+## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
+## 3rd Qu.: 0.3495 3rd Qu.: 0.3203 3rd Qu.: 0.3119 3rd Qu.:-0.1060
+## Max. : 1.8640 Max. : 2.1006 Max. : 3.0535 Max. : 2.5204
+##
+## Mno FeO-T
+## Min. :-1.5772 Min. :-1.3848
+## 1st Qu.:-0.5678 1st Qu.:-0.7989
+## Median : 0.1051 Median : 0.4077
+## Mean : 0.0000 Mean : 0.0000
+## 3rd Qu.: 0.6099 3rd Qu.: 0.7173
+## Max. : 1.7314 Max. : 1.4512
+##
+#Prepare matrix for cluster plot
+# Drop every column that contains NA after scaling (in this data: the two
+# all-NA "Hydrated Carbonates" columns shown in the summary above), because
+# kmeans() cannot cluster a matrix with missing values. Columns are selected
+# by content rather than by position, so the result does not depend on the
+# column order or on index shifts caused by an earlier removal.
+complete_columns <- colSums(is.na(sherloc_lithology_pixl_scaled.matrix)) == 0
+sherloc_lithology_pixl_scaled.matrix <- sherloc_lithology_pixl_scaled.matrix[, complete_columns, drop = FALSE]
+
+# s_l_p_scaled.df <- data.frame(sherloc_lithology_pixl_scaled.matrix)
+# Sample <- 1:16
+# s_l_p_scaled.df <- cbind(Sample,s_l_p_scaled.df)
+#
+# ggplot(sherloc_lithology_pixl_scaled.df, aes(x=Sample), colour = Sample) +
+# geom_line()
+_Perform a creative analysis that provides -insights into what one or more of the clusters are and what they tell -you about the MARS data: Alternatively do another creative analysis of -your datasets that leads to one or more findings. Make sure to explain -what your analysis does and discuss the results.
wssplot <- function(data, nc = 15, seed =10) {
+  # Elbow plot for choosing k: runs k-means for k = 1..nc and plots the total
+  # within-cluster sum of squares against k.
+  #
+  # data: numeric matrix or data frame accepted by kmeans() (no NA values).
+  # nc:   largest number of clusters to try; must not exceed the number of
+  #       distinct rows in data, otherwise kmeans() stops with an error.
+  # seed: RNG seed re-applied before every kmeans() call, so each k starts
+  #       from the same random state and the curve is reproducible.
+  #
+  # Returns the ggplot object (a line plot of quality vs. cluster count).
+  #
+  # seq_len() is used instead of 1:nc so that nc = 0 yields an empty sequence
+  # rather than the sequence c(1, 0).
+  cluster_counts <- seq_len(nc)
+  total_withinss <- vapply(cluster_counts, function(k) {
+    set.seed(seed)
+    kmeans(data, centers = k)$tot.withinss
+  }, numeric(1))
+  wss <- data.frame(cluster = cluster_counts, quality = total_withinss)
+  ggplot(data = wss, aes(x = cluster, y = quality)) +
+    geom_line() +
+    ggtitle("Quality of k-means by Cluster")
+}
+
+wssplot(sherloc_lithology_pixl_scaled.matrix, nc=8, seed=2469)  # elbow plot for k = 1..8; reseeds the RNG with 2469 before each fit
+
+#Select 4 clusters based on plot
+
+kmean <- kmeans(sherloc_lithology_pixl_scaled.matrix, centers=4)  # NOTE(review): no set.seed() here - the result depends on the RNG state left by the wssplot() call above, so keep that call directly before this line
+#Make 2 heatmaps, look for connections
+pheatmap(kmean$centers[,1:41],scale="none")  # cluster centres, columns 1-41; scale="none" because the input matrix was already passed through scale()
+
+pheatmap(kmean$centers[,42:81],scale="none")  # cluster centres, columns 42-81 (hard-coded split assumes 81 columns remain after the NA columns were dropped)
+
+# H.pca <- prcomp(sherloc_lithology_pixl_scaled.matrix,scale=FALSE)
+#
+# ggbiplot::ggbiplot(H.pca,
+# groups = as.factor(kmean$cluster),varname.size=1, var.axes = 0)+
+# xlim(-3,3) + ylim(-3,3)
+
+
+#Determine Cluster sizes
+kmean[["cluster"]]  # prints the cluster label assigned to each row of the clustered matrix; sizes are tallied by hand below
+## [1] 3 2 2 4 4 4 4 4 4 4 4 1 1 4 4 1
+#1: 3 Samples
+#2: 2 Samples
+#3: 1 Sample
+#4: 10 Samples