Skip to content

Pathfinder (enrichment ComplexHeatmap) #80

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
187 changes: 168 additions & 19 deletions dev-notebooks/STRINGdb-Analysis.Rmd
Expand Up @@ -3,8 +3,8 @@ title: 'STRINGdb: A Database of Known and Predicted Protein-protein Interactions
author: "Co-adapted by Haowen He and Rachael White from the 2015 Vignette by Andrea Franceschini"
subtitle: IDEA Alzheimer's Data Analysis Bootcamp - Summer 2022
output:
html_document: default
pdf_document: default
html_document: default
always_allow_html: true
---

Expand Down Expand Up @@ -180,12 +180,12 @@ string_db$plot_network(hits, payload_id=payload_id)
## Clustering

```{r, fig.width=10, fig.height=10, fig.fullwidth=TRUE, fig.fullheight=TRUE}
clustersList <- string_db$get_clusters(example1_mapped$STRING_id[1:600])
#clustersList <- string_db$get_clusters(example1_mapped$STRING_id[1:600])
# plot first 4 clusters
par(mfrow=c(2,2))
for(i in seq(1:4)){
string_db$plot_network(clustersList[[i]])
}
#par(mfrow=c(2,2))
#for(i in seq(1:4)){
# string_db$plot_network(clustersList[[i]])
# }
```

\newpage
Expand Down Expand Up @@ -237,14 +237,10 @@ dim(seurat_mapped)
hits_seurat <- seurat_mapped$STRING_id[1:200]
```

\newpage

```{r, fig.width=10, fig.height=10, fig.fullwidth=TRUE, fig.fullheight=TRUE}
string_db$plot_network(hits_seurat)
#string_db$plot_network(hits_seurat)
```

\newpage

```{r}
# PAYLOAD MECHANISM
# filter by p-value and add a color column
Expand Down Expand Up @@ -272,22 +268,175 @@ payload_id_seurat <- string_db$post_payload( seurat_mapped_pval05$STRING_id,
colors = seurat_mapped_pval05$color )
```

\newpage

```{r, fig.width=10, fig.height=10, fig.fullwidth=TRUE, fig.fullheight=TRUE}
string_db$plot_network(hits_seurat)
#string_db$plot_network(hits_seurat)
```

```{r, fig.width=10, fig.height=10, fig.fullwidth=TRUE, fig.fullheight=TRUE}
# display a STRING network png with the "halo"
string_db$plot_network(hits_seurat, payload_id = payload_id_seurat)
title(sub = "STRINGdb: protein-protein interaction of Astrocytes at 2mo",
cex.sub = 1, font.sub = 3, col.sub = "darkgreen"
)

clustersList_seurat <- string_db$get_clusters(seurat_mapped$STRING_id[1:600])
# clustersList_seurat <- string_db$get_clusters(seurat_mapped$STRING_id[1:600])
# plot first 4 clusters
par(mfrow=c(2,2))
for(i in seq(1:4)){
string_db$plot_network(clustersList_seurat[[i]])
}
#par(mfrow=c(2,2))
#for(i in seq(1:4)){
# string_db$plot_network(clustersList_seurat[[i]])
#}
```

\newpage

```{r, fig.width=10, fig.height=10, fig.fullwidth=TRUE, fig.fullheight=TRUE}
# Astrocytes at 4mo: differential expression (V337M vs V337V) followed by a
# STRINGdb network plot whose nodes carry a differential-expression "halo".
# Relies on `string_db`, Seurat and dplyr/magrittr being set up earlier in
# the notebook.

# specify your cell type
my_celltype <- "Ast"
# specify time point of interest
my_timept_2 <- "4mo"

# Load Seurat data
# Change the filepath in quotes "" below to match where you
# have your celltype data saved
# The expected filepath is "~/AlzheimersDS/student-notebooks/FinalMergedData-my_celltype.rds"
# (where `my_celltype` is your celltype code)
my_celltype_data <- readRDS("/data/Alzheimers_DS/FinalMergedData-Ast.rds")

# Subset my celltype-specific Seurat object by time
my_seurat_2 <- subset(x = my_celltype_data, subset = Age == my_timept_2)

# set active identities of my subset seurat to "Mt" attribute (denoting variant)
# for DE testing
Idents(my_seurat_2) <- "Mt"
summary(as.factor(my_seurat_2$Mt))

# Run D.E. test to identify genes differentially expressed
# between diseased and normal cells, for my celltype and at this timepoint
marker_genes.df_2 <- FindMarkers(my_seurat_2,
                                 ident.1 = "V337M", ident.2 = "V337V")

# order results by p-value
marker_genes.df_2 <- marker_genes.df_2 %>% arrange(p_val)
head(marker_genes.df_2)

# format a new dataframe using the contents of
# marker_genes.df_2, to pass to STRINGdb
stringdb.df_2 <- marker_genes.df_2 %>%
  tibble::rownames_to_column("geneIDs") %>%
  dplyr::select(geneIDs, avg_log2FC, p_val)
colnames(stringdb.df_2) <- c("geneIDs", "logFC", "pvalue")

# mapping
seurat_mapped_2 <- string_db$map(stringdb.df_2, "geneIDs",
                                 removeUnmappedRows = TRUE)
dim(seurat_mapped_2)

# View(seurat_mapped_2)
# Keep at most the first 200 mapped genes. head() is used instead of [1:200]
# so that fewer than 200 mapped rows does not pad the vector with NA ids,
# which would then be passed on to plot_network().
hits_seurat_2 <- head(seurat_mapped_2$STRING_id, 200)

# PAYLOAD MECHANISM
# filter by p-value and add a color column
# (i.e. green for down-regulated genes and red for up-regulated genes)
head(subset(seurat_mapped_2, pvalue < 0.05), 20)

dim(seurat_mapped_2[seurat_mapped_2$pvalue < 0.05, ])

seurat_mapped_pval05_2 <- string_db$add_diff_exp_color(
  subset(seurat_mapped_2, pvalue < 0.05),
  logFcColStr = "logFC"
)
head(seurat_mapped_pval05_2)

table(seurat_mapped_pval05_2$color)

dim(seurat_mapped_pval05_2)

# Inspect the rows that received specific halo colors. The hex codes are
# hard-coded: the first is a pale green, the other two are pale reds.
# NOTE(review): these presumably match values seen in the table() output
# above for an earlier run -- confirm they still occur for this timepoint.
seurat_mapped_pval05_2[seurat_mapped_pval05_2$color == '#D7FFD7FF', ]

seurat_mapped_pval05_2[seurat_mapped_pval05_2$color == '#FFEDEDFF', ]

seurat_mapped_pval05_2[seurat_mapped_pval05_2$color == '#FFF6F6FF', ]

# post payload information to the STRING server
payload_id_seurat_2 <- string_db$post_payload(seurat_mapped_pval05_2$STRING_id,
                                              colors = seurat_mapped_pval05_2$color)

# display a STRING network png with the "halo"
string_db$plot_network(hits_seurat_2, payload_id = payload_id_seurat_2)
title(sub = "STRINGdb: protein-protein interaction of Astrocytes at 4mo",
      cex.sub = 1, font.sub = 3, col.sub = "darkgreen"
)
```

\newpage

```{r, fig.width=10, fig.height=8, fig.fullwidth=TRUE, fig.fullheight=TRUE}
# Astrocytes at 6mo: differential expression (V337M vs V337V) followed by a
# STRINGdb network plot whose nodes carry a differential-expression "halo".
# Relies on `string_db`, Seurat and dplyr/magrittr being set up earlier in
# the notebook.

# specify your cell type
my_celltype <- "Ast"
# specify time point of interest
my_timept_3 <- "6mo"

# Load Seurat data
# Change the filepath in quotes "" below to match where you
# have your celltype data saved
# The expected filepath is "~/AlzheimersDS/student-notebooks/FinalMergedData-my_celltype.rds"
# (where `my_celltype` is your celltype code)
my_celltype_data <- readRDS("/data/Alzheimers_DS/FinalMergedData-Ast.rds")

# Subset my celltype-specific Seurat object by time
my_seurat_3 <- subset(x = my_celltype_data, subset = Age == my_timept_3)

# set active identities of my subset seurat to "Mt" attribute (denoting variant)
# for DE testing
Idents(my_seurat_3) <- "Mt"
summary(as.factor(my_seurat_3$Mt))

# Run D.E. test to identify genes differentially expressed
# between diseased and normal cells, for my celltype and at this timepoint
marker_genes.df_3 <- FindMarkers(my_seurat_3,
                                 ident.1 = "V337M", ident.2 = "V337V")

# order results by p-value
marker_genes.df_3 <- marker_genes.df_3 %>% arrange(p_val)
head(marker_genes.df_3)

# format a new dataframe using the contents of
# marker_genes.df_3, to pass to STRINGdb
stringdb.df_3 <- marker_genes.df_3 %>%
  tibble::rownames_to_column("geneIDs") %>%
  dplyr::select(geneIDs, avg_log2FC, p_val)
colnames(stringdb.df_3) <- c("geneIDs", "logFC", "pvalue")

# mapping
seurat_mapped_3 <- string_db$map(stringdb.df_3, "geneIDs",
                                 removeUnmappedRows = TRUE)
dim(seurat_mapped_3)

# View(seurat_mapped_3)
# Keep at most the first 200 mapped genes. head() is used instead of [1:200]
# so that fewer than 200 mapped rows does not pad the vector with NA ids,
# which would then be passed on to plot_network().
hits_seurat_3 <- head(seurat_mapped_3$STRING_id, 200)

# PAYLOAD MECHANISM
# filter by p-value and add a color column
# (i.e. green for down-regulated genes and red for up-regulated genes)
head(subset(seurat_mapped_3, pvalue < 0.05), 20)

dim(seurat_mapped_3[seurat_mapped_3$pvalue < 0.05, ])

seurat_mapped_pval05_3 <- string_db$add_diff_exp_color(
  subset(seurat_mapped_3, pvalue < 0.05),
  logFcColStr = "logFC"
)
head(seurat_mapped_pval05_3)

table(seurat_mapped_pval05_3$color)

dim(seurat_mapped_pval05_3)

# Inspect the rows that received specific halo colors. The hex codes are
# hard-coded: the first is a pale green, the other two are pale reds.
# NOTE(review): these presumably match values seen in the table() output
# above for an earlier run -- confirm they still occur for this timepoint.
seurat_mapped_pval05_3[seurat_mapped_pval05_3$color == '#D7FFD7FF', ]

seurat_mapped_pval05_3[seurat_mapped_pval05_3$color == '#FFEDEDFF', ]

seurat_mapped_pval05_3[seurat_mapped_pval05_3$color == '#FFF6F6FF', ]

# post payload information to the STRING server
payload_id_seurat_3 <- string_db$post_payload(seurat_mapped_pval05_3$STRING_id,
                                              colors = seurat_mapped_pval05_3$color)

# display a STRING network png with the "halo"
string_db$plot_network(hits_seurat_3, payload_id = payload_id_seurat_3)
title(sub = "STRINGdb: protein-protein interaction of Astrocytes at 6mo",
      cex.sub = 1, font.sub = 3, col.sub = "darkgreen"
)
```
308 changes: 280 additions & 28 deletions dev-notebooks/STRINGdb-Analysis.html

Large diffs are not rendered by default.

Binary file modified dev-notebooks/STRINGdb-Analysis.pdf
Binary file not shown.