From 6616976d8ca020719e6dc51f2a036469e65820f1 Mon Sep 17 00:00:00 2001 From: Rachael White Date: Thu, 3 Nov 2022 00:29:50 -0400 Subject: [PATCH] Dotpots update --- app.R | 532 +++++------------- ..._data_allgenes_alltimepoints_withpvals.rds | 3 + 2 files changed, 147 insertions(+), 388 deletions(-) create mode 100644 avgexp_pctexp_data_allgenes_alltimepoints_withpvals.rds diff --git a/app.R b/app.R index 4afe8bb..742bee4 100755 --- a/app.R +++ b/app.R @@ -21,7 +21,8 @@ suppressMessages(suppressWarnings(library(gprofiler2, warn.conflicts = FALSE, qu #suppressMessages(suppressWarnings(library(enrichplot, warn.conflicts = FALSE, quietly=TRUE))) #suppressMessages(suppressWarnings(library(DOSE, warn.conflicts = FALSE, quietly=TRUE))) -# Utility functions #################################################### +# Utility and plotting functions ####################################################################### + # Convert cell type codes to their real names for plotting setCellTypeNames <- function(df) { @@ -64,7 +65,7 @@ summaryPlot <- function(my_celltype,marker_genes.df,ngenes) { return(p) } - +##################################################################################### # Volcano Plots: # put the biological significance (fold changes) # and statistical significance (p-value) in one plot @@ -89,7 +90,7 @@ volcanoPlot <- function(results.df, fold_cutoff=0.5, ifelse(logFC < -(fold_cutoff), "Significant, downregulated in Tau-mutant cells", ifelse(logFC > fold_cutoff, - "Significant, upregulated in Tau-mutant cells", "Significant"))) + "Significant, upregulated in Tau-mutant cells", "Significant"))) stats.df$diffexpressed <- as.factor(stats.df$diffexpressed) # Base plot @@ -97,7 +98,8 @@ volcanoPlot <- function(results.df, fold_cutoff=0.5, col=percent_expressed, text = geneIDs)) + geom_point() + ggtitle(title) + - labs(y="-log10(pvalue)",color = "Percent of Tau-mutant cells expressing",shape="Differential expression status") + + labs(y="-log10(pvalue)",color = "Percent of Tau-mutant cells expressing", + shape="Differential expression status") + geom_vline(xintercept=c(-fold_cutoff, fold_cutoff), col="red") + # scale_color_manual(values=c("black", "blue", "red"), # name = "Differential expression\nstatus") + @@ -105,221 +107,67 @@ volcanoPlot <- function(results.df, fold_cutoff=0.5, return(plot) } - -# Modified seurat dot plot -DotPlot_mod <- function( - object, - assay = NULL, - features, - cols = c("lightgrey", "blue"), - col.min = -2.5, - col.max = 2.5, - dot.min = 0, - dot.scale = 6, - idents = NULL, - group.by = NULL, - split.by = NULL, - cluster.idents = FALSE, - scale = F, - scale.by = 'radius', - scale.min = NA, - scale.max = NA -) { - assay <- assay %||% DefaultAssay(object = object) - DefaultAssay(object = object) <- assay - split.colors <- !is.null(x = split.by) - scale.func <- switch( - EXPR = scale.by, - 'size' = scale_size, - 'radius' = scale_radius, - stop("'scale.by' must be either 'size' or 'radius'") - ) - feature.groups <- NULL - if (is.list(features) | any(!is.na(names(features)))) { - feature.groups <- unlist(x = sapply( - X = 1:length(features), - FUN = function(x) { - return(rep(x = names(x = features)[x], each = length(features[[x]]))) - } - )) - if (any(is.na(x = feature.groups))) { - warning( - "Some feature groups are unnamed.", - call. = FALSE, - immediate. = TRUE - ) - } - features <- unlist(x = features) - names(x = feature.groups) <- features - } - cells <- unlist(x = CellsByIdentities(object = object, idents = idents)) +##################################################################################### +DotPlotggSig <- function(input_gene, genes.df, cellcats.to.plot, log.scale=F, + dot.scale.factor = 15, + scale.min = 10, + scale.max = 100, + title = "Average expression by variant, over time") { - data.features <- FetchData(object = object, vars = features, cells = cells) - data.features$id <- if (is.null(x = group.by)) { - Idents(object = object)[cells, drop = TRUE] - } else { - object[[group.by, drop = TRUE]][cells, drop = TRUE] - } - if (!is.factor(x = data.features$id)) { - data.features$id <- factor(x = data.features$id) - } - id.levels <- levels(x = data.features$id) - data.features$id <- as.vector(x = data.features$id) - if (!is.null(x = split.by)) { - splits <- object[[split.by, drop = TRUE]][cells, drop = TRUE] - if (split.colors) { - if (length(x = unique(x = splits)) > length(x = cols)) { - stop("Not enough colors for the number of groups") - } - cols <- cols[1:length(x = unique(x = splits))] - names(x = cols) <- unique(x = splits) - } - data.features$id <- paste(data.features$id, splits, sep = '_') - unique.splits <- unique(x = splits) - id.levels <- paste0(rep(x = id.levels, each = length(x = unique.splits)), "_", rep(x = unique(x = splits), times = length(x = id.levels))) - } - data.plot <- lapply( - X = unique(x = data.features$id), - FUN = function(ident) { - data.use <- data.features[data.features$id == ident, 1:(ncol(x = data.features) - 1), drop = FALSE] - avg.exp <- apply( - X = data.use, - MARGIN = 2, - FUN = function(x) { - return(mean(x = expm1(x = x))) - } - ) - pct.exp <- apply(X = data.use, MARGIN = 2, FUN = PercentAbove, threshold = 0) - return(list(avg.exp = avg.exp, pct.exp = pct.exp)) - } - ) - names(x = data.plot) <- unique(x = data.features$id) - if (cluster.idents) { - mat <- do.call( - what = rbind, - args = lapply(X = data.plot, FUN = unlist) - ) - mat <- scale(x = mat) - id.levels <- id.levels[hclust(d = dist(x = mat))$order] - } - data.plot <- lapply( - X = names(x = data.plot), - FUN = function(x) { - data.use <- as.data.frame(x = data.plot[[x]]) - data.use$features.plot <- rownames(x = data.use) - data.use$id <- x - return(data.use) - } - ) - data.plot <- do.call(what = 'rbind', args = data.plot) - if (!is.null(x = id.levels)) { - data.plot$id <- factor(x = data.plot$id, levels = id.levels) - } - ngroup <- length(x = levels(x = data.plot$id)) - if (ngroup == 1) { - scale <- FALSE - warning( - "Only one identity present, the expression values will be not scaled", - call. = FALSE, - immediate. = TRUE - ) - } else if (ngroup < 5 & scale) { - warning( - "Scaling data with a low number of groups may produce misleading results", - call. = FALSE, - immediate. = TRUE - ) - } - avg.exp.scaled <- sapply( - X = unique(x = data.plot$features.plot), - FUN = function(x) { - data.use <- data.plot[data.plot$features.plot == x, 'avg.exp'] - if (scale) { - data.use <- scale(x = data.use) - data.use <- MinMax(data = data.use, min = col.min, max = col.max) - } else { - data.use <- log1p(x = data.use) - } - return(data.use) - } - ) - avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled)) - if (split.colors) { - avg.exp.scaled <- as.numeric(x = cut(x = avg.exp.scaled, breaks = 20)) - } - data.plot$avg.exp.scaled <- avg.exp.scaled - data.plot$features.plot <- factor( - x = data.plot$features.plot, - levels = features - ) - data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA - data.plot$pct.exp <- data.plot$pct.exp * 100 - if (split.colors) { - splits.use <- vapply( - X = as.character(x = data.plot$id), - FUN = gsub, - FUN.VALUE = character(length = 1L), - pattern = paste0( - '^((', - paste(sort(x = levels(x = object), decreasing = TRUE), collapse = '|'), - ')_)' - ), - replacement = '', - USE.NAMES = FALSE - ) - data.plot$colors <- mapply( - FUN = function(color, value) { - return(colorRampPalette(colors = c('grey', color))(20)[value]) - }, - color = cols[splits.use], - value = avg.exp.scaled - ) - } - color.by <- ifelse(test = split.colors, yes = 'colors', no = 'avg.exp.scaled') - if (!is.na(x = scale.min)) { - data.plot[data.plot$pct.exp < scale.min, 'pct.exp'] <- scale.min - } - if (!is.na(x = scale.max)) { - data.plot[data.plot$pct.exp > scale.max, 'pct.exp'] <- scale.max - } - if (!is.null(x = feature.groups)) { - data.plot$feature.groups <- factor( - x = feature.groups[data.plot$features.plot], - levels = unique(x = feature.groups) - ) - } - plot <- ggplot(data = data.plot, mapping = aes_string(x = 'features.plot', y = 'id')) + - geom_point(mapping = aes_string(size = 'pct.exp', color = color.by)) + - scale.func(range = c(0, dot.scale), limits = c(scale.min, scale.max)) + - guides(size = guide_legend(title = 'Percent of\nCells Expressing')) - if (!is.null(x = feature.groups)) { - plot <- plot + facet_grid( - facets = ~feature.groups, - scales = "free_x", - space = "free_x", - switch = "y" - ) + theme( - panel.spacing = unit(x = 1, units = "lines"), - strip.background = element_blank() + # prep data + res.gene <- genes.df %>% dplyr::filter(features.plot==all_of(input_gene)) %>% + dplyr::filter(CellType %in% cellcats.to.plot) + + res.gene$Variant <- recode_factor(res.gene$Variant,"V337M" = "FTD-Tau mutant cells", + "V337V" = "Isogenic control cells") + # significance anaotations + res.gene <- res.gene %>% mutate(plot_annotation = ifelse(is.na(p_val),"","*")) + + #res.gene <- setCellTypeNames(res.gene) + + # set desired coloring variable + color.by <- ifelse(test = log.scale, yes = 'avg.exp.scaled', + no = 'avg.exp') + color.by.legend.text <- ifelse(test = log.scale, + yes = 'Average normalized expression\n(log1p-transformed)', + no = 'Average normalized expression') + + # plot + dp <- ggplot(data = res.gene, mapping = aes(x = Timepoint, y = Variant)) + + geom_point(mapping = aes_string(size = 'pct.exp', color = color.by),na.rm=TRUE) + + + geom_text( + aes( label = plot_annotation), nudge_y = 1.5, + size = 8, color = "black") + + + scale_radius(range = c(0, dot.scale.factor), limits = c(scale.min, scale.max)) + + scale_color_gradient(low = "lightgray", high = "blue") + + guides(size = guide_legend(title = 'Percent Cells\nExpressing'), + color = guide_colorbar(title = color.by.legend.text)) + + labs(x = 'Developmental Timepoint', y = 'Condition', title=title, + caption = '* = significant difference in y-axis conditions at 0.01 threshold, + as found by differential expression testing with a generalized linear model for single-cell RNAseq data.') + + + # faceting + facet_wrap(~CellType) + + + # aesthetics + theme_linedraw()+ + theme(plot.caption.position = "plot", + plot.caption = element_text(size = 14,hjust = 0), + strip.text = element_text(size = 14), + axis.text = element_text(size = 16), + legend.text = element_text(size = 16), + legend.title = element_text(size = 16), + plot.title = element_text(size=16), + axis.title=element_text(size=16) ) - } - if (split.colors) { - plot <- plot + scale_color_identity() - # plot <- plot + scale_color_identity('Average Expression (Log Scale)',guide="legend",labels = data.plot$avg.exp.scaled, - # breaks = levels(as.factor(data.plot$colors))) - } else if (length(x = cols) == 1) { - plot <- plot + scale_color_distiller(palette = cols) - } else { - plot <- plot + scale_color_gradient(low = cols[1], high = cols[2]) - } - if (!split.colors) { - plot <- plot + guides(color = guide_colorbar(title = 'Log1p Averaged Expression')) - } - return(plot) + + + return(dp) } - - +##################################################################################### # Logos and footer prep # logos <- fixedRow(div( @@ -327,11 +175,9 @@ DotPlot_mod <- function( # img(src="NSCI_Full_Logo.png",align="left",width="150px")) # ) -###################################################################### +##################################################################################### # Data read-in - - # full dataset for all other app features seurat.all <- readRDS("FinalMergedData-downsampled.rds") # Switch active identities to cell type labels for plotting @@ -341,10 +187,13 @@ Idents(seurat.all) <- "CellType" celltype_data_with_codes <- readRDS("FetchDataOutput-AllCells.rds") %>% relocate(CellType) celltype_props <- readRDS("overall_celltype_props_data.rds") -# Average expression data for Gene Explorer Tab 2 line plots +# Average expression & percent expressed data for Gene Explorer line plots res_combined_ann <- readRDS("average_expression_allcelltypes_timepoints_mtvswt_withpvals.rds") + +# Average expression & percent expressed data for Gene Explorer dot plots +avg.pct.df <- readRDS("avgexp_pctexp_data_allgenes_alltimepoints_withpvals.rds") -# Marker gene statistics for CellType Explorer Tab 3 +# Marker gene statistics for Differential Expression Statistics Tab data <- read.csv("DEgenes_MtvsWt_alltimepts_allcelltypes.csv") @@ -501,7 +350,7 @@ GitHub_links <- "

Our source code is freely available at our
.container-fluid'); @@ -588,7 +437,7 @@ header.append('

* indicates significant difference in expression between V337M (FTD-Tau) and isogenic control cells + at an FDR-adjusted p-value threshold of 0.01,
as found by D.E. testing with the
MAST + generalized linear model for single-cell RNAseq data.

"))), + + hr() + + + ), + + tabPanel(title = "Expression Over Time - Trendline Plots", fixedRow(tags$p("Gene expression trajectory over time, colored by variant, and plotted separately for each cell type"), uiOutput("GeneTrajecConditional"), align="center"), fixedRow(plotOutput("CellTypesByVariant", height= "700px",width = "80%"), align="center"), hr(), fixedRow(align="center", + tags$p("About this plot:"), HTML("

* indicates significant difference in expression between V337M (FTD-Tau) and isogenic control cells at an FDR-adjusted p-value threshold of 0.01,
as found by D.E. testing with the MAST hurdle model.

"), + href='https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0844-5#:~:text=MAST%20accounts%20for%20the%20bimodality,or%20gene%20set%2Dbased%20statistics'>MAST hurdle model for scRNAseq data.

"), HTML("

Expression data shown is averaged from raw counts then log-normalized, separately at each timepoint and within each celltype.

"), hr() ) ), - tabPanel(title = "Expression Over Time - Expression Distributions at 2, 4 and 6 Months", + tabPanel(title = "Expression Over Time - Violin Plots", fixedRow(column(12,align="center", tags$p("Expression data shown is original read counts, log-normalized separately at each timepoint. This data normalization mode is consistent with the data shown in the previous tab.") ) @@ -697,28 +600,6 @@ header.append('
CellType: ", - # selected_ct, - # "
") - # ) - # ) - # ) - # - # }) - + # Tab 2: Gene Explorer ########################################################### # Plot UMAPS colored by input gene expression level with plot_density @@ -1772,90 +1594,24 @@ server <- function(input, output, session) { }) - output$GeneDotPlot2mo <- renderPlot({ - - time="2months" - tcode=substr(time,1,3) - sub <- subset(seurat.all,subset=Age==tcode) - sub$Mt <- recode_factor(sub$Mt,"V337M" = "FTD-Tau mutant\ncells", "V337V" = "Isogenic controls") - Idents(sub) <- "Mt" - DotPlot( - sub, - assay = "SCT", - features = input$gene, - cols = c("lightgrey", "blue"), - col.min = -2.5, - col.max = 2.5, - dot.min = 0, - dot.scale = 15, - # use log1p-normalized expression data - scale=F, - #set percentage scales - scale.min = 0, - scale.max = 100, - group.by = "Mt")+ - coord_fixed(ratio=2.5) + - labs(x= element_blank(),y=element_blank())+ - ggtitle(sprintf("%s expression,\n%s",input$gene,time))+ - theme(legend.position = "none") - }) - output$GeneDotPlot4mo <- renderPlot({ + output$GeneDotPlot <- renderPlot({ + + scale = T + if (input$dotplot_scale == "Unscaled") {scale=F} - time="4months" - tcode=substr(time,1,3) - sub <- subset(seurat.all,subset=Age==tcode) - sub$Mt <- recode_factor(sub$Mt,"V337M" = "FTD-Tau mutant\ncells", "V337V" = "Isogenic controls") - Idents(sub) <- "Mt" - DotPlot( - sub, - assay = "SCT", - features = input$gene, - cols = c("lightgrey", "blue"), - col.min = -2.5, - col.max = 2.5, - dot.min = 0, - dot.scale = 15, - # use log1p-normalized expression data - scale=F, - #set percentage scales - scale.min = 0, - scale.max = 100, - group.by = "Mt")+ - coord_fixed(ratio=2.5) + - labs(x= element_blank(),y=element_blank())+ - ggtitle(sprintf("%s expression,\n%s",input$gene,time))+ - theme(legend.position = "none") - }) - output$GeneDotPlot6mo <- renderPlot({ + p <- DotPlotggSig(input_gene=input$gene, cellcats.to.plot = c(input$dotplot_celltypes), + genes.df = avg.pct.df, + log.scale=scale, + dot.scale.factor = 15, + scale.min = 10, + scale.max = 100, + title = sprintf("Average %s expression by variant, over time", + input$gene)) + + p - time="6months" - tcode=substr(time,1,3) - sub <- subset(seurat.all,subset=Age==tcode) - sub$Mt <- recode_factor(sub$Mt,"V337M" = "FTD-Tau mutant\ncells", "V337V" = "Isogenic controls") - Idents(sub) <- "Mt" - DotPlot( - sub, - assay = "SCT", - features = input$gene, - cols = c("lightgrey", "blue"), - col.min = -2.5, - col.max = 2.5, - dot.min = 0, - dot.scale = 15, - # use log1p-normalized expression data - scale=F, - #set percentage scales - scale.min = 0, - scale.max = 100, - group.by = "Mt")+ - coord_fixed(ratio=2.5) + - labs(x= element_blank(),y=element_blank())+ - ggtitle(sprintf("%s expression,\n%s",input$gene,time)) + - guides(size=guide_legend(title=sprintf("Percent of Cells\nin which Gene is Detected")), - color=guide_colorbar(title="Log1p(Averaged Expression)")) - }) - + # Tab 3: CellType Explorer ########################################################### diff --git a/avgexp_pctexp_data_allgenes_alltimepoints_withpvals.rds b/avgexp_pctexp_data_allgenes_alltimepoints_withpvals.rds new file mode 100644 index 0000000..6e26eef --- /dev/null +++ b/avgexp_pctexp_data_allgenes_alltimepoints_withpvals.rds @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b658185cf3e582f082b9513daf957080ea494392c9e9e77d09672df9db5540 +size 72054923