This guide explores a few ways to plot fold changes and p-values in a volcano plot using the pasilla
results in the hciR package.
library(hciR)
data(pasilla)
res <- pasilla$results
Plot transparent blue dots and label the top 4 genes.
plot(res$log2FoldChange, -log10(res$padj), pch=19, col=rgb(0,0,1,.3), xlim=c(-6,6),
xlab="Log2 fold change", ylab= "-Log10 p-value", bty="l")
x <- filter(res, padj < 1e-100)
text(x$log2FoldChange, -log10(x$padj), x$gene_name, pos=4)
Skip the rows with missing p-values to avoid a warning.
library(ggplot2)
x <- filter( res, !is.na(padj))
p <- ggplot(data=x, aes(x=log2FoldChange, y= -log10(padj), text=gene_name )) +
geom_point(alpha=0.3, size=1.5, color="blue") +
xlab("Log2 fold change") + ylab("-Log10 p-value") +xlim(-6,6)
y <- filter(x, padj < 1e-100)
p + geom_text( data=y, aes(x=log2FoldChange, y= -log10(padj), label=gene_name),
hjust="left", nudge_x=.1)
Use Plotly to make interactive plots using the ggplotly
function to convert the ggplot2 object above. This plot has 8K points and the response to zooming and mouseover may be slow (and most volcano plots will have over 20K points).
library(plotly)
ggplotly(p)
You can use the plot_ly
function for more control over the plot. This code will add Ensembl IDs to missing gene names and then set the gene name to NA to remove the hover box if that gene is above a p-value cutoff of 1e-10 or absolute fold change below 1. Finally, set hoverinfo to text to remove the p-value and fold change values from the hover box.
x <- filter( res, !is.na(padj))
x$gene_name <- ifelse(is.na(x$gene_name), x$id, x$gene_name )
x$gene_name[x$padj > 1e-10 & abs(x$log2FoldChange) < 1] <- NA
plot_ly(data = x, x = ~ log2FoldChange , y = ~ -log10(padj),
type="scatter", mode="markers", hoverinfo="text", text = ~ gene_name,
marker = list(size = 10, color = 'rgba(0, 0, 255, .3)')) %>%
layout( yaxis = list(title = "-Log10 p-value", zeroline = FALSE),
xaxis = list(title = "Log2 fold change", zeroline = FALSE, range=c(-6,6)))
If the response is still slow, another hack is to remove 80% or more of the points by randomly sampling rows from genes below the cutoff (at the bottom center of the plot where most points are overlapping)
y <- bind_rows(
filter(x, !is.na(gene_name)),
filter(x, is.na(gene_name)) %>% sample_n(1000)
)
Highcharter is another package for interactive plots. Genes names with NAs will display as an empty box, so you can use the enableMouseTracking option on a vector to display the gene.
library(highcharter)
x <- filter( res, !is.na(padj))
x$gene_name <- ifelse(is.na(x$gene_name), x$id, x$gene_name )
x$sig <- ifelse(x$padj > 1e-10 & abs(x$log2FoldChange) < 1, "N", "Y")
hchart(x, "scatter", hcaes(log2FoldChange, -log10(padj), group = sig, value = gene_name),
color = "rgba(0,0,255, 0.3)", enableMouseTracking = c(FALSE, TRUE),
showInLegend = FALSE, marker = list(radius = 4)) %>%
hc_tooltip(pointFormat = "{point.value}", headerFormat = "") %>%
hc_xAxis(title = list(text = "Log2 fold change"), gridLineWidth = 1,
tickLength = 0, startOnTick = "true", endOnTick = "true", min = -6, max = 6) %>%
hc_yAxis(title = list(text = "-Log10 p-value")) %>%
hc_chart(zoomType = "xy", width=700) %>%
hc_exporting(enabled = TRUE, filename = "volcano")
The code above is also included in the plot_volcano
function in the hciR package.
plot_volcano(res, padj = 1e-10, log2FoldChange = 1)
Use crosstalk to link a d3 scatterplot and data table. This requires the development version of DT on Github to use the sharedData object. You can click and drag to create a box to highlight points and view matching rows, or select rows to view points. You can also drag the box around the plot to easily highlight other points and rows.
Note that Plotly should be compatible with crosstalk, but I can only select a single point and not a region to display in the table.
library(crosstalk)
library(d3scatter)
library(DT)
x <- filter( res, !is.na(padj)) %>% dplyr::select(c(1:2,6:7,11))
sd <- SharedData$new(x)
bscols(
d3scatter(sd, x = ~log2FoldChange, y = ~ -log10(padj), color= "blue", width = "99%", height = 350,
x_label = "log2 Fold change", y_label = "-log10 p-value", x_lim= c(-6,6) ),
datatable(sd, rownames = FALSE, width = "99%", extensions = c('Scroller', 'Buttons'),
options = list( scrollY = 200, scroller = TRUE, dom = "Bfrtip",
buttons = c('copy', 'csv', 'pdf'))) %>%
formatRound(3, 1) %>% formatSignif( 4:5)
)