Plot scientific figures in R

Download rmd files here

Prerequisites

  1. Download RStudio here: https://posit.co/download/rstudio-desktop/
  2. Download the R installer here (for Windows): https://cran.rstudio.com/bin/windows/base/

Why use R?

  1. Comprehensive Statistical Analysis Toolkit
  2. High-quality Graphics
  3. Extensible through Packages
  4. Integration with Other Languages and Tools
  5. Strong Community Support

Contents

Install necessary packages for data analysis and visualization

# # Markdown support
# if (!require(rmarkdown)) {install.packages("rmarkdown")}
# if (!require(remotes)) {install.packages("remotes") }
# if (!require(ggpubr)) {install.packages("ggpubr")}
# # Data
# if (!require(palmerpenguins)) {install.packages("palmerpenguins")}
# # Data clean, visualization support
# if (!require(tidyverse)) {install.packages("tidyverse")}
# if (!require(patchwork)) {install.packages("patchwork")}
# if (!require(ggsci)) {install.packages("ggsci")}
# if (!require(ggridges)) {install.packages("ggridges")}
# if (!require(RColorBrewer)) {install.packages("RColorBrewer")}
# # Python support
# if (!require(png)) {install.packages("png")}
# if (!require(reticulate)) {install.packages("reticulate")}
library(ggpubr)
## Loading required package: ggplot2
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(RColorBrewer)
library(palmerpenguins)
library(remotes)
# remotes::install_github("psyteachr/introdataviz")
library(introdataviz)
library(ggridges)

The basics of ggplot2

  • A dataset

  • A set of geoms: A geom refers to the geometric object used to represent your data. For example, you can use points to create a scatterplot, bars to create a bar chart, lines to create a line diagram, etc.

  • A set of aesthetic attributes: An aesthetic is a visual property of an object in your plot. You can think of an aesthetic as a connection, or mapping, between a visual feature in your plot and a variable in your data. For example, in a scatterplot, aesthetics include things like the size, shape, color, or location (x-axis, y-axis) of your data points.

# Smallest complete plot: a dataset, one geom, and an x/y aesthetic mapping.
ggplot(data = penguins) +
  geom_point(
    mapping = aes(x = flipper_length_mm, y = body_mass_g)
  )

We will use a sample dataset, penguins, which ships with the palmerpenguins package (loaded above).

penguin species
penguin species
dataset parameters
dataset parameters
# Keep only the penguins whose sex was recorded; the plots below use `df`.
df <- dplyr::filter(penguins, !is.na(sex))

Aesthetic

Aesthetics are an important concept in ggplot2: they control the visual properties of the plot. Common aesthetic parameters include:

  1. x
  2. y
  3. color
  4. shape
  5. alpha
  6. fill
  7. size

Example 1: color aesthetic

# Map sex to point color. Here the whole mapping is given to the geom_point()
# layer itself rather than to ggplot().
ggplot(data = df) +
  geom_point(mapping = aes(x = flipper_length_mm, y = body_mass_g, color = sex))

Mappings specified in ggplot() will be inherited in the following layers

# The x/y mapping is declared once in ggplot(); geom_point() is given no
# mapping of its own and uses the plot-level one.
ggplot(data = df, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point()

Example 2: shape aesthetic

# One point shape per island.
ggplot(
  data = df,
  mapping = aes(x = flipper_length_mm, y = body_mass_g, shape = island)
) +
  geom_point()

Example 3: fill aesthetic

# Box fill distinguishes the sexes within each island.
ggplot(
  data = df,
  mapping = aes(x = island, y = body_mass_g, fill = sex)
) +
  geom_boxplot()

Example 4: size aesthetic

# Point size follows bill depth; color separates the species.
ggplot(
  data = df,
  mapping = aes(
    x = flipper_length_mm,
    y = body_mass_g,
    size = bill_depth_mm,
    color = species
  )
) +
  geom_point()

Example 5: alpha aesthetic. Adjusts the transparency of the figure elements.

# Point transparency follows bill depth; color separates the species.
ggplot(
  data = df,
  mapping = aes(
    x = flipper_length_mm,
    y = body_mass_g,
    alpha = bill_depth_mm,
    color = species
  )
) +
  geom_point()

Example 6: combined aesthetic

# Several aesthetics at once: each one is mapped to a different column.
ggplot(
  data = df,
  mapping = aes(
    x = flipper_length_mm,
    y = body_mass_g,
    color = sex,
    shape = island,
    alpha = year,
    size = bill_depth_mm
  )
) +
  geom_point()

Example 7: special aesthetic. An aesthetic specified outside the aes() function overrides the corresponding mapping inside aes().

# Color is mapped from the data: one color per species.
ggplot(data = df) +
  geom_point(
    mapping = aes(
      x = flipper_length_mm, y = body_mass_g,
      size = bill_depth_mm, color = species
    )
  )

# Same mapping, plus a constant `color` passed outside aes(): the constant
# takes over and the species-to-color mapping no longer shows.
ggplot(data = df) +
  geom_point(
    mapping = aes(
      x = flipper_length_mm, y = body_mass_g,
      size = bill_depth_mm, color = species
    ),
    color = "purple"
  )

Frequently used ggplot2 functions

  • ggplot()
  • geom_line()
  • geom_smooth()
  • geom_point()
  • geom_bar()
  • geom_histogram()
  • geom_boxplot()
  • geom_violin()
  • geom_segment()
  • geom_split_violin()
  • geom_path()

geom_line

# Join the observations with a line, body mass against flipper length.
ggplot(data = df) +
  geom_line(mapping = aes(x = flipper_length_mm, y = body_mass_g))

geom_smooth

# Draw only the fitted trend. No method or formula is given, so ggplot2 picks
# them itself and reports the choice as a message.
ggplot(df, mapping = aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

  #geom_point()

geom_bar

# Only x is mapped: geom_bar() counts the rows for each year and draws one bar
# per year.
ggplot(df, mapping = aes(x = year)) +
  geom_bar(position = "identity")

#> geom_histogram()

geom_boxplot

# Body mass by sex: one box per sex.
ggplot(data = df, aes(x = sex, y = body_mass_g)) +
  geom_boxplot()

#  geom_point(position = position_jitter(width = 0.15))
# Split each sex by island through the fill aesthetic (fill = island) and
# overlay the raw observations on top of the boxes.
ggplot(data = df, aes(x = sex, y = body_mass_g, fill = island)) +
  geom_boxplot() +
  # geom_boxplot() dodges the boxes within a default total width of 0.75, so
  # the points use the same dodge width to sit on the centre of their own box.
  geom_point(aes(color = island), position = position_dodge(width = 0.75)) +
  scale_color_manual(values = c("blue", "red", "green"))

# specify the fill aesthetic, fill = island

geom_violin

library(introdataviz)
# Plain violins, one per sex:
# ggplot(data = df, aes(x = sex, y = body_mass_g)) +
#   geom_violin()
# Several groups: one violin per island within each sex:
# ggplot(data = df, aes(x = sex, y = body_mass_g, fill = island)) +
#   geom_violin()
# Split violin: for each island, the two sexes share one violin.
ggplot(
  data = df,
  mapping = aes(x = island, y = body_mass_g, fill = sex)
) +
  geom_split_violin()

geom_*line

# Boxplot with raw points, decorated with horizontal, vertical and short
# vertical reference lines.
ggplot(data = df, aes(x = sex, y = body_mass_g, fill = island)) +
  geom_boxplot() +
  # Same dodge width as geom_boxplot()'s default (0.75) so that the points are
  # centred on their own box.
  geom_point(aes(color = island), position = position_dodge(width = 0.75)) +
  scale_color_manual(values = c("blue", "red", "green")) +
  # Horizontal reference line at 4000 on the y-axis.
  geom_hline(
    yintercept = 4000, linetype = "dashed", color = "firebrick",
    linewidth = 1.0
  ) +
  # Vertical separator between the first and second x categories.
  geom_vline(xintercept = 1.5, color = "steelblue") +
  # A single fixed segment is an annotation, not data: annotate() draws it
  # once, whereas constants inside aes() are repeated for every row of `df`.
  annotate(
    "linerange",
    x = 1.2, ymin = 5500, ymax = 6000,
    color = "red", linewidth = 2
  )

# specify the fill aesthetic, fill = island

geom_tile

Create a correlation matrix

# Demo data in long format: one row per (rowname, colname) cell. The values are
# random draws from [-1, 1], not correlations computed from real data.
corr_matrix <- data.frame(
  value = runif(36, min = -1, max = 1),
  colname = rep(paste0("var", 1:6), times = 6),
  rowname = rep(paste0("var", 1:6), each = 6)
) %>%
  mutate(label = as.character(round(value, 2)))

# Heatmap: tile fill encodes the value, and the rounded value is printed on
# top of each tile.
ggplot(
  data = corr_matrix,
  mapping = aes(x = rowname, y = colname, fill = value)
) +
  geom_tile() +
  geom_text(mapping = aes(label = label)) +
  scale_fill_distiller(palette = "PuOr")

geom_density

Create a contour plot

# Contour-line version:
# ggplot(data = df, aes(x = flipper_length_mm, y = body_mass_g)) +
#   geom_density_2d()

# Filled version: density bands are filled, the legend is hidden, and axis
# expansion is switched off.
ggplot(
  data = df,
  mapping = aes(x = flipper_length_mm, y = body_mass_g)
) +
  geom_density_2d_filled(show.legend = FALSE) +
  coord_cartesian(expand = FALSE)

geom_density_ridges

Create ridge plot

library(ggridges)
# Single-color ridges:
# ggplot(data = df, aes(x = flipper_length_mm, y = factor(year))) +
#   geom_density_ridges(fill = "gray90") +
#   theme_minimal()

# Colored ridges: one ridge per year (y = factor(year)), with the fill taken
# from the numeric year through a continuous viridis scale.
ggplot(
  data = df,
  mapping = aes(x = flipper_length_mm, y = factor(year), fill = year)
) +
  geom_density_ridges_gradient(
    scale = 0.9,
    gradient_lwd = 0.5,
    color = "black"
  ) +
  scale_fill_viridis_c(option = "plasma", name = "") +
  theme_minimal()
## Picking joint bandwidth of 4.86

Towards more aesthetic scientific graphs

Working with statistics

Comparison between groups

# Every pairwise combination of islands, in the list-of-pairs form expected by
# stat_compare_means(comparisons = ).
compare_list <- combn(c("Biscoe", "Dream", "Torgersen"), 2, simplify = FALSE)
ggplot(data = df, aes(x = island, y = body_mass_g)) +
  # All observations are drawn by the jitter layer below, so the boxplot's own
  # outlier markers are suppressed; otherwise each outlier would appear twice.
  geom_boxplot(outlier.shape = NA) +
  # Spread the points horizontally only: height = 0 keeps every point at its
  # measured body mass.
  geom_jitter(width = 0.15, height = 0) +
  # Add a significance bracket for each pair in compare_list.
  stat_compare_means(comparisons = compare_list) +
  theme(
    panel.background = element_blank(),
    axis.line.x.bottom = element_line(linewidth = 1),
    axis.line.y.left = element_line(linewidth = 1)
  )

Comparison within groups

# Build one combined island_sex label per row so that each island/sex
# combination gets its own box on the x-axis.
dftmp <- mutate(df, si_interaction = paste(island, sex, sep = "_"))

# Pairwise comparisons among three selected island/sex combinations.
compare_list <- combn(
  c("Biscoe_female", "Dream_male", "Torgersen_female"),
  2,
  simplify = FALSE
)

ggplot(
  data = dftmp,
  mapping = aes(x = si_interaction, y = body_mass_g, fill = sex)
) +
  geom_boxplot() +
  stat_compare_means(comparisons = compare_list) +
  theme(
    panel.background = element_blank(),
    axis.line.x.bottom = element_line(linewidth = 1),
    axis.line.y.left = element_line(linewidth = 1),
    # Tilt the long combined labels.
    axis.text.x = element_text(angle = 45, vjust = 0.5)
  )

Working with text

  • Change the labels, tick labels in ggplot
# Give the islands deliberately long labels to show how tick labels can be
# rotated and styled.
dftmp <- df %>% mutate(island_label = paste0("Super_long_prefix_", island))
ggplot(data = dftmp, aes(x = island_label, y = body_mass_g, fill = sex)) +
  geom_boxplot() +
  # Axis titles describe the mapped variables: x is the island label and y is
  # body mass.
  labs(title = "The graph title", x = "Island", y = "Body Mass") +
  # Free-floating text; x is a position along the discrete axis (1.8 lies
  # between the first and second categories).
  annotate("text", x = 1.8, y = 6000, label = "This is annotation!") +
  theme(
    # x-axis title
    axis.title.x = element_text(
      face = "bold", color = "red", size = 18, vjust = 0.5
    ),
    # x-axis tick labels, rotated so that the long labels do not overlap
    axis.text.x = element_text(
      face = "italic", color = "dodgerblue", size = 12,
      angle = 45, hjust = 0.9, vjust = 1
    ),
    # centred plot title with extra space above and below
    plot.title = element_text(
      face = "bold", margin = margin(10, 0, 10, 0), size = 20, hjust = 0.5
    )
  )

Working with legends

ggplot2 creates a legend automatically for each mapped aesthetic (for example color, shape or size).

# Three mapped aesthetics (size, shape, color) give three legends.
ggplot(
  data = df,
  aes(
    x = flipper_length_mm, y = body_mass_g,
    size = bill_depth_mm, shape = island, color = species
  )
) +
  geom_point() +
  theme(
    legend.background = element_blank(),
    legend.key = element_rect(fill = "white"),
    legend.title = element_text(color = "red")
    # legend.position = "top"
  ) +
  # Change the legend titles in labs(): one title per aesthetic, each given
  # under the name of the aesthetic it belongs to.
  labs(color = "Species_new", size = "Bill depth new", shape = "island new") +
  # A title given to the scale itself takes precedence over labs(), so the
  # color legend shows this title; `labels` renames the legend entries.
  scale_color_discrete(
    "Changed in scale_color_discrete",
    labels = c(
      "Adelie in scale_color_discrete",
      "Chinstrap in scale_color_discrete",
      "Gentoo in scale_color_discrete"
    )
  ) +
  # Enlarge the keys of the color legend without changing the plotted points.
  guides(color = guide_legend(override.aes = list(size = 6)))

You can also add a legend manually by creating a single mapping. See https://www.cedricscherer.com/2019/08/05/a-ggplot2-tutorial-for-beautiful-plotting-in-r/

Working with theme

Change the background and themes

# Custom theme: no panel background, explicit bottom/left axis lines, dashed
# red major grid lines, and larger tick labels.
ggplot(
  data = df,
  mapping = aes(x = sex, y = body_mass_g, fill = island)
) +
  geom_boxplot() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "red1", linetype = "dashed"),
    axis.line.x.bottom = element_line(linewidth = 0.8),
    axis.line.y.left = element_line(linewidth = 0.8),
    axis.text = element_text(size = 12)
  )

Working with colors

R color palette

library(ggsci)
# Fill colors chosen by hand: one named R color per island.
ggplot(
  data = df,
  mapping = aes(x = sex, y = body_mass_g, fill = island)
) +
  geom_boxplot() +
  scale_fill_manual(values = c("yellow", "lightblue", "purple")) +
  theme(
    panel.background = element_blank(),
    axis.line.x.bottom = element_line(linewidth = 0.8),
    axis.line.y.left = element_line(linewidth = 0.8)
  )

library(ggsci)
# Fill colors taken from a ggsci palette. Swap in one of the commented-out
# scales to try another palette.
ggplot(
  data = df,
  mapping = aes(x = sex, y = body_mass_g, fill = island)
) +
  geom_boxplot() +
  # scale_fill_jco() +
  # scale_fill_npg() +
  # scale_fill_aaas() +
  # scale_fill_lancet() +
  scale_fill_jama() +
  theme(
    panel.background = element_blank(),
    axis.line.x.bottom = element_line(linewidth = 0.8),
    axis.line.y.left = element_line(linewidth = 0.8)
  )

# Most of the time the ggsci scales are enough. To specify colors manually,
# take them from an RColorBrewer palette with brewer.pal(), e.g.
#   palette_colors <- brewer.pal(n = 11, name = "Spectral")
library(ggsci)
color_palette <- brewer.pal(n = 10, name = "Spectral")

# p1: scatter plot with point color following bill depth along a gradient
# built from the palette colors. Stored for the multi-panel figures below.
p1 <- ggplot(
  data = df,
  mapping = aes(x = flipper_length_mm, y = body_mass_g, color = bill_depth_mm)
) +
  geom_point() +
  scale_color_gradientn(colors = color_palette) +
  theme(
    panel.background = element_blank(),
    axis.line.x.bottom = element_line(linewidth = 0.8),
    axis.line.y.left = element_line(linewidth = 0.8),
    legend.key.size = unit(0.5, "lines")
  )

# p2: the corr_matrix heatmap, filled with the same palette gradient.
p2 <- ggplot(
  data = corr_matrix,
  mapping = aes(x = rowname, y = colname, fill = value)
) +
  geom_tile() +
  geom_text(mapping = aes(label = label)) +
  scale_fill_gradientn(colors = color_palette) +
  theme(
    panel.background = element_blank(),
    legend.key.size = unit(0.5, "lines")
  )
RColorBrewer palette names (each row lists four palettes):
YlOrRd PuBu Set3 Spectral
YlOrBr OrRd Set2 RdYlGn
YlGnBu Oranges Set1 RdYlBu
Reds Greys Pastel2 RdGy
RdPu Greens Pastel1 RdBu
Purples GnBu Paired PRGn
PuRd BuPu Dark2 PiYG
PuBuGn BuGn Accent BrBG
R Color palette
R Color palette

Working with multiple figures

Create multi-panel figures

# p3: one scatter panel per island (rows) and year (columns), with three
# evenly spaced x-axis breaks between 170 and 230 and tilted tick labels.
p3 <- ggplot(
  data = df,
  mapping = aes(x = flipper_length_mm, y = body_mass_g, color = sex)
) +
  geom_point() +
  facet_grid(island ~ year) +
  scale_x_continuous(breaks = seq(170, 230, length.out = 3)) +
  theme(
    panel.background = element_blank(),
    axis.line.x.bottom = element_line(linewidth = 0.8),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

Create multi-grid figures

library(patchwork)

# Text design: every character is one grid cell and each digit names the area
# a plot fills. p1 takes the top-left cells, p2 the bottom-left cells, and p3
# spans both rows on the right.
layout <- "
11333
22333
"

# `&` applies the theme to every panel of the assembled figure; with `+` it
# would only be added to the last plot (p3).
fig_text_design <- p1 + p2 + p3 +
  plot_layout(design = layout) +
  plot_annotation(
    tag_levels = c("A"),
    tag_prefix = "(",
    tag_suffix = ")"
  ) &
  theme(
    legend.position = "top"
  )
fig_text_design

# Pass the figure explicitly so that the saved file does not depend on
# whichever plot happened to be displayed last.
ggsave(
  "test.jpg",
  plot = fig_text_design,
  width = 32, height = 18, units = "cm"
)
library(patchwork)

# Same kind of arrangement built from explicit areas. t/l/b/r are the top,
# left, bottom and right grid cells of each area; rows and columns are counted
# from 1, so the first row/column is 1 (0 lies outside the grid).
layout <- c(
  area(t = 1, l = 1, b = 2, r = 3), # p1: top-left
  area(t = 3, l = 1, b = 5, r = 3), # p2: bottom-left
  area(t = 1, l = 4, b = 5, r = 6)  # p3: full height on the right
)

# `&` applies the theme to every panel of the assembled figure; with `+` it
# would only be added to the last plot (p3).
fig_area_design <- p1 + p2 + p3 +
  plot_layout(design = layout) +
  plot_annotation(
    tag_levels = c("A"),
    tag_prefix = "(",
    tag_suffix = ")"
  ) &
  theme(
    legend.position = "top"
  )
fig_area_design

# Pass the figure explicitly so that the saved file does not depend on
# whichever plot happened to be displayed last.
ggsave(
  "test.eps",
  plot = fig_area_design,
  width = 32, height = 18, units = "cm"
)

Toolbox and references

Working with python

If you are more comfortable with Python, you can install the plotnine package to use the ggplot grammar there; it integrates with pandas data frames.

(bash) pip install plotnine

# reticulate::install_python(version = '3.10')
from plotnine.data import mpg
## \AppData\Local\R\win-library\4.2\reticulate\python\rpytools\loader.py:117: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
##   return _find_and_load(name, import_)
from plotnine import ggplot, geom_point, aes
mpg.head()
##   manufacturer model  displ  year  cyl       trans drv  cty  hwy fl    class
## 0         audi    a4    1.8  1999    4    auto(l5)   f   18   29  p  compact
## 1         audi    a4    1.8  1999    4  manual(m5)   f   21   29  p  compact
## 2         audi    a4    2.0  2008    4  manual(m6)   f   20   31  p  compact
## 3         audi    a4    2.0  2008    4    auto(av)   f   21   30  p  compact
## 4         audi    a4    2.8  1999    6    auto(l5)   f   16   26  p  compact
# Scatter plot of the mpg data: `cty` against `displ`, colored by `class`.
# In plotnine the column names are passed to aes() as strings. The outer
# parentheses let the `+` chain continue across lines.
(
  ggplot(aes(x = 'displ', y = 'cty', color = 'class'), mpg) +
    geom_point()
)
## <Figure Size: (640 x 480)>
## 
## <string>:1: FutureWarning: Using repr(plot) to draw and show the plot figure is deprecated and will be removed in a future version. Use plot.show().