Learning objectives

  • Learn how to create a useful and attractive scatter plot using ggplot.
  • Create a scatter plot where the color and size of the points vary with variables and values.
  • Learn how to modify axis and plot properties.
  • Export a ggplot image at the desired resolution and dimensions.

Content

This tutorial guides you from the beginner level (Level 1) to the pro level in scatter plotting. Many of the functions used here are also useful when drawing bar plots, box plots, line plots, etc. Below is the list of topics covered on this page.

  • Simple ggplot
  • Axis limit and interval
  • ggplot themes
  • Point colors
  • Color of points by values
  • Point color/size by values
  • Draw best fit line
  • Pro in scatter plot
  • Export ggplot image
library(ggplot2) # if you haven't installed ggplot2, then enter install.packages('ggplot2')
## Warning: package 'ggplot2' was built under R version 4.0.3
# List the datasets that ship with R
data()

# This tutorial uses the built-in "trees" dataset, which holds the diameter
# (the Girth column), height and volume of Black Cherry Trees
head(trees)
##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7
# Quick base-R scatter plot of volume against height, for comparison with the
# ggplot versions that follow
plot(trees$Height, trees$Volume, xlab = "Height", ylab = "Volume")

Level 1: Simple ggplot

# Basic scatter plot: Height on the x axis, Volume on the y axis, one point
# per tree, with custom axis titles
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume")

Level 2: Axis limit and interval

  • Use xlim() and ylim() to fix the axis limits.
  • Use scale_x_continuous() and scale_y_continuous() to set the axis limits together with the tick intervals (breaks).
# Scatter plot with fixed axis ranges: x from 30 to 90, y from 0 to 100
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume") +
  xlim(30, 90) +
  ylim(0, 100)

# Alternative to xlim()/ylim(): use the two lines below in their place to set
# the limits and the tick interval (breaks) in one call per axis
  # scale_y_continuous(breaks = seq(0, 80, by=10), limits=c(0,80))+
  # scale_x_continuous(breaks = seq(0, 100, by=10), limits=c(0,100))

Level 3: ggplot themes

# Same scatter plot with the black-and-white theme applied
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume") +
  theme_bw()

# Other theme options to experiment with (add them to the chain above):
  # theme_classic()+
  # theme_minimal()+
  # theme_gray()+
  # theme(axis.text.x = element_text(color = "grey20", size = 10),
  #     # axis.text.x = element_text(color = "grey20", size = 10, angle = 0, hjust = .5, vjust = .5),
  #       axis.title.x = element_text(color = "grey20", size = 12))+
  # theme(text = element_text(size=15))+

Level 4: Point colors

# Give every point the same fixed colour and size
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point(colour = "red", size = 2) +
  # Alternative: point shape 21, drawn with a black outline and a red fill
  # geom_point(color = 'black',size = 2, fill = 'red',pch=21)+
  theme_bw()

  # The available pch (point shape) codes are listed at
  # https://www.datanovia.com/en/blog/ggplot-point-shapes-best-tips/

Level 5: Color of points by values

# Colour each point by its Girth value: the outline stays black while the
# fill follows a 10-step terrain colour gradient
ggplot(data = trees, mapping = aes(x = Height, y = Volume, fill = Girth)) +
  geom_point(shape = 21, colour = "black", size = 3) +
  theme_bw() +
  scale_fill_gradientn(colours = terrain.colors(10))

# Optional variations: a two-colour gradient, legend placement, and a title
  # scale_fill_gradient(name = 'aa', low = "blue", high = "red")+
  # theme(legend.position = 'bottom',
  #       legend.direction = "horizontal")+
  # theme(legend.position = c(0.1, 0.7),
  #       legend.background = element_rect(linetype='solid', color='black', size =0.5))
  # ggtitle("Plot of height vs volume")

Level 6: Point color/size by values

# Preview the built-in iris dataset used for the categorical examples
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Fill the points by Species, keeping a black outline around each point
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Petal.Length, fill = Species)
) +
# To also scale the point size by Petal.Length, start the plot with this
# line instead of the ggplot() call above:
# ggplot(iris, aes(x= Sepal.Length, y = Petal.Length, fill = Species, size = Petal.Length))+
  geom_point(shape = 21, colour = "black") +
  # Optional extras carried over from the previous level.
  # NOTE(review): the two gradient scales target a continuous fill variable;
  # Species is categorical, so confirm before enabling them here.
  # scale_fill_gradientn(colours = terrain.colors(10))+
  # scale_fill_gradient(name = 'aa', low = "blue", high = "red")+
  # theme(legend.position = 'bottom',
  #       legend.direction = "horizontal")+
  # ggtitle("Plot of height vs volume")+
  theme_bw()

Level 7: Draw best fit line

# Scatter plot of the trees data with a fitted straight line on top.
# method = "lm" requests a linear-model fit; se = FALSE turns off the
# confidence band. FALSE is spelled out because the alias `F` is an ordinary
# variable that can be reassigned.
ggplot(trees, aes(x = Height, y = Volume, fill = Girth)) +
  geom_point(color = "black", size = 3, pch = 21) +
  geom_smooth(method = "lm", se = FALSE, alpha = .5) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of the iris data with a fitted straight line per species.
# Mapping color to Species inside geom_smooth() colours each line by species.
# se = FALSE turns off the confidence band; FALSE is spelled out because the
# alias `F` is an ordinary variable that can be reassigned.
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, fill = Species)) +
  geom_point(color = "black", size = 3, pch = 21) +
  geom_smooth(aes(color = Species), method = "lm", se = FALSE, alpha = .5) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

Level 8: Pro in scatter plot

# Polished scatter plot of the iris data, stored in `p` so it can be both
# displayed below and exported afterwards
p <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Petal.Length, fill = Species)
) +
  geom_point(shape = 21, colour = "black", size = 3) +
  # Fixed axis ranges with a tick at every whole unit
  scale_x_continuous(breaks = seq(3, 9, by = 1), limits = c(3, 9)) +
  scale_y_continuous(breaks = seq(1, 8, by = 1), limits = c(1, 8)) +
  labs(x = "Sepal length", y = "Petal length") +
  # One manually chosen fill colour per species
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  theme_bw() +
  theme(
    # Axis tick labels and axis titles in black at a readable size
    axis.text.x = element_text(
      color = "black", size = 12, angle = 0, hjust = .5, vjust = .5
    ),
    axis.title.x = element_text(color = "black", size = 13),
    axis.text.y = element_text(color = "black", size = 12),
    axis.title.y = element_text(color = "black", size = 13),
    # Legend placed inside the panel (relative coordinates), with a border.
    # NOTE(review): recent ggplot2 releases deprecate `size` in element_rect()
    # (in favour of `linewidth`) and a numeric `legend.position` - confirm
    # against the installed ggplot2 version before changing.
    legend.position = c(0.2, 0.8),
    legend.background = element_rect(
      linetype = "solid", color = "black", size = 0.5
    )
  ) +
  # Free-text label placed at data coordinates (8, 2)
  annotate("text", x = 8, y = 2, label = "Scatter plot", size = 5)

# Display the plot
p

Export ggplot image

# Export the plot stored in `p` as a PNG image.
# Set `output_dir` to the folder where the image should be written. Passing it
# through ggsave()'s `path` argument writes the file there without calling
# setwd(), so the session's working directory is left unchanged.
output_dir <- "C:/sarfaraz/Project_R_tutorials/R-tutorial/R_beginner_part3_files"
ggsave(
  filename = "scatterplot.png",
  plot = p,
  path = output_dir,
  # Physical size of the exported image
  width = 20, height = 15, units = "cm",
  # Resolution in dots per inch (ggsave()'s default, made explicit so it is
  # easy to change)
  dpi = 300
)