Polished Graphics

Graphics that Stand Alone

From now on, all graphics must “stand alone”. This means including informative titles, axis labels, captions, and, when appropriate, annotations.

Bar Charts with Count Data / Statistical Transformation

# Read the Seattle housing sales data and store zipcode as a factor, so the
# plots below treat it as a categorical variable.
seattle <- mutate(
  read_csv('http://math.montana.edu/ahoegh/teaching/stat408/datasets/SeattleHousing.csv'),
  zipcode = factor(zipcode)
)

Figure 1 shows a bar chart made without first using tally() to count the number of houses sold in each zipcode; geom_bar() does the counting for us.

# Figure 1: geom_bar() counts the rows in each zipcode itself, so the raw
# (un-tallied) data can be passed straight to ggplot().
ggplot(seattle, aes(x = zipcode, fill = zipcode)) +
  geom_bar() +
  labs(
    title = 'Figure 1. First Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  # zipcode is already on the x axis, so the fill legend is dropped
  theme(legend.position = 'none')
Figure 1: Standard Bar Chart without tally()

Activity #1

Recreate this figure using seattle_tallied and use library(patchwork) to combine the images into a single figure.

# One row per zipcode with the number of sales in column `n`.
seattle_tallied <- tally(group_by(seattle, zipcode))
library(patchwork)
# Figure 2a: bar heights are counted by geom_bar() from the raw rows.
f1 <- ggplot(seattle, aes(x = zipcode, fill = zipcode)) +
  geom_bar() +
  labs(
    title = 'Figure 2a. First Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  theme(legend.position = 'none')

# Figure 2b: the same chart drawn from the pre-computed counts in column `n`;
# geom_col() uses the y values as given instead of counting rows.
f2 <- ggplot(seattle_tallied, aes(x = zipcode, y = n, fill = zipcode)) +
  geom_col() +
  labs(
    title = 'Figure 2b. First Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  theme(legend.position = 'none')

# patchwork: `/` stacks the first plot on top of the second.
f1 / f2
Figure 2: One plot, two ways

Activity #2

Now expand on Figure 1 and also include waterfront in this figure. One option using faceting is shown in Figure 3.

# Figure 3: the Figure 1 bar chart split into one panel per waterfront value,
# with the panels arranged in two rows.
ggplot(seattle, aes(x = zipcode, fill = zipcode)) +
  geom_bar() +
  labs(
    title = 'Figure 3. Faceted Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  theme(legend.position = 'none') +
  facet_wrap(~ waterfront, nrow = 2)
Figure 3: Ugly alert

A better approach in this case would be a stacked bar plot, as in Figure 4. You could also make a side-by-side bar chart using geom_bar(position = 'dodge').

# Figure 4: stacked bars, one segment per waterfront value within each zipcode.
# waterfront is converted to a factor so it gets a discrete legend. It is
# mapped to `fill` rather than `color`: `color` only tints the bar outlines and
# leaves every segment grey, so the stacking would be nearly invisible.
seattle |>
  mutate(waterfront = factor(waterfront)) |>
  ggplot(aes(x = zipcode, fill = waterfront)) +
  geom_bar() +
  labs(title = 'Figure 4. Stacked Bar Plot: # of houses sold in King County, WA zipcodes') +
  ylab("number of houses") +
  theme_minimal() +
  theme(legend.position = 'bottom')
Figure 4: Better than Figure 3

On bar charts

  • Bar charts are good for summarizing counts (better than tables)

  • Bar charts are bad at showing variability in the data…

# Figure 5 (top): mean sale price per zipcode drawn as bars. The title now
# describes the plotted quantity (average price) instead of the "# of houses
# sold" text copied from the earlier count charts.
bar1 <- seattle |>
  group_by(zipcode) |>
  summarise(price = mean(price)) |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_col() +
  labs(title = 'Figure 5. Bad Bar Chart \n Average sales price of houses in King County, WA zipcodes') +
  ylab("Average Sales Price") +
  theme_minimal() +
  theme(legend.position = 'none')

# Figure 5 (bottom): distribution of the individual sale prices per zipcode.
# The y axis is labelled "Sales Price" because a box plot shows the raw
# prices, not their average.
box1 <- seattle |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_boxplot() +
  labs(title = 'Figure 5. Better Box Plot \n Sales prices of houses in King County, WA zipcodes') +
  ylab("Sales Price") +
  theme_minimal() +
  theme(legend.position = 'none')


# patchwork: bar chart on top, box plot underneath.
bar1 / box1
Figure 5: Boxplot shows much more information, with less ink

Activity #3

Update Figure 5 so that the y label is easy to interpret (in dollars).

library(scales)
# Figure 6 (top): mean sale price per zipcode, with the y axis formatted in
# dollars via scales::dollar. The title describes the plotted quantity
# (average price) rather than "# of houses sold".
bar1 <- seattle |>
  group_by(zipcode) |>
  summarise(price = mean(price)) |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_col() +
  labs(title = 'Figure 6. Bad Bar Chart \n Average sales price of houses in King County, WA zipcodes') +
  ylab("Average Sales Price") +
  theme_minimal() +
  theme(legend.position = 'none') +
  scale_y_continuous(labels = dollar)

# Figure 6 (bottom): distribution of individual sale prices per zipcode, with
# the same dollar formatting. Labelled "Sales Price" because the box plot
# shows the raw prices, not their average.
box1 <- seattle |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_boxplot() +
  labs(title = 'Figure 6. Better Box Plot \n Sales prices of houses in King County, WA zipcodes') +
  ylab("Sales Price") +
  theme_minimal() +
  theme(legend.position = 'none') +
  scale_y_continuous(labels = dollar)


# patchwork: bar chart on top, box plot underneath.
bar1 / box1
Figure 6: Boxplot shows much more information, with less ink

Activity #4

Make the y-axes on both plots the same.

As in Figure 6, keep the y-axis labels easy to interpret (in dollars).

library(scales)

# Houses that sold for more than $4,000,000; labelled individually on the
# bar chart below.
fancy_pants <- seattle |>
  filter(price > 4000000)

# Shared y-axis limits so the two panels of Figure 7 are directly comparable.
# The bars start at 0 and 8,000,000 is the upper limit the bar chart already
# used. NOTE(review): assumes no sale exceeds 8,000,000 -- observations outside
# the limits are dropped by the scale, so widen this if the data require it.
price_limits <- c(0, 8000000)

# Figure 7 (top): mean sale price per zipcode, annotated to point out that
# the most expensive houses are invisible in a chart of means.
bar1 <- seattle |>
  group_by(zipcode) |>
  summarise(price = mean(price)) |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_col() +
  labs(
    title = 'Figure 7. Bad Bar Chart \n Average sales price of houses in King County, WA zipcodes',
    caption = 'bar chart only shows 1 piece of information - the mean'
  ) +
  ylab("Average Sales Price") +
  theme_minimal() +
  theme(legend.position = 'none') +
  scale_y_continuous(labels = dollar, limits = price_limits) +
  annotate('text', x = 3, y = 6400000, label = 'fancy pants houses up here') +
  # annotate() draws the arrow once; geom_curve(aes(<constants>)) would draw
  # one identical curve per row of the summarised data.
  annotate(
    'curve',
    x = 3, y = 6000000, xend = 5, yend = 7000000,
    arrow = arrow(length = unit(0.03, "npc"), type = "closed")
  ) +
  # Label each expensive house at its own zipcode / price position.
  geom_text(data = fancy_pants, label = 'Expensive!')


# Figure 7 (bottom): distribution of individual sale prices per zipcode, on
# the same y axis as the bar chart above.
box1 <- seattle |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_boxplot() +
  labs(title = 'Figure 7. Better Box Plot \n Sales prices of houses in King County, WA zipcodes') +
  ylab("Sales Price") +
  theme_minimal() +
  theme(legend.position = 'none') +
  scale_y_continuous(labels = dollar, limits = price_limits)


# patchwork: bar chart on top, box plot underneath.
bar1 / box1
Figure 7: Adjusting axis limits and adding annotation

Accessibility (color blindness)

Code from https://github.com/clauswilke/colorblindr

# Overlapping, semi-transparent density curves of sepal length, one per
# iris species; used below as the example plot for the color-blindness check.
fig <- iris |>
  ggplot(aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.7)
fig

library(remotes)
#remotes::install_github("clauswilke/colorblindr")
library(colorblindr)
Loading required package: colorspace
# Pass the example plot to colorblindr's cvd_grid() for the color-blindness check.
fig |> cvd_grid()

Activity #5

Improve this plot of price vs sqft_living (with bedrooms). Add titles, labels, and use a colorblind friendly way to highlight the number of bedrooms in a house. Also explore different themes.

# Starting point for Activity #5: price against living area, colored by the
# number of bedrooms, with all ggplot2 defaults left in place.
ggplot(seattle, aes(x = sqft_living, y = price, color = bedrooms)) +
  geom_point()

library(viridis)
Loading required package: viridisLite

Attaching package: 'viridis'
The following object is masked from 'package:scales':

    viridis_pal
# Price against living area, one panel per zipcode, with bedrooms shown on the
# continuous viridis color scale.
seattle |>
  ggplot(aes(y = price, x = sqft_living, color = bedrooms)) +
  geom_point() +
  scale_color_viridis() +
  facet_wrap(. ~ zipcode) +
  # The complete theme must come first: theme_dark() replaces every theme
  # setting, so a legend.position set before it would be discarded.
  theme_dark() +
  theme(legend.position = 'bottom') +
  xlab(expression(paste("Living space (", ft^2, ')'))) +
  # labs() only uses named arguments; an unnamed string sets no title.
  labs(title = 'House price by living space in King County, WA')

library(viridis)
# Price against living area, one panel per zipcode stacked in a single column.
# bedrooms is converted to a factor so each count gets its own discrete
# viridis color.
seattle |>
  mutate(bedrooms = factor(bedrooms)) |>
  ggplot(aes(y = price, x = sqft_living, color = bedrooms)) +
  geom_point() +
  scale_color_viridis(discrete = TRUE) +
  facet_wrap(. ~ zipcode, ncol = 1) +
  theme_minimal() +
  xlab(expression(paste("Living space (", ft^2, ')'))) +
  # labs() only uses named arguments; an unnamed string sets no title.
  labs(title = 'House price by living space in King County, WA') +
  theme(legend.position = 'bottom') +
  # Keep the legend keys on a single row beneath the plot.
  guides(colour = guide_legend(nrow = 1))

Additional Color Palettes

# Build the zipcode bar chart filled with one of the viridis palettes.
#
# option: single letter naming the viridis palette ('A' to 'E'), passed on to
#         scale_fill_viridis().
# Returns a ggplot object. The subtitle names the palette so the panels can be
# told apart once they are combined.
palette_bar <- function(option) {
  seattle |>
    ggplot(aes(x = zipcode, fill = zipcode)) +
    geom_bar() +
    labs(
      title = 'Figure X. More Color Palettes',
      subtitle = paste0("viridis option '", option, "'")
    ) +
    ylab("number of houses") +
    theme_minimal() +
    theme(legend.position = 'none',
          # rotate the zipcode labels so they do not overlap in narrow panels
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    scale_fill_viridis(discrete = TRUE, option = option)
}

# One plot per palette. Plot `a` previously used the default option, which is
# 'D', making it a duplicate of plot `d`; it now shows option 'A'.
# The variable `c` shadows the name of base::c(), but calls to c() still
# resolve to the function.
a <- palette_bar('A')
b <- palette_bar('B')
c <- palette_bar('C')
d <- palette_bar('D')
e <- palette_bar('E')


# patchwork layout: two rows of two plots, then `e` alone on the last row.
(a + b) / (c + d) / e