# Read the Seattle housing data and store zipcode as a factor so that it is
# treated as a categorical variable in the plots below.
seattle <- read_csv(
  'http://math.montana.edu/ahoegh/teaching/stat408/datasets/SeattleHousing.csv'
) |>
  mutate(zipcode = factor(zipcode))
Polished Graphics
Graphics that Stand Alone
From now on, all graphics must “stand alone”. This means including informative titles, axis labels, captions, and annotations where appropriate.
Bar Charts with Count Data / Statistical Transformation
Figure 1 shows a bar chart made without first using tally() to count the number of houses sold in each zipcode; geom_bar() does the counting itself.
# Figure 1: number of houses sold per zipcode.
# geom_bar() counts the rows in each zipcode, so the raw data can be plotted
# directly without tallying first.
seattle |>
  ggplot(aes(x = zipcode, fill = zipcode)) +
  geom_bar() +
  labs(
    title = 'Figure 1. First Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  # Colour duplicates the x-axis, so the legend adds nothing
  theme(legend.position = 'none')
Activity #1
Recreate this figure using seattle_tallied and use library(patchwork) to combine the images into a single figure.
# Activity #1: draw the same bar chart from raw rows and from pre-computed
# counts, then stack the two plots with patchwork.

# One row per zipcode; tally() stores the number of sales in column `n`.
seattle_tallied <- seattle |>
  group_by(zipcode) |>
  tally()

library(patchwork)

# Figure 2a: geom_bar() counts the rows of the raw data.
f1 <- seattle |>
  ggplot(aes(x = zipcode, fill = zipcode)) +
  geom_bar() +
  labs(
    title = 'Figure 2a. First Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  theme(legend.position = 'none')

# Figure 2b: bar heights are taken as-is from the tallied column `n`;
# geom_col() is the shorthand for geom_bar(stat = "identity").
f2 <- seattle_tallied |>
  ggplot(aes(x = zipcode, y = n, fill = zipcode)) +
  geom_col() +
  labs(
    title = 'Figure 2b. First Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  theme(legend.position = 'none')

# patchwork: `/` places f1 above f2 in a single figure.
f1 / f2
Activity #2
Now expand on Figure 1 and also include waterfront in this figure. One option using faceting is shown in Figure 3.
# Figure 3: houses sold per zipcode, one panel per value of `waterfront`.
seattle |>
  ggplot(aes(x = zipcode, fill = zipcode)) +
  geom_bar() +
  labs(
    title = 'Figure 3. Faceted Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  theme(legend.position = 'none') +
  # label_both prints "waterfront: <value>" on each strip so the panels can be
  # read without knowing how the variable is coded
  facet_wrap(vars(waterfront), nrow = 2, labeller = label_both)
A better approach in this case would be a stacked bar plot as in Figure 4. You could also do a side-by-side bar chart using geom_bar(position = 'dodge')
# Figure 4: stacked bar chart of houses sold per zipcode, split by waterfront.
seattle |>
  # A factor gives a discrete fill scale with one legend key per level
  mutate(waterfront = factor(waterfront)) |>
  # `fill` colours the bar segments; `color` would only outline them, which
  # leaves the stacked segments hard to tell apart
  ggplot(aes(x = zipcode, fill = waterfront)) +
  geom_bar() +
  labs(
    title = 'Figure 4. Stacked Bar Plot: # of houses sold in King County, WA zipcodes',
    y = "number of houses"
  ) +
  theme_minimal() +
  theme(legend.position = 'bottom')
On bar charts
Bar charts are good for summarizing counts (better than tables)
Bar charts are bad at showing variability in the data…
# Figure 5: the same price data shown as a bar chart of means and as a box plot.

# Bar chart: collapses every zipcode to a single number, its mean sale price.
bar1 <- seattle |>
  group_by(zipcode) |>
  summarise(price = mean(price)) |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_col() +
  labs(
    title = 'Figure 5. Bad Bar Chart \n Average sales price of houses sold in King County, WA zipcodes',
    y = "Average Sales Price"
  ) +
  theme_minimal() +
  theme(legend.position = 'none')

# Box plot: drawn from the individual sale prices, so the spread within each
# zipcode is visible.
box1 <- seattle |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_boxplot() +
  labs(
    title = 'Figure 5. Better Box Plot \n Sales prices of houses sold in King County, WA zipcodes',
    y = "Sales Price"
  ) +
  theme_minimal() +
  theme(legend.position = 'none')

# patchwork: bar chart on top, box plot underneath.
bar1 / box1
Activity #3
Update Figure 5 so that the y-axis labels are easy to interpret (formatted in dollars).
library(scales)
# Figure 6: Figure 5 with the y-axis tick labels formatted as dollar amounts
# (scales::dollar).

# Bar chart of the mean sale price per zipcode.
bar1 <- seattle |>
  group_by(zipcode) |>
  summarise(price = mean(price)) |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_col() +
  labs(
    title = 'Figure 6. Bad Bar Chart \n Average sales price of houses sold in King County, WA zipcodes',
    y = "Average Sales Price"
  ) +
  theme_minimal() +
  theme(legend.position = 'none') +
  scale_y_continuous(labels = dollar)

# Box plot of the individual sale prices per zipcode.
box1 <- seattle |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_boxplot() +
  labs(
    title = 'Figure 6. Better Box Plot \n Sales prices of houses sold in King County, WA zipcodes',
    y = "Sales Price"
  ) +
  theme_minimal() +
  theme(legend.position = 'none') +
  scale_y_continuous(labels = dollar)

# patchwork: bar chart on top, box plot underneath.
bar1 / box1
Activity #4
Make the y-axes on both plots the same.
Update Figure 5 so that the y label is easy to interpret (in dollars).
library(scales)
# Figure 7 (Activity #4): bar chart and box plot drawn on the same y-axis.

# Range used by both plots so their y-axes match.
price_limits <- c(0, 8000000)

# Sales above $4,000,000; each one is labelled on the bar chart.
fancy_pants <- seattle |>
  filter(price > 4000000)

# Bar chart of the mean sale price per zipcode, annotated to point out what
# the means hide.
bar1 <- seattle |>
  group_by(zipcode) |>
  summarise(price = mean(price)) |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_col() +
  labs(
    title = 'Figure 7. Bad Bar Chart \n Average sales price of houses sold in King County, WA zipcodes',
    caption = 'bar chart only shows 1 piece of information - the mean',
    y = "Average Sales Price"
  ) +
  theme_minimal() +
  theme(legend.position = 'none') +
  scale_y_continuous(labels = dollar, limits = price_limits) +
  annotate('text', x = 3, y = 6400000, label = 'fancy pants houses up here') +
  # annotate() draws the arrow once; a geom with constant aes() values would
  # draw one copy per row of the plot data
  annotate(
    'curve',
    x = 3, y = 6000000, xend = 5, yend = 7000000,
    arrow = arrow(length = unit(0.03, "npc"), type = "closed")
  ) +
  # Uses its own data; x and y (zipcode, price) are inherited from ggplot()
  geom_text(data = fancy_pants, label = 'Expensive!')

# Box plot of the individual sale prices per zipcode.
box1 <- seattle |>
  ggplot(aes(x = zipcode, y = price, fill = zipcode)) +
  geom_boxplot() +
  labs(
    title = 'Figure 7. Better Box Plot \n Sales prices of houses sold in King County, WA zipcodes',
    y = "Sales Price"
  ) +
  theme_minimal() +
  theme(legend.position = 'none') +
  scale_y_continuous(labels = dollar, limits = price_limits)

# patchwork: bar chart on top, box plot underneath.
bar1 / box1
Accessibility (color blindness)
Code from https://github.com/clauswilke/colorblindr
# Example figure: overlapping density curves of sepal length, one fill colour
# per iris species.
fig <- ggplot(iris, aes(Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.7)
fig

library(remotes)
# colorblindr is installed from GitHub; uncomment to install it:
# remotes::install_github("clauswilke/colorblindr")
library(colorblindr)
#> Loading required package: colorspace

# Show `fig` as a grid of color-vision-deficiency simulations.
cvd_grid(fig)
Activity #5
Improve this plot of price vs sqft_living (with bedrooms). Add titles, labels, and use a colorblind friendly way to highlight the number of bedrooms in a house. Also explore different themes.
# Starting point for Activity #5: price against living area, coloured by the
# number of bedrooms. Deliberately left without titles or labels.
seattle |>
  ggplot(aes(x = sqft_living, y = price, color = bedrooms)) +
  geom_point()
library(viridis)
Loading required package: viridisLite
Attaching package: 'viridis'
The following object is masked from 'package:scales':
viridis_pal
# Activity #5, version 1: bedrooms on a continuous viridis colour scale,
# one panel per zipcode.
seattle |>
  ggplot(aes(x = sqft_living, y = price, color = bedrooms)) +
  geom_point() +
  scale_color_viridis() +
  facet_wrap(vars(zipcode)) +
  # The complete theme must come first: theme_dark() resets every theme
  # setting, including a legend position set before it
  theme_dark() +
  theme(legend.position = 'bottom') +
  xlab(expression(paste("Living space (", ft^2, ')'))) +
  # The title has to be passed by name; an unnamed string is not used as one
  labs(title = 'House price by living space in King County, WA')
library(viridis)
# Activity #5, version 2: bedrooms as a factor on a discrete viridis scale,
# with the zipcode panels stacked in a single column.
seattle |>
  mutate(bedrooms = factor(bedrooms)) |>
  ggplot(aes(x = sqft_living, y = price, color = bedrooms)) +
  geom_point() +
  scale_color_viridis(discrete = TRUE) +
  facet_wrap(vars(zipcode), ncol = 1) +
  theme_minimal() +
  xlab(expression(paste("Living space (", ft^2, ')'))) +
  # The title has to be passed by name; an unnamed string is not used as one
  labs(title = 'House price by living space in King County, WA') +
  theme(legend.position = 'bottom') +
  # Keep all legend keys on one row under the plot
  guides(colour = guide_legend(nrow = 1))
Additional Color Palettes
# Build the zipcode bar chart with a viridis fill scale.
#
# `...` is forwarded to scale_fill_viridis(), e.g. `option = 'B'` to choose a
# palette. With no arguments the package's default palette is used.
# Reads the global `seattle` data frame.
palette_bar <- function(...) {
  seattle |>
    ggplot(aes(x = zipcode, fill = zipcode)) +
    geom_bar() +
    labs(title = 'Figure X. More Color Palletes') +
    ylab("number of houses") +
    theme_minimal() +
    theme(
      legend.position = 'none',
      # Rotate the zipcode labels so they do not overlap in the narrow panels
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
    ) +
    scale_fill_viridis(discrete = TRUE, ...)
}

# One plot per palette: the default, then options 'B' through 'E'.
a <- palette_bar()
b <- palette_bar(option = 'B')
c <- palette_bar(option = 'C')
d <- palette_bar(option = 'D')
e <- palette_bar(option = 'E')

# patchwork layout: two rows of two plots, then `e` on a row of its own.
(a + b) / (c + d) / e