# Install the tidyverse only when it is missing, so that re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Scatterplot of percollege against popdensity from the midwest data, with
# the `data` and `mapping` arguments named explicitly.
ggplot(data = midwest) +
  geom_point(mapping = aes(x = popdensity, y = percollege))

# The same plot, relying on positional matching for `data` and `mapping`.
ggplot(midwest) +
  geom_point(aes(x = popdensity, y = percollege))
# NOTE(review): the next six lines look like fragments of exercise prose
# whose connecting words were lost; they are kept verbatim as comments so
# the script parses.
# popdensity
# by state
# .county
# by state
# . ggplot(data = midwest)
# from above.

# Map state to point color inside aes() so each state gets its own color
# and a legend entry.
ggplot(midwest) +
  geom_point(aes(x = popdensity, y = percollege, color = state))
# A constant color is supplied outside aes(), so it is applied to every
# point directly.
ggplot(midwest) +
  geom_point(aes(x = popdensity, y = percollege), color = 'pink')

# NOTE(review): the next three lines look like fragments of prose whose
# connecting words were lost; kept verbatim as comments so the script parses.
# alpha
# instead. colors()
# .

# Here the constant is placed inside aes(), so ggplot2 treats the string
# 'green' as a data value to map through the color scale rather than as the
# literal color of the points.
ggplot(midwest) +
  geom_point(aes(x = popdensity, y = percollege, color = 'green'))
# Smoothed trend of percollege against popdensity.
ggplot(midwest) +
  geom_smooth(aes(x = popdensity, y = percollege))

# One smooth per state, distinguished by line type; se = FALSE is passed to
# geom_smooth() in addition to the mapping.
ggplot(midwest) +
  geom_smooth(
    aes(x = popdensity, y = percollege, linetype = state),
    se = FALSE
  )

# Points and smooths in one plot, with the same aesthetics repeated in
# each layer.
ggplot(midwest) +
  geom_point(aes(x = popdensity, y = percollege, color = state)) +
  geom_smooth(
    aes(x = popdensity, y = percollege, color = state),
    se = FALSE
  )

# Same two layers, with the shared aesthetics declared once in ggplot().
ggplot(
  midwest,
  aes(x = popdensity, y = percollege, color = state)
) +
  geom_point() +
  geom_smooth(se = FALSE)
# Name the axes and the color legend through the scale functions; x-axis
# breaks run from 0 to 80000 in steps of 20000.
ggplot(
  midwest,
  aes(x = popdensity, y = percollege, color = state)
) +
  geom_point() +
  scale_x_continuous(
    "Population Density",
    breaks = seq(0, 80000, 20000)
  ) +
  scale_y_continuous("Percent College Graduates") +
  scale_color_discrete("State")

# Points plus smooths, drawn with theme_bw().
ggplot(
  midwest,
  aes(x = popdensity, y = percollege, color = state)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_bw()
# Base plot stored as p1 so the variations below can be added to it.
p1 <- ggplot(
  midwest,
  aes(x = popdensity, y = percollege, color = state)
) +
  geom_point() +
  scale_x_continuous(
    "Population Density",
    breaks = seq(0, 80000, 20000)
  ) +
  scale_y_continuous("Percent College Graduates") +
  theme_bw()

# Add a title and subtitle.
p1 +
  labs(
    title = "Percent College Educated by Population Density",
    subtitle = "County level data for five midwest states"
  )

# Alternative color scales for the state legend.
p1 + scale_color_grey("State")
p1 + scale_color_brewer("State", palette = 'Dark2')

# viridis palettes; discrete = TRUE because state is categorical here.
library(viridis)
p1 + scale_color_viridis(discrete = TRUE)
p1 + scale_color_viridis(option = 'cividis', discrete = TRUE)
# Zoom in on popdensity 0-15000 with coord_cartesian(), which changes the
# visible window of the plot.
ggplot(
  data = midwest,
  aes(x = popdensity, y = percollege, color = state)
) +
  geom_point() +
  scale_x_continuous("Population Density") +
  scale_y_continuous("Percent College Graduates") +
  scale_color_discrete("State") +
  coord_cartesian(xlim = c(0, 15000))

# scale_x_continuous - Bad Practice
# Restricting the range through the scale's `limits` removes the rows that
# fall outside it, for both geom_point and stat_smooth (see the
# "Removed 16 rows" warnings in the output that follows), so the smooths are
# computed without those rows.
ggplot(
  data = midwest,
  aes(x = popdensity, y = percollege, color = state)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_x_continuous("Population Density", limits = c(0, 15000)) +
  scale_y_continuous("Percent College Graduates") +
  scale_color_discrete("State")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Read the tab-separated lotr_clean table straight from GitHub.
lotr <- read_tsv(
  'https://raw.githubusercontent.com/jennybc/lotr/master/lotr_clean.tsv'
)
## Parsed with column specification:
## cols(
## Film = col_character(),
## Chapter = col_character(),
## Character = col_character(),
## Race = col_character(),
## Words = col_integer()
## )

# Show the first rows of the table.
head(lotr)
## # A tibble: 6 x 5
## Film Chapter Character Race Words
## <chr> <chr> <chr> <chr> <int>
## 1 The Fellowship Of The Ring 01: Prologue Bilbo Hobbit 4
## 2 The Fellowship Of The Ring 01: Prologue Elrond Elf 5
## 3 The Fellowship Of The Ring 01: Prologue Galadriel Elf 460
## 4 The Fellowship Of The Ring 02: Concerning Hobbits Bilbo Hobbit 214
## 5 The Fellowship Of The Ring 03: The Shire Bilbo Hobbit 70
## 6 The Fellowship Of The Ring 03: The Shire Frodo Hobbit 128
# Histogram of Words using the default binning.
ggplot(lotr, aes(x = Words)) +
  geom_histogram() +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Control the binning by number of bins ...
ggplot(lotr, aes(x = Words)) +
  geom_histogram(bins = 20) +
  theme_bw()

# ... or by bin width.
ggplot(lotr, aes(x = Words)) +
  geom_histogram(binwidth = 25) +
  theme_bw()

# Map Film to the color aesthetic of the bars.
ggplot(lotr, aes(x = Words, color = Film)) +
  geom_histogram(binwidth = 25) +
  theme_bw()

# One panel per Film instead.
ggplot(lotr, aes(x = Words)) +
  geom_histogram(binwidth = 25) +
  theme_bw() +
  facet_wrap(~ Film)
# Exercises:
# - With more than two groups, histograms are difficult to interpret due to
#   overlap. Instead, use the geom_density to create a density plot for
#   Words for each film.
# - Using geom_boxplot, create boxplots with Words as the y variable and
#   Film as the x variable. Bonus: facet this plot by the variable Race.
#   Bonus2: Zoom in on the bulk of the data.

# Boxplots of Words per Film, one panel per Race, with the x-axis labels
# rotated by 90 degrees.
ggplot(lotr, aes(x = Film, y = Words)) +
  geom_boxplot() +
  facet_wrap(~ Race) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# coord_flip is better
# The same boxplots with coord_flip() in place of the rotated axis text.
ggplot(lotr, aes(x = Film, y = Words)) +
  geom_boxplot() +
  facet_wrap(~ Race) +
  theme_bw() +
  coord_flip()
# Bar chart of the number of rows per Race.
ggplot(lotr, aes(x = Race)) +
  geom_bar() +
  theme_bw()

# Fill the bars by Film (default bar position).
ggplot(lotr, aes(x = Race)) +
  geom_bar(aes(fill = Film)) +
  theme_bw()

# position = 'fill', with the y axis labelled as a proportion.
ggplot(lotr, aes(x = Race)) +
  geom_bar(aes(fill = Film), position = 'fill') +
  theme_bw() +
  ylab("Proportion")

# position = 'dodge'.
ggplot(lotr, aes(x = Race)) +
  geom_bar(aes(fill = Film), position = 'dodge') +
  theme_bw()

# The proportion chart again, using the cividis fill palette.
ggplot(lotr, aes(x = Race)) +
  geom_bar(aes(fill = Film), position = 'fill') +
  theme_bw() +
  ylab("Proportion") +
  scale_fill_viridis(option = 'cividis', discrete = TRUE)
# NOTE(review): the next lines are exercise text whose connecting words were
# partly lost; kept verbatim as comments so the script parses.
# partyid
# .marital
# to the bar chart created in step 1. Do you prefer a stacked or dodged
# version?

# Install plotly only when it is missing, so that re-running the script does
# not reinstall it every time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)

# Build a static ggplot, then pass it to ggplotly() and print the result.
p <- ggplot(data = midwest) +
  geom_point(mapping = aes(x = popdensity, y = percollege))
print(ggplotly(p))

# The same conversion for a plot with colors, axis titles and theme_bw().
p <- ggplot(
  midwest,
  aes(x = popdensity, y = percollege, color = state)
) +
  geom_point() +
  scale_x_continuous(
    "Population Density",
    breaks = seq(0, 80000, 20000)
  ) +
  scale_y_continuous("Percent College Graduates") +
  scale_color_discrete("State") +
  theme_bw()
print(ggplotly(p))
# NOTE(review): exercise text; its opening words appear to have been lost.
# starwars data, create a static ggplot and use the ggplotly function to
# turn it interactive.

# Read the tab-separated lotr_clean table straight from GitHub.
lotr <- read_tsv(
  'https://raw.githubusercontent.com/jennybc/lotr/master/lotr_clean.tsv'
)
## Parsed with column specification:
## cols(
## Film = col_character(),
## Chapter = col_character(),
## Character = col_character(),
## Race = col_character(),
## Words = col_integer()
## )

# Print the tibble.
lotr
## # A tibble: 682 x 5
## Film Chapter Character Race Words
## <chr> <chr> <chr> <chr> <int>
## 1 The Fellowship Of The Ring 01: Prologue Bilbo Hobb~ 4
## 2 The Fellowship Of The Ring 01: Prologue Elrond Elf 5
## 3 The Fellowship Of The Ring 01: Prologue Galadriel Elf 460
## 4 The Fellowship Of The Ring 02: Concerning Hobbits Bilbo Hobb~ 214
## 5 The Fellowship Of The Ring 03: The Shire Bilbo Hobb~ 70
## 6 The Fellowship Of The Ring 03: The Shire Frodo Hobb~ 128
## 7 The Fellowship Of The Ring 03: The Shire Gandalf Wiza~ 197
## 8 The Fellowship Of The Ring 03: The Shire Hobbit K~ Hobb~ 10
## 9 The Fellowship Of The Ring 03: The Shire Hobbits Hobb~ 12
## 10 The Fellowship Of The Ring 04: Very Old Friends Bilbo Hobb~ 339
## # ... with 672 more rows
# plotly histogram of Words.
plot_ly(lotr, x = ~Words) %>%
  add_histogram() %>%
  print()
# Build a plotly histogram of the Words column of `d`, annotated with the
# unique value(s) of its Film column.
#
# d: a data frame with Words and Film columns.
# Returns the annotated plotly object.
one_plot <- function(d) {
  histogram <- add_histogram(plot_ly(d, x = ~Words))
  # xref/yref = "paper" with x = 0.5, y = 1 position the label relative to
  # the plotting area rather than the data.
  add_annotations(
    histogram,
    text = ~unique(Film),
    x = 0.5,
    y = 1,
    xref = "paper",
    yref = "paper",
    showarrow = FALSE
  )
}
# Split lotr by Film, draw one histogram per piece with one_plot(), and
# arrange them in a single row with a shared x axis and no legend.
lotr %>%
  split(.$Film) %>%
  lapply(one_plot) %>%
  subplot(nrows = 1, shareX = TRUE, titleX = FALSE) %>%
  hide_legend() %>%
  print()

# Histogram over Race, colored by Film.
plot_ly(lotr, x = ~Race, color = ~Film) %>%
  add_histogram() %>%
  print()
# Number of rows in lotr for each Race/Film combination (column n).
lotr_count <- count(lotr, Race, Film)

# Total number of rows per Race (column nn), joined back onto the per-film
# counts. The total is named explicitly and the join key is spelled out:
# otherwise the result depends on how count() names its output when `n`
# already exists, and on left_join() guessing the key from shared columns.
lotr_prop <- left_join(
  lotr_count,
  count(lotr_count, Race, wt = n, name = "nn"),
  by = "Race"
)

# Share of each Race's rows that come from each Film, as stacked bars.
lotr_prop %>%
  mutate(prop = n / nn) %>%
  plot_ly(x = ~Race, y = ~prop, color = ~Film) %>%
  add_bars() %>%
  layout(barmode = "stack") %>%
  print()
# Exercises (NOTE(review): the opening words of each sentence appear to have
# been lost):
# - gss_cat data, create a histogram for the tvhours variable.
# - gss_cat data, create a bar chart showing the partyid variable by the
#   marital status.

# plotly scatterplot of percollege against popdensity.
plot_ly(midwest, x = ~popdensity, y = ~percollege) %>%
  add_markers() %>%
  print()

# Marker symbol varies by state.
plot_ly(midwest, x = ~popdensity, y = ~percollege) %>%
  add_markers(symbol = ~state) %>%
  print()

# Marker color varies by state, using five viridis colors.
plot_ly(midwest, x = ~popdensity, y = ~percollege) %>%
  add_markers(color = ~state, colors = viridis::viridis(5)) %>%
  print()

# Number of distinct storm names per year.
storms_yearly <- storms %>%
  group_by(year) %>%
  summarise(num = length(unique(name)))

# Line chart of that yearly count.
plot_ly(storms_yearly, x = ~year, y = ~num) %>%
  add_lines() %>%
  print()
# Exercise (NOTE(review): the opening words appear to have been lost):
# gss_cat data, create a scatterplot showing the age and tvhours variables.

# Install highcharter from GitHub only when it is missing, so that re-running
# the script does not reinstall it every time.
if (!requireNamespace("highcharter", quietly = TRUE)) {
  devtools::install_github("jbkunst/highcharter")
}

# hchart function
library(highcharter)

# Rows per Film/Race combination, drawn as grouped columns.
lotr_count <- lotr %>%
  count(Film, Race)
hchart(lotr_count, "column", hcaes(x = Race, y = n, group = Film)) %>%
  print()

# hchart
# Scatterplot grouped by state.
hchart(
  midwest,
  "scatter",
  hcaes(x = popdensity, y = percollege, group = state)
) %>%
  print()

# hchart() called directly on the Words vector.
hchart(lotr$Words) %>%
  print()
# Exercises (NOTE(review): the opening words of each sentence appear to have
# been lost):
# - hchart function, create a bar chart or histogram with the gss_cat data.
# - hchart function, create a scatterplot with the gss_cat data.

# Build a chart with highchart(), adding one series of counts per film.
# NOTE(review): `categories` receives the whole Race column of lotr_count,
# which presumably repeats each race once per film; confirm that every film
# has the same races in the same order, or the series will not line up with
# the axis labels.
hc <- highchart() %>%
  hc_xAxis(categories = lotr_count$Race) %>%
  hc_add_series(
    name = 'The Fellowship Of The Ring',
    data = filter(lotr_count, Film == 'The Fellowship Of The Ring')$n
  ) %>%
  hc_add_series(
    name = 'The Two Towers',
    data = filter(lotr_count, Film == 'The Two Towers')$n
  ) %>%
  hc_add_series(
    name = 'The Return Of The King',
    data = filter(lotr_count, Film == 'The Return Of The King')$n
  )
hc %>% print()

# Switch the chart type to columns.
hc <- hc %>%
  hc_chart(type = 'column')
hc %>% print()

# Use three viridis colors. Only characters 1-7 of each color string are
# kept; R strings are 1-indexed, so the range starts at 1.
hc <- hc %>%
  hc_colors(substr(viridis(3), 1, 7))
hc %>% print()

# Axis titles.
# NOTE(review): if lotr_count comes from count(Film, Race), `n` is a number
# of rows rather than a sum of Words; confirm that the "Number of Words
# Spoken" wording matches what is plotted.
hc <- hc %>%
  hc_xAxis(title = list(text = "Race")) %>%
  hc_yAxis(
    title = list(text = "Number of Words Spoken"),
    showLastLabel = FALSE
  )
hc %>% print()

# Title, subtitle, legend placement and the exporting option.
hc <- hc %>%
  hc_title(
    text = 'Number of Words Spoken in Lord of the Rings Films',
    align = 'left'
  ) %>%
  hc_subtitle(
    text = 'Broken down by <i>Film</i> and <b>Race</b>',
    align = 'left'
  ) %>%
  hc_legend(
    align = 'right',
    verticalAlign = 'top',
    layout = 'vertical',
    x = 0,
    y = 80
  ) %>%
  hc_exporting(enabled = TRUE)
hc %>% print()
# NOTE(review): tail of an exercise sentence whose beginning appears to have
# been lost; kept as a comment so the script parses.
# gss_cat data.

# Pairwise correlations between four numeric storms columns (using
# pairwise-complete observations), passed to hchart().
select(storms, wind, pressure, ts_diameter, hu_diameter) %>%
  cor(use = "pairwise.complete.obs") %>%
  hchart() %>%
  print()
library(leaflet)

# Map the recorded positions of storms named Ike or Katrina after 2000.
# Each position is a circle whose radius is wind * 1000 and whose popup
# shows the storm name.
storms %>%
  filter(name %in% c('Ike', 'Katrina'), year > 2000) %>%
  leaflet() %>%
  addTiles() %>%
  addCircles(
    lng = ~long,
    lat = ~lat,
    popup = ~name,
    weight = 1,
    radius = ~wind*1000
  ) %>%
  print()
# Original R Markdown chunk header: {r gganimate, eval = FALSE}
# NOTE(review): eval = FALSE suggests this chunk was not meant to run when
# the document was knit; the code below does run if this file is sourced as
# a plain script.

# Install gganimate only when it is missing, so that re-running the script
# does not reinstall it every time.
if (!requireNamespace("gganimate", quietly = TRUE)) {
  install.packages("gganimate")
}
library(gganimate)

# Wind against pressure, one panel per storm status, animated over year;
# the title shows the current frame's time through {frame_time}.
ggplot(storms, aes(x = pressure, y = wind, color = status)) +
  geom_point(show.legend = FALSE) +
  xlab("Pressure") +
  ylab("Wind Speed (MPH)") +
  facet_wrap(~status) +
  theme_bw(base_size = 14) +
  labs(title = 'Year: {frame_time}') +
  transition_time(as.integer(year)) +
  ease_aes('linear')