- The counties in California with the most COVID cases are Los Angeles, Riverside, Orange, San Bernardino, and San Diego.
- The counties in California with the most new COVID cases are Los Angeles, San Bernardino, Riverside, Fresno, and Orange.
- Our data shows that these were the top counties for COVID cases in the state. The county of Alpine showed zero cases, making it the safest county in the state.
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.1
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts --------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(knitr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(readxl)
PopulationEstimates <- read_excel("../data/PopulationEstimates.xls",
skip = 2)
url="https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
covid = read_csv(url)
## Parsed with column specification:
## cols(
## date = col_date(format = ""),
## county = col_character(),
## state = col_character(),
## fips = col_character(),
## cases = col_double(),
## deaths = col_double()
## )
state.of.interest = "California"
covid2 = covid %>%
filter(state == state.of.interest) %>%
group_by(county) %>%
mutate(new_cases = cases - lag(cases)) %>%
ungroup()
mostcases = covid2 %>%
filter(date == max(date)) %>%
slice_max(cases, n = 5) %>%
select(county, cases)
mostnewcases = covid2 %>%
filter(date == max(date)) %>%
slice_max(new_cases, n = 5) %>%
select(county, new_cases)
knitr::kable(mostcases, caption = "California Counties with the Most COVID Cases",
col.names = c("County","Cases"),
format.args = list(big.mark = ","))
California Counties with the Most COVID Cases
Los Angeles |
221,950 |
Riverside |
46,720 |
Orange |
43,709 |
San Bernardino |
41,231 |
San Diego |
34,741 |
knitr::kable(mostnewcases, caption = "California Counties with the Most New COVID Cases",
col.names = c("County", "New Cases"),
format.args = list(big.mark = ","))
California Counties with the Most New COVID Cases
Los Angeles |
1,188 |
San Bernardino |
789 |
Fresno |
703 |
Sacramento |
373 |
Orange |
342 |
PopEstCA = PopulationEstimates %>%
filter(State == "CA") %>%
select(POP_ESTIMATE_2019, FIPStxt)
left_join(covid2, PopEstCA, by = c('fips' = 'FIPStxt')) ->
CAcovidpop
mostcasescap = CAcovidpop %>%
filter(date == max(date)) %>%
mutate(Most_cases_capita = (cases / POP_ESTIMATE_2019) * 100) %>%
slice_max(Most_cases_capita, n = 5) %>%
select(county, Most_cases_capita)
knitr::kable(mostcasescap, caption = "California Counties with the Most COVID Cases per capita",
col.names = c("County", "Cases per capita"))
California Counties with the Most COVID Cases per capita
Imperial |
5.550313 |
Kings |
3.605335 |
Kern |
2.951560 |
Tulare |
2.630230 |
Merced |
2.440579 |
mostnewcasescap = CAcovidpop %>%
filter(date == max(date)) %>%
mutate(Most_new_cases_capita = (new_cases / POP_ESTIMATE_2019) * 100) %>%
slice_max(Most_new_cases_capita, n = 5) %>%
select(county, Most_new_cases_capita)
knitr::kable(mostnewcasescap, caption = "California Counties with the Most New COVID Cases per capita",
col.names = c("County", "New Cases per Capita"))
California Counties with the Most New COVID Cases per capita
Monterey |
0.0783300 |
Fresno |
0.0703633 |
Kings |
0.0686544 |
Inyo |
0.0443484 |
Sutter |
0.0443432 |
CAcovidpop3 = CAcovidpop %>%
group_by(county) %>%
mutate(total_new_cases = rollsum(new_cases, 14, fill = NA, align = "right")) %>%
ungroup() %>%
filter(date > max(date) - 14) %>%
group_by(county) %>%
summarise(total_new_cases)
## `summarise()` regrouping output by 'county' (override with `.groups` argument)
states.of.interest = c("California", "New York", "Louisiana", "Florida")
CACOVIDSTATES2 = covid %>%
filter(state %in% states.of.interest) %>%
group_by(state, date) %>%
summarise(cases = sum(cases)) %>%
mutate(new_states_cases = cases - lag(cases),
dailymean = rollmean(new_states_cases, 7, fill = NA, align = "right")) %>%
filter(new_states_cases >= 0) %>%
ungroup()
## `summarise()` regrouping output by 'state' (override with `.groups` argument)
CACOVIDSTATES2 %>%
ggplot(aes(x = date)) +
geom_col(aes(y = new_states_cases, col = state)) +
geom_line(aes(y = dailymean), size = 1, col = "blue") +
facet_wrap(~state, scales = 'free_y') +
labs(title = "New COVID Cases by State",
x = "",
y = "",
caption = "NY Times COVID Data",
subtitle = "7-day Average (Blue)") +
theme_dark() +
theme(legend.position = "NA") ->
lab2plot
ggsave(lab2plot, file = "lab2plot.png",
width = 8,
unit = "in")
## Saving 8 x 5 in image
## Warning: Removed 6 row(s) containing missing values (geom_path).