Geography 176A

The counties in California with the most COVID cases are Los Angeles, Riverside, Orange, San Bernardino, and San Diego.
The counties in California with the most new COVID cases are Los Angeles, San Bernardino, Riverside, Fresno, and Orange.
Our data shows that these were the top counties for COVID cases in the state. The county of Alpine showed zero cases, making it the safest county in the state.

library(tidyverse)

## -- Attaching packages ------------------------------------------------------------ tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.1
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0

## -- Conflicts --------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(knitr)

library(zoo)

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(readxl)

PopulationEstimates <- read_excel("../data/PopulationEstimates.xls",
  skip = 2)

url="https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"

covid = read_csv(url)

## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character(),
##   fips = col_character(),
##   cases = col_double(),
##   deaths = col_double()
## )

state.of.interest = "California"

covid2 = covid %>%
  filter(state == state.of.interest) %>% 
  group_by(county) %>% 
  mutate(new_cases = cases - lag(cases)) %>% 
  ungroup()

mostcases = covid2 %>% 
  filter(date == max(date)) %>% 
  slice_max(cases, n = 5) %>% 
  select(county, cases)

mostnewcases = covid2 %>% 
  filter(date == max(date)) %>% 
  slice_max(new_cases, n = 5) %>% 
  select(county, new_cases)
  
knitr::kable(mostcases, caption = "California Counties with the Most COVID Cases",
  col.names = c("County","Cases"),
  format.args = list(big.mark = ","))

California Counties with the Most COVID Cases
County	Cases
Los Angeles	221,950
Riverside	46,720
Orange	43,709
San Bernardino	41,231
San Diego	34,741

knitr::kable(mostnewcases, caption = "California Counties with the Most New COVID Cases",
  col.names = c("County", "New Cases"),
  format.args = list(big.mark = ","))

California Counties with the Most New COVID Cases
County	New Cases
Los Angeles	1,188
San Bernardino	789
Fresno	703
Sacramento	373
Orange	342

PopEstCA = PopulationEstimates %>% 
  filter(State == "CA") %>% 
  select(POP_ESTIMATE_2019, FIPStxt)

left_join(covid2, PopEstCA, by = c('fips' = 'FIPStxt')) ->
CAcovidpop

mostcasescap = CAcovidpop %>% 
  filter(date == max(date)) %>% 
  mutate(Most_cases_capita = (cases / POP_ESTIMATE_2019) * 100) %>% 
  slice_max(Most_cases_capita, n = 5) %>% 
  select(county, Most_cases_capita)

knitr::kable(mostcasescap, caption = "California Counties with the Most COVID Cases per capita",
  col.names = c("County", "Cases per capita"))

California Counties with the Most COVID Cases per capita
County	Cases per capita
Imperial	5.550313
Kings	3.605335
Kern	2.951560
Tulare	2.630230
Merced	2.440579

mostnewcasescap = CAcovidpop %>% 
  filter(date == max(date)) %>% 
  mutate(Most_new_cases_capita = (new_cases / POP_ESTIMATE_2019) * 100) %>% 
  slice_max(Most_new_cases_capita, n = 5) %>% 
  select(county, Most_new_cases_capita)

knitr::kable(mostnewcasescap, caption = "California Counties with the Most New COVID Cases per capita",
  col.names = c("County", "New Cases per Capita"))

California Counties with the Most New COVID Cases per capita
County	New Cases per Capita
Monterey	0.0783300
Fresno	0.0703633
Kings	0.0686544
Inyo	0.0443484
Sutter	0.0443432

CAcovidpop3 = CAcovidpop %>% 
  group_by(county) %>% 
  mutate(total_new_cases = rollsum(new_cases, 14, fill = NA, align = "right")) %>% 
  ungroup() %>% 
  filter(date > max(date) - 14) %>% 
  group_by(county) %>% 
  summarise(total_new_cases)

## `summarise()` regrouping output by 'county' (override with `.groups` argument)

states.of.interest = c("California", "New York", "Louisiana", "Florida")

CACOVIDSTATES2 = covid %>% 
  filter(state %in% states.of.interest) %>%
  group_by(state, date) %>% 
  summarise(cases = sum(cases)) %>% 
   mutate(new_states_cases = cases - lag(cases),
         dailymean = rollmean(new_states_cases, 7, fill = NA, align = "right")) %>%
  filter(new_states_cases >= 0) %>% 
  ungroup()

## `summarise()` regrouping output by 'state' (override with `.groups` argument)

CACOVIDSTATES2 %>% 
  ggplot(aes(x = date)) +
  geom_col(aes(y = new_states_cases, col = state)) +
  geom_line(aes(y = dailymean), size = 1, col = "blue") +
  facet_wrap(~state, scales = 'free_y') +
  labs(title = "New COVID Cases by State",
       x = "",
       y = "",
       caption = "NY Times COVID Data",
       subtitle = "7-day Average (Blue)") +
  theme_dark() +
  theme(legend.position = "NA") ->
  lab2plot
ggsave(lab2plot, file = "lab2plot.png",
       width = 8,
       unit = "in")

## Saving 8 x 5 in image

## Warning: Removed 6 row(s) containing missing values (geom_path).

Geography 176A

Lab 02: COVID-19 Pandemic

Leslie Alonzo