Chi Zhang

COVID-19 Pandemic

covid = read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')

pop = readxl:::read_excel("data/populationEstimates.xls", skip = 2) %>%
  select(pop2019 = POP_ESTIMATE_2019, State = State, fips = FIPStxt)

pop_ca2019 = pop %>%
  filter(State == "CA") %>%
  slice_max(pop2019, n = 1)

cases = covid %>%
  filter(state %in% c("California")) %>%
  group_by(county) %>%
  mutate(newCases = cases - lag(cases)) %>%
  ungroup() %>%
  filter(date == max(date))
  

most_cases = cases %>%
  slice_max(cases, n = 5) %>%
  select(county, cases)

knitr::kable(most_cases,
             caption = "Most Cases California Counties",
             col.names = c("County", "Cases"))
Most Cases California Counties
County Cases
Los Angeles 2908425
San Diego 824586
Riverside 626695
Orange 600384
San Bernardino 597377
most_new_cases = cases %>%
  slice_max(newCases, n = 5) %>%
  select(county, newCases)

knitr::kable(most_new_cases,
             caption = "Most New Cases California Counties",
             col.names = c("County", "New Cases"))
Most New Cases California Counties
County New Cases
Los Angeles 3942
San Diego 2829
Santa Clara 1749
Alameda 1515
Orange 1430
pop_data1 = right_join(pop, cases, by = "fips") %>%
  mutate(cases_percapita = (cases / pop2019) * 100000,
         newCases_percapita = (newCases / pop2019) * 100000)

most_cases_percapita = pop_data1 %>%
  slice_max(cases_percapita, n = 5) %>%
  select(county, cases_percapita)

knitr::kable(most_cases_percapita,
             caption = "Most Cumulative Cases Per Capita",
             col.names = c("County", "Cases"))
Most Cumulative Cases Per Capita
County Cases
Imperial 36815.39
Kings 36242.32
Lassen 35165.01
Tulare 29199.15
Los Angeles 28970.95
most_new_cases_percapita = pop_data1 %>%
  slice_max(newCases_percapita, n = 5) %>%
  select(county, newCases_percapita)

knitr::kable(most_new_cases_percapita,
             caption = "Most New Cases Per Capita",
             col.names = c("County", "New Cases"))
Most New Cases Per Capita
County New Cases
Tuolumne 178.0535
Inyo 160.7628
Marin 158.7939
San Francisco 124.3266
Mono 117.6959
pop_data2 = right_join(pop, covid, by = "fips") %>%
  filter(date >= max(date) - 13, state == "California") 
  
state_daysdata = pop_data2 %>%
 group_by(state, date) %>%
  summarise(cases = sum(cases)) %>%
  mutate(newCases = cases - lag(cases),
         newCases_percapita = (newCases / 39512223) * 100000) %>%
  ungroup()

safe_county = pop_data2 %>%
  group_by(county, pop2019) %>%
  summarize(max_cases = max(cases), min_cases = min(cases)) %>%
  mutate(newCases = max_cases - min_cases,
         newCases_percapita = (newCases / pop2019) * 100000) %>%
  filter(newCases_percapita <= 100) %>%
  ungroup()
  
Numofcounty = length(safe_county$county)
Numofnewcases = format(max(state_daysdata$cases) - min(state_daysdata$cases))
Numofcases = format(max(state_daysdata$cases))

As the data showing on above, in California’s last 14 days, the total number of cases is 9351630, the total number of new cases is 114496, and the total number of safe county is 14.

covid %>%
  filter(state %in% c("New York", "California", "Louisiana", "Florida")) %>%
  group_by(state, date) %>%
  summarise(cases = max(cases))%>%
  mutate(newCases = cases - lag(cases), roll7 = rollmean(newCases, 7, fill = NA, align = "right")) %>%
  ungroup() %>%
  ggplot(aes(x = date)) +
  geom_col(aes(y = newCases), col = NA, fill = "#F5B8B5") +
  geom_line(aes(y = roll7), col = "darkred", size = 1) +
  theme_gray() +
  labs(title = "New Reported Cases by day in 4 States", x = "Date", y = "Newcases") +
  theme(aspect.ratio = .5) +
  facet_grid(~state, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90))

COVID1 = covid %>%
  filter(state %in% c("New York", "California", "Louisiana", "Florida")) %>%
  right_join(pop, by = "fips") 
  

state_pop = pop %>%
  filter(State %in% c("NY", "CA","LA", "FL")) %>%
  group_by(State) %>%
  slice_max(pop2019, n = 1) %>%
  right_join(COVID1, by = "State") %>%
  select(pop = pop2019.x, date, state, cases, deaths) %>%
  ungroup() %>%
  filter(state %in% c("New York", "California", "Louisiana", "Florida")) %>%
  group_by(pop, state,date) %>%
  summarise(cases = max(cases)) %>%
  mutate(newCases1 = cases - lag(cases), newCases_Percapita = newCases1 / pop, Roll7 = rollmean(newCases_Percapita, 7, fill = NA, align = "right")) %>%
  ungroup()

ggplot(state_pop, aes(x = date)) +
  geom_col(aes(y = newCases_Percapita), col = "yellow", fill = "#F5B8B5") +
  geom_line(aes(y = Roll7), col = "blue", size = 1) +
  theme_gray() +
  labs(title = "New Reported Cases Per Capita by day in 4 States ", x = "Date", y = "Newcases") +
  theme(aspect.ratio = .5) +
  facet_grid(~state, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90))

With the data of population, the graph is more sensitive to the local condition of the pandemic. Because some states’ population is small such as Louisiana, the basic infected number is too small to evaluate and we cannot analyze the changes from the cases itself. Cases per capita solves the problem when we try to monitor local pandemic changes. In addition, cases per capita also can help us observe the control of the infection locally. But sometimes it is biased. The total number of the cases matters when we try to define the disease’s pandemic condition in specific area. Analysis of both of them is better way to monitor the pandemic.

local = na.omit(readr::read_csv("data/county-centroids.csv"))  %>%
  select(fips, county = name, state = state_name, LON, LAT)

local_data1 = covid %>%
  group_by(date, county, fips) %>%
  ungroup() %>%
  left_join(local, by = "fips") %>%  
  select(date, county = county.x, fips, cases, LAT, LON) %>%
  mutate(Month = format(date, "%m")) %>%
  group_by(Month) %>%
  summarise(X = sum(LAT * cases, na.rm = TRUE) / sum(cases, na.rm = TRUE), 
            Y = sum(LON * cases, na.rm = TRUE) / sum(cases, na.rm = TRUE),
            Totalcases = sum(cases, na.rm =TRUE )) 
  
 
  


ggplot(local_data1, aes(x = Y, y = X)) +
  borders("state", fill = "gray90", colour = "white") +
  geom_point(aes(color = Month, size = Totalcases)) +
  labs(title = "COVID-19 Pandemic Trending ", 
       x = "Longitude", 
       y = "Latitude",
       caption = "based on https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv",
       subtitle = 'Data Source: NY-Times',
       color = "Month") +
  theme_gray()

From the data trending, total number of cases continually increases from last summer in 2020. The weighted mean center of the is near the southeast of the States and slightly moved to west last winter and back to southeast this summer because the huge infection number in southeast states such as Florida last summer and increase of cases in California last winter. With the injection of the vaccine, rate of increase started to decrease in spring 2021.