11 Population by Demographic

To calculate infection rates by demographic group (such as county, health officer region, age category, sex, or race/ethnicity), we first compute the total population for each demographic category in the population data frame. The population dataset is then joined to both the master database (which merges all three datasets) and the individual California and Los Angeles datasets.

By creating and maintaining these summarized population counts, we avoid having to recalculate them each time we focus on a different demographic group. For example, once this population dataset is joined to the California data, we can easily calculate population-adjusted infection rates that allow valid comparisons across counties with differing population sizes.

Code

##-- Build the population table: attach the race/ethnicity labels, then add
##-- one repeated "total_*_pop" column per demographic grouping so later
##-- rate calculations can read the denominator straight off any row.
step2_pop_df <- step2_pop_df_recat %>%
  ##-- derive race_short via clean(), drop the raw column, and join the map
  mutate(race_short = clean(race_ethnicity)) %>%
  select(-race_ethnicity) %>%
  left_join(race_ethnicity_map, by = "race_short") %>%
  relocate(race_coded, race_short, race_long, .after = sex) %>%
  ##-- total population per county
  group_by(county, health_officer_region) %>%
  mutate(total_cnty_pop = sum(pop)) %>%
  ungroup() %>%
  ##-- total population per county and race/ethnicity
  group_by(county, health_officer_region, race_coded, race_short, race_long) %>%
  mutate(total_race_pop = sum(pop)) %>%
  ungroup() %>%
  ##-- total population per county and age category
  group_by(county, age_cat) %>%
  mutate(total_age_pop = sum(pop)) %>%
  ungroup() %>%
  ##-- total population per county and sex
  group_by(county, health_officer_region, sex) %>%
  mutate(total_sex_pop = sum(pop)) %>%
  ungroup() %>%
  ##-- total population per health officer region
  group_by(health_officer_region) %>%
  mutate(total_HOR_pop = sum(pop)) %>%
  ungroup()

##-- statewide population: keep one row per health officer region so each
##-- regional total is counted once, then add the regional totals together
total_ca_pop <- step2_pop_df %>%
  distinct(health_officer_region, total_HOR_pop) %>%
  pull(total_HOR_pop) %>%
  sum(na.rm = TRUE)

##-- add the statewide total to every row of the population table
##-- `.env$` forces dplyr to read the scalar computed above rather than a
##-- same-named column, which would win under data masking if the data
##-- frame already carried a (possibly stale) total_ca_pop column.
step2_pop_df <- step2_pop_df %>%
  mutate(total_ca_pop = .env$total_ca_pop)

Code

##-- join the California data to the population table on the shared
##-- demographic keys, move the region and pop columns next to their related
##-- columns, and make age_cat a factor with the age bands in ascending order
step2_ca_df <- step2_ca_df %>%
  left_join(
    step2_pop_df,
    by = c(
      "county", "sex", "race_coded",
      "race_short", "race_long", "age_cat"
    )
  ) %>%
  relocate(health_officer_region, .after = county) %>%
  relocate(pop, .after = race_long) %>%
  mutate(
    age_cat = factor(age_cat, levels = c("0-17", "18-49", "50-64", "65+"))
  )

##-- join the Los Angeles County data to the population table on the shared
##-- demographic keys, move the region and pop columns next to their related
##-- columns, and make age_cat a factor with the age bands in ascending order
step2_la_cnty_df <- step2_la_cnty_df %>%
  left_join(
    step2_pop_df,
    by = c(
      "county", "sex", "race_coded",
      "race_short", "race_long", "age_cat"
    )
  ) %>%
  relocate(health_officer_region, .after = county) %>%
  relocate(pop, .after = race_long) %>%
  mutate(
    age_cat = factor(age_cat, levels = c("0-17", "18-49", "50-64", "65+"))
  )

Code

#-write.csv(step2_ca_df, file = here("data/tbl_outputs_03/step2_ca_df.csv"), row.names = FALSE)
#-write.csv(step2_la_cnty_df, file = here("data/tbl_outputs_03/step2_la_cnty_df.csv"), row.names = FALSE)
#-write.csv(step2_pop_df, file = here("data/tbl_outputs_03/step2_pop_df.csv"), row.names = FALSE)

Table 2: Example of Calculating the Infection Rate per 100,000 Population

Code

##-- One row per county (within its health officer region): population,
##-- infected count, proportion infected, and infection rate per 100,000.
by_cnty_year <- step2_ca_df %>%
  group_by(health_officer_region, county) %>%
  summarise(
    ##-- the county total is repeated on every row of a county; take the first
    total_cnty_pop = first(total_cnty_pop),
    ##-- NOTE(review): max() is the largest single cumulative_infected value
    ##-- in the county; if rows are split by demographic stratum this is not
    ##-- the county-wide total -- confirm against the data layout
    total_infected = max(cumulative_infected),
    total_inf_prop = total_infected / total_cnty_pop,
    ##-- a rate per 100,000 is the proportion times 1e5 (previously 10^4,
    ##-- which produced a rate per 10,000 under a "per 100K" label)
    inf_rate_100k = round(total_inf_prop * 1e5, 1),
    .groups = "drop"
  )

##-- small example table: drop the region column, keep the first row of each
##-- county, take the first three counties, and order those three from the
##-- highest to the lowest infection rate
cnty_slice <- by_cnty_year %>%
  select(-health_officer_region) %>%
  group_by(county) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  slice_head(n = 3) %>%
  arrange(desc(inf_rate_100k))

##-- render the example as a bordered table with centred cells, a dark
##-- header row, and the rate column (column 5) highlighted
cnty_slice %>%
  kbl(
    escape = FALSE,
    align = "c",
    col.names = c(
      "County",
      "Total Population",
      "Total Infected",
      "Proportion Infected",
      "Infection Rate per 100K"
    )
  ) %>%
  ##-- row 0 is the header row
  row_spec(
    0,
    bold = TRUE,
    background = "#0f172a",
    extra_css = "font-size: 16px!important;color:#ffffff;"
  ) %>%
  column_spec(5, bold = TRUE, background = "#5ce1e6") %>%
  kable_styling(bootstrap_options = "bordered")

County	Total Population	Total Infected	Proportion Infected	Infection Rate per 100K
Alpine	1165	25	0.0214592	214.6
Amador	40122	767	0.0191167	191.2
Alameda	1656037	12427	0.0075041	75.0