11  Population by Demographic

To calculate infection rates by demographic groups (such as county, health officer region, sex, or race/ethnicity), we first summarize total population counts for each demographic category within the population dataframe. The population dataset is then joined to both the master database (which merges all three datasets) and the individual California and Los Angeles datasets.

By creating and maintaining these summarized population counts, we avoid having to recalculate them each time we focus on a different demographic group. For example, once this population dataset is joined to the California data, we can easily calculate population-adjusted infection rates that allow valid comparisons across counties with differing population sizes.

Code
# Build the population dataframe with per-demographic population totals.
# Each total is attached as a column (grouped mutate), so the row count is
# unchanged and every row carries the totals of the groups it belongs to.
step2_pop_df <- step2_pop_df_recat %>%

  ##-- attach the race/ethnicity map
  # clean() is defined elsewhere; its output is used as the join key.
  mutate(race_short = clean(race_ethnicity)) %>%
  select(!race_ethnicity) %>%
  left_join(race_ethnicity_map, by = "race_short") %>%
  relocate(race_coded, race_short, race_long, .after = sex) %>%

  ##-- population total per county
  group_by(county, health_officer_region) %>%
  mutate(total_cnty_pop = sum(pop)) %>%
  ungroup() %>%

  ##-- population total per county x race/ethnicity
  group_by(county, health_officer_region, race_coded, race_short, race_long) %>%
  mutate(total_race_pop = sum(pop)) %>%
  ungroup() %>%

  ##-- population total per county x age category
  group_by(county, age_cat) %>%
  mutate(total_age_pop = sum(pop)) %>%
  ungroup() %>%

  ##-- population total per county x sex
  group_by(county, health_officer_region, sex) %>%
  mutate(total_sex_pop = sum(pop)) %>%
  ungroup() %>%

  ##-- population total per health officer region
  group_by(health_officer_region) %>%
  mutate(total_HOR_pop = sum(pop)) %>%
  ungroup()

##-- total CA population
# Each region's total repeats on every row of that region, so collapse to the
# distinct (region, total) pairs before adding the regional totals together.
total_ca_pop <- step2_pop_df %>%
  distinct(health_officer_region, total_HOR_pop) %>%
  pull(total_HOR_pop) %>%
  sum(na.rm = TRUE)

##-- store the statewide total as a constant column
step2_pop_df <- mutate(step2_pop_df, total_ca_pop = total_ca_pop)
Code
#-- join datasets to new population dataframe

# Attach population counts and totals to a morbidity dataset.
#
# df:     dataset to enrich; must contain the six demographic key columns
#         used in `by` below.
# pop_df: population dataframe carrying the same keys plus
#         health_officer_region, pop and the total_* columns.
#
# Returns df with the population columns added, health_officer_region moved
# next to county, pop moved next to race_long, and age_cat converted to a
# factor with a fixed level order.
join_pop_totals <- function(df, pop_df) {
  df %>%
    left_join(
      pop_df,
      by = c("county", "sex", "race_coded", "race_short", "race_long", "age_cat")
    ) %>%
    relocate(health_officer_region, .after = county) %>%
    relocate(pop, .after = race_long) %>%
    # Any age_cat value not listed in `levels` becomes NA.
    mutate(age_cat = factor(age_cat, levels = c("0-17", "18-49", "50-64", "65+")))
}

# Both datasets go through the same helper so they cannot drift apart.
step2_ca_df <- join_pop_totals(step2_ca_df, step2_pop_df)

step2_la_cnty_df <- join_pop_totals(step2_la_cnty_df, step2_pop_df)
Code
#-write.csv(step2_ca_df, file = here("data/tbl_outputs_03/step2_ca_df.csv"), row.names = FALSE)
#-write.csv(step2_la_cnty_df, file = here("data/tbl_outputs_03/step2_la_cnty_df.csv"), row.names = FALSE)
#-write.csv(step2_pop_df, file = here("data/tbl_outputs_03/step2_pop_df.csv"), row.names = FALSE)

Table 2: Example of Calculating the Infection Rate per 100,000 Population

Code
# County-level infection summary: one row per health officer region / county.
by_cnty_year <- step2_ca_df %>%
  group_by(health_officer_region, county) %>%
  summarise(
    # total_cnty_pop repeats on every row of a county, so first() recovers it.
    total_cnty_pop = first(total_cnty_pop),
    # NOTE(review): takes the largest cumulative_infected value among the
    # county's rows; presumably that is the county's final cumulative count.
    # Confirm the rows are not split by demographic stratum.
    total_infected = max(cumulative_infected),
    total_inf_prop = total_infected / total_cnty_pop,
    # Rate per 100,000 residents: proportion x 1e5. The previous multiplier
    # of 10^4 produced a per-10,000 rate under a per-100K name.
    inf_rate_100k = round(total_inf_prop * 1e5, 1),
    .groups = "drop"
  )

# Pick a small example set of counties for the demonstration table.
cnty_slice <- by_cnty_year %>%
  # Region is not shown in the table.
  select(-health_officer_region) %>%
  # Keep one row per county.
  # NOTE(review): the grouped slice also appears to leave rows in county
  # order (the rendered table shows Alameda, Alpine and Amador selected);
  # confirm before swapping this for distinct().
  group_by(county) %>%
  slice(1) %>%
  ungroup() %>%
  # Take the first three counties, then rank them by infection rate.
  slice_head(n=3) %>%
  arrange(desc(inf_rate_100k))

# Render the example counties as a styled table: dark header row with white
# text, highlighted fifth column (the infection rate), bordered layout.
cnty_slice %>%
  kbl(
    col.names = c(
      "County",
      "Total Population",
      "Total Infected",
      "Proportion Infected",
      "Infection Rate per 100K"
    ),
    align = "c",
    escape = FALSE
  ) %>%
  # Row 0 is the header row.
  row_spec(
    0,
    bold = TRUE,
    background = "#0f172a",
    extra_css = "font-size: 16px!important;color:#ffffff;"
  ) %>%
  column_spec(5, bold = TRUE, background = "#5ce1e6") %>%
  kable_styling(bootstrap_options = "bordered")
County Total Population Total Infected Proportion Infected Infection Rate per 100K
Alpine 1165 25 0.0214592 2145.9
Amador 40122 767 0.0191167 1911.7
Alameda 1656037 12427 0.0075041 750.4