vignettes/bartik.Rmd
bartik.Rmd
The goal here is to use lagged industry shares at the regional level, together with aggregate changes in an outcome variable, to generate cross-regional variation. We illustrate the approach with employment, which is easy to construct from the County Business Patterns (CBP) data.
There are essentially two steps:
We are going to use the following libraries
The CBP reports SIC codes from 1986 to 1997 and NAICS codes from 1998 to 2016. This forces us to split the aggregation of the dataset into two parts.
First we create a small function that downloads the data for a given year and processes it, keeping only the variables we are interested in:
# Download one year of county-level CBP data and aggregate employment by
# county (fips) and SIC industry.
#
# Arguments:
#   year_target  year to download; passed as both the first and the last year
#                to download_all_cbp().
#
# Returns a data.table with one row per (fips, sic), ordered by sic then fips:
#   fips     concatenation of the state and county FIPS codes
#   sic      industry code (only codes that parse as a number are kept)
#   emp      employment summed within (fips, sic)
#   fipsemp  sum of emp over all kept industries of the county
#   date_y   year_target
read_cbp_sic <- function(year_target) {

  # Download the data from the census at the county level
  dt1 <- download_all_cbp(year_target, year_target, aggregation_level = "county")

  # Keep rows whose sic code parses as a number; any other code coerces to NA
  # (as.numeric emits a coercion warning for those) and is dropped.
  dt1 <- dt1[!is.na(as.numeric(sic))]

  # Impute employment wherever a size-class flag is present. One lookup table
  # replaces a run of per-flag assignments.
  # NOTE(review): the values look like approximate midpoints of the CBP
  # employment size classes -- confirm against the Census record layout.
  emp_by_flag <- c(
    A = 10, B = 60, C = 175, E = 375, F = 750, G = 1750,
    H = 3750, I = 7500, J = 17500, K = 37500, L = 75000, M = 100000
  )
  dt1[
    empflag %in% names(emp_by_flag),
    emp := unname(emp_by_flag[as.character(empflag)])
  ]

  # Build the county identifier, then aggregate and clean up
  dt1[, fips := paste0(fipstate, fipscty)]
  dt1 <- dt1[, .(emp = sum(emp, na.rm = TRUE)), by = .(fips, sic)][order(sic, fips)]
  dt1[, fipsemp := sum(emp, na.rm = TRUE), by = .(fips)]
  dt1[, date_y := year_target]

  # The trailing [] re-enables auto-printing, which := suppresses for the
  # first print of a data.table returned from a function.
  dt1[]
}
Then we download the data for every year in which we have SIC codes:
Next we create a similar function for the NAICS years; it downloads the data for a given year and processes it, keeping only the variables we are interested in:
# Download one year of county-level CBP data and aggregate employment by
# county (fips) and 4-digit NAICS industry.
#
# Arguments:
#   year_target  year to download; passed as both the first and the last year
#                to download_all_cbp().
#
# Returns a data.table with one row per (fips, naics), ordered by naics then
# fips:
#   fips     concatenation of the state and county FIPS codes
#   naics    industry code with every non-digit character removed (4 digits)
#   emp      employment summed within (fips, naics)
#   fipsemp  sum of emp over all kept industries of the county
#   date_y   year_target
read_cbp_naics <- function(year_target) {

  # Download the data from the census at the county level
  dt1 <- download_all_cbp(year_target, year_target, aggregation_level = "county")

  # Strip every non-digit character from the code, then keep only the codes
  # that are exactly 4 digits long.
  dt1[, naics := gsub("\\D", "", naics)]
  dt1 <- dt1[str_length(naics) == 4]

  # Impute employment wherever a size-class flag is present. One lookup table
  # replaces a run of per-flag assignments.
  # NOTE(review): the values look like approximate midpoints of the CBP
  # employment size classes -- confirm against the Census record layout.
  emp_by_flag <- c(
    A = 10, B = 60, C = 175, E = 375, F = 750, G = 1750,
    H = 3750, I = 7500, J = 17500, K = 37500, L = 75000, M = 100000
  )
  dt1[
    empflag %in% names(emp_by_flag),
    emp := unname(emp_by_flag[as.character(empflag)])
  ]

  # Build the county identifier, then aggregate and clean up
  dt1[, fips := paste0(fipstate, fipscty)]
  dt1 <- dt1[, .(emp = sum(emp, na.rm = TRUE)), by = .(fips, naics)][order(naics, fips)]
  dt1[, fipsemp := sum(emp, na.rm = TRUE), by = .(fips)]
  dt1[, date_y := year_target]

  # The trailing [] re-enables auto-printing, which := suppresses for the
  # first print of a data.table returned from a function.
  dt1[]
}
Then we download the data for every year in which we have NAICS codes:
First we create the shares of employment for a given industry in the region:
# SIC years: employment share of each industry within its county, followed by
# the one-period lag of that share computed inside each (county, industry)
# series along date_y.
dt_emp_sic[, share_ind_cty := emp / fipsemp]
dt_emp_sic[
  ,
  l_share_ind_cty := tlag(share_ind_cty, 1, time = date_y),
  by = c("fips", "sic")
]

# NAICS years: same two steps, grouping on the naics code instead.
dt_emp_naics[, share_ind_cty := emp / fipsemp]
dt_emp_naics[
  ,
  l_share_ind_cty := tlag(share_ind_cty, 1, time = date_y),
  by = c("fips", "naics")
]
Then we create a variable that includes employment in all regions except the current one, and estimate the growth of employment:
# Leave-one-out industry employment and its log growth.
#
# indemp is the employment of an industry summed over every county in a given
# year. Subtracting the county's own employment in that industry leaves the
# employment of the industry in all regions except the current one.
# Subtracting emp from the county total (fipsemp) instead would measure the
# county's other industries, not the industry-level aggregate that the shares
# are meant to weight.
# The column names fipsemp_clean and d_fipsemp are kept because the
# aggregation chunks that follow refer to d_fipsemp.
# NOTE(review): assumes one row per (fips, industry, date_y) in both tables --
# confirm against the chunks that build dt_emp_sic and dt_emp_naics.
dt_emp_sic[, indemp := sum(emp, na.rm = TRUE), by = .(sic, date_y)]
dt_emp_sic[, fipsemp_clean := indemp - emp]
dt_emp_sic[
  ,
  d_fipsemp := log(fipsemp_clean / tlag(fipsemp_clean, 1, time = date_y)),
  by = .(fips, sic)
]

dt_emp_naics[, indemp := sum(emp, na.rm = TRUE), by = .(naics, date_y)]
dt_emp_naics[, fipsemp_clean := indemp - emp]
dt_emp_naics[
  ,
  d_fipsemp := log(fipsemp_clean / tlag(fipsemp_clean, 1, time = date_y)),
  by = .(fips, naics)
]
Finally we weight the aggregate change in employment at the industry level by the local industry shares from above:
# Share-weighted mean of d_fipsemp for every (year, county) cell, using the
# lagged local industry shares as weights. The results are only displayed
# here, not stored.
dt_emp_sic[
  ,
  .(d_emp = wtd.mean(d_fipsemp, weights = l_share_ind_cty, na.rm = TRUE)),
  by = c("date_y", "fips")
]
dt_emp_naics[
  ,
  .(d_emp = wtd.mean(d_fipsemp, weights = l_share_ind_cty, na.rm = TRUE)),
  by = c("date_y", "fips")
]
To obtain the whole time series, we simply append the two results together:
# Apply the same share-weighted aggregation to the SIC and the NAICS tables
# and stack the two results, SIC rows first, into one table.
dt_bartik <- rbindlist(
  lapply(
    list(dt_emp_sic, dt_emp_naics),
    function(dt_emp) {
      dt_emp[
        ,
        .(d_emp = wtd.mean(d_fipsemp, weights = l_share_ind_cty, na.rm = TRUE)),
        by = c("date_y", "fips")
      ]
    }
  )
)
# Empty brackets force the table to print.
dt_bartik[]