/************************************************************************************************************* This ado-file provides alternative cluster-robust standard errors. It is intended for use as a post-estimation command. Written by Gabriel Chodorow-Reich and provided without any warranty First version: May 2018 This version: July 2019 Options: small: apply Stata small sample adjustment to standard errors lz2: use bias-adjusted cluster version of hc2 standard errors as in Imbens and Kolesar (RESTAT 2016). bm: use Bell-McCaffrey degrees of freedom adjustment as in Imbens and Kolesar (RESTAT 2016). Some code borrowed from: https://blog.stata.com/2016/01/19/programming-an-estimation-command-in-stata-adding-robust-and-cluster-robust-vces-to-our-mata-based-ols-command/ You may also cite: Chodorow-Reich, Gabriel, Gita Gopinath, Prachi Mishra, and Abhinav Narayanan. 2019. "Cash and the Economy: Evidence from India's Demonetization." Quarterly Journal of Economics 135 (1): 57-103. *************************************************************************************************************/ cap program drop clustermore program define clustermore, eclass version 12.1 syntax varlist(max=2), [small lz2 bm noreport debug] /************************************************************************************************************* Prelinaries. *************************************************************************************************************/ if `"`e(cmdline)'"'==`""' { di as error "Regression command required before using clustermore" exit } /*Save cmdline*/ local cmdline clustermore `*' /*Reporting*/ if `"`report'"'==`"noreport"' local quireport qui if `"`debug'"'==`""' local quidebug qui /*Mark samples*/ tempvar touse _n gen `touse' = e(sample) gen `_n' = _n /*VCE*/ local nclustervars: list sizeof varlist forvalues c = 1/`nclustervars' { /*Loop over cluster variables*/ local clustervar: word `c' of `varlist' tempvar clustervar_`c' cap confirm numeric variable `clustervar' if _rc==0 qui clonevar `clustervar_`c'' = `clustervar' if `touse' else qui egen `clustervar_`c'' = group(`clustervar') if `touse' } local vce cluster /*Current estimation result*/ tempname est eststo `est' /************************************************************************************************************* Dependent variable. *************************************************************************************************************/ tempvar Y gen `Y' = `e(depvar)' /************************************************************************************************************* Weights. *************************************************************************************************************/ if `"`e(wtype)'"'!=`""' { local wtcmd [`e(wtype)' `e(wexp)'] tempvar wt gen `wt' `e(wexp)' if `touse' } /************************************************************************************************************* Residuals. *************************************************************************************************************/ if `"`debug'"'!=`""' di `"Constructing residuals"' /*Incompatible with reg, abs(). predict, resid following reg, abs() excludes the fixed effects*/ if `"`e(absvar)'"'!=`""' & `"`e(cmd)'"'!=`"areg"' di `"{red:Caution:} typing {cmd:predict, resid} must produce residuals and not the sum of residuals and the fixed effect."' tempvar r qui predict `r' if `touse', resid /************************************************************************************************************* Regressors. *************************************************************************************************************/ if `"`debug'"'!=`""' di `"Constructing regressors"' local Xnames: colnames e(b) local instd `e(instd)' local j = 0 foreach var of local Xnames { local j = `j'+1 tempvar X`j' qui gen `X`j'' = `var' if `touse' /*Replace with fitted value if endogenous regressor*/ if `: list var in instd' { `quidebug' reg `X`j'' `e(insts)' if `touse' `wtcmd' /*First stage regression*/ drop `X`j'' `quidebug' predict `X`j'' if `touse', xb qui estimates restore `est' } local X `X' `X`j'' } if `"`debug'"'!=`""' list `X' in 1/10 /************************************************************************************************************* De-mean regressors if absorb specified. *************************************************************************************************************/ if `"`e(absvar)'"'!=`""' | `"`e(absvars)'"'!=`""' { if `"`debug'"'!=`""' di `"Demeaning regressors using absorb variable"' foreach var of varlist `X' { tempvar absvarsresid if `"`e(absvar)'"'!=`""' { `quidebug' areg `var' if `touse' `wtcmd', a(`e(absvar)') `quidebug' predict `absvarsresid' if `touse', resid } else if `"`e(absvars)'"'!=`""' `quidebug' reghdfe `var' if `touse' `wtcmd', a(`e(absvars)') resid(`absvarsresid') qui replace `var' = `absvarsresid' qui estimates restore `est' } if `"`e(cmd)'"'==`"areg"' local df_a = e(df_a)+1 /*Add one for constant*/ else if `"`e(cmd)'"'==`"reghdfe"' local df_a = e(df_a) else { qui levelsof `e(absvar)' if `touse', local(levels) local df_a = `: word count `levels'' } } else local df_a = 0 /************************************************************************************************************* Generate interaction of cluster variables if two-way clustering. *************************************************************************************************************/ if `nclustervars'==2 { tempvar clustervar_3 qui egen `clustervar_3' = group(`clustervar_1' `clustervar_2') if `touse' } /************************************************************************************************************* Estimate quadratic forms in Mata. *************************************************************************************************************/ if `"`debug'"'!=`""' di `"Constructing VCV in Mata"' tempname V V_ratio df_t forvalues c = 1/3 { if `c'>1 & `nclustervars'==1 continue /*One-way clustering specified*/ tempname N_clust`c' V`c' df_t`c' if `"`debug'"'!=`""' di `"mata: mywork(`"`Y'"',`"`r'"', `"`X'"', `"`wt'"', `"`touse'"', `df_a', "`vce'", "`clustervar_`c''", `"`N_clust`c''"', "`V`c''", `"`df_t`c''"', `"`small'"', `"`lz2'"', `"`bm'"', `"`debug'"')"' mata: mywork(`"`Y'"',`"`r'"', `"`X'"', `"`wt'"', `"`touse'"', `df_a', "`vce'", "`clustervar_`c''", `"`N_clust`c''"', "`V`c''", `"`df_t`c''"', `"`small'"', `"`lz2'"', `"`bm'"', `"`debug'"') } if `nclustervars'==1 mat `V' = `V1' /*One-way clustering*/ else mat `V' = `V1'+`V2'-`V3' /*Two-way clustering*/ if `"`debug'"'!=`""' & `"`bm'"'==`"bm"' & `nclustervars'>1 { mat list `df_t1' mat list `df_t2' mat list `df_t3' } if `"`bm'"'==`"bm"' mat `df_t' = `df_t1' /************************************************************************************************************* Additional results. *************************************************************************************************************/ if `"`debug'"'!=`""' di `"Additional results"' /*Ratio of adjusted and 2sls variance matrices*/ mata: st_matrix("`V_ratio'", st_matrix("`V'"):/st_matrix("e(V)")) /*Add labels to matrices*/ mat colnames `V' = `Xnames' mat colnames `V_ratio' = `Xnames' /*Report b and V matrices*/ if `"`debug'"'!=`""' { mat list `V' mat list `V_ratio' if `nclustervars'>1 { mat list `V1' mat list `V2' mat list `V3' } } /*P-values*/ tempname pvalues t forvalues j = 1/`=colsof(`V')' { tempname t`j' mat `t`j'' = abs(el(e(b),1,`j')/sqrt(el(`V',`j',`j'))) if (`"`bm'"'==`"bm"') local p = 2*(1-t(el(`df_t',1,`j'),el(`t`j'',1,1))) else local p = 2*(1-normal(el(`t`j'',1,1))) mat `pvalues' = (nullmat(`pvalues'),`p') mat `t' = (nullmat(`t'),`t`j'') } mat colnames `pvalues' = `Xnames' /************************************************************************************************************* Post results. *************************************************************************************************************/ if `"`debug'"'!=`""' di `"Post results"' ereturn repost V=`V' /*Replaces VCV matrix in estimation result*/ if `nclustervars'==1 qui estadd scalar N_clust = `N_clust1', replace /*Adds/replaces number of clusters*/ else { qui estadd scalar N_clust1 = `N_clust1', replace /*Adds/replaces number of clusters*/ qui estadd scalar N_clust2 = `N_clust2', replace /*Adds/replaces number of clusters*/ } qui estadd local clustvar `"`varlist'"', replace /*Adds/replaces name of cluster variable*/ qui estadd local vcetype `"Robust LZ2"', replace /*Adds/replaces vce description*/ qui estadd matrix p = `pvalues', replace /*Adds matrix with p-values using t-distribution if specified*/ qui estadd matrix t = `t', replace /*Adds matrix with t-statistics*/ if `"`bm'"'==`"bm"' { mat colnames `df_t' = `Xnames' qui estadd matrix df_t = `df_t', replace /*Adds matrix with t-distribution degrees of freedom*/ local diopts `diopts' dfmatrix(`df_t') } qui estadd matrix V_ratio = `V_ratio', replace cap tsset if _rc!=0 qui sort `_n' /************************************************************************************************************* Display results. *************************************************************************************************************/ _coef_table_header _coef_table, `diopts' _prefix_footnote end /************************************************************************************************************* Mata code to estimate OLS regression and construct VCV matrices. *************************************************************************************************************/ mata: void mywork( string scalar depvar, string scalar residual, string scalar indepvars, string scalar weight, string scalar touse, real scalar df_a, string scalar vcetype, string scalar clustervar, string scalar N_clust, string scalar vcename, string scalar df_tname, string scalar small, string scalar LZ2, string scalar BM, string scalar debug) { real vector Y, r, cvar real matrix X, M, info, xi, nonemptyX, lz2adj, IminusP, df_t, W, rootW real scalar n, p, k, nc, i, dfr, sizeadj /*Cluster variable*/ cvar = st_data(., clustervar, touse) sort = order(cvar,1) /*Permutation vector to sort on cvar*/ cvar = cvar[sort,.] /*Main variable matrices*/ Y = st_data(., depvar, touse) /*Dependent variable*/ X = st_data(., indepvars, touse) /*Regressors*/ r = st_data(., residual, touse) /*Residuals*/ /*Sample size*/ n = rows(r) /*Weight*/ if (weight!=`""') { /*Weighted regression*/ W = diag(st_data(., weight, touse)) /*Matrix with raw weights on main diagonal*/ W = W*n/trace(W) /*Matrix with weights on main diagonal scaled to sum to number of observations*/ rootW = matpowersym(W,0.5) /*Square root of scaled weighting matrix*/ Y = rootW * Y X = rootW * X r = rootW * r } /*Sort in same order as cluster variable*/ Y = Y[sort,.] X = X[sort,.] r = r[sort,.] /*Degrees of freedom*/ p = cols(X) k = p - diag0cnt(quadcross(X, X)) + df_a /*Cross-product*/ XpXi = quadcross(X, X) XpXi = invsym(XpXi) /*Note: if X'X is singular (perfect multicollinearity), function invsym will automatically set enough columns of (X'X)^{-1} to zero. These columns may not correspond to the omitted columns in the Stata regression. Standard errors for fixed effects may not be correct.*/ /*Debugging*/ if (debug == "debug") { n p k nonemptyX = select(X, colmaxabs(X)) /*X without zero columns, e.g. due to omitted levels of factor expansion*/ nonemptyXcols = selectindex(colmaxabs(X):!=0) /*Non-zero columns, for example omitted levels of factor expansion*/ nonemptyXcols XpXi*X'*Y invsym(nonemptyX'*nonemptyX)*nonemptyX'*Y } /*Meat of cluster sandwich formula*/ info = panelsetup(cvar, 1) /*Assigns cluster id to each observation*/ nc = rows(info) /*Number of clusters*/ M = J(p, p, 0) for(i=1; i<=nc; i++) { /*Loop over clusters*/ if (debug == "debug") i xi = panelsubmatrix(X,i,info) /*Submatrix of regressors X corresponding to i'th cluster*/ ri = panelsubmatrix(r,i,info) /*Submatrix of residuals r corresponding to i'th cluster*/ if (LZ2=="lz2") ri = invsym(matpowersym(I(rows(xi)) - xi*XpXi*xi',0.5)) * ri /*Bias-reduction modification developed by Bell and McCaffrey (2002) analogous to HC2*/ xipri = quadcross(xi,ri) /*Score xi'ri*/ M = M + quadcross(xipri', xipri') /*Cluster meat matrix: M = M + xi' * lz2adj * ri * ri' * lz2adj' * xi*/ } /*Degrees of freedom adjustment*/ if (small=="small") sizeadj = ((n-1)/(n-k))*(nc/(nc-1)) else sizeadj = 1 /*Cluster covariance matrix*/ M = XpXi * M * XpXi * sizeadj /*Bell and McCaffrey (2002) degrees of freedom for t-distribution*/ if (BM == `"bm"') { df_t = J(1,0,.) /*Initiate empty matrix*/ for(j=1; j<=p; j++) { G = J(n,0,.) /*Initiate matrix with n rows and 0 columns*/ for(i=1; i<=nc; i++) { if (debug == "debug") i xi = panelsubmatrix(X,i,info) /*Submatrix of regressors X corresponding to i'th cluster*/ lz2adj = invsym(matpowersym(I(rows(xi)) - xi*XpXi*xi',0.5)) Pi = X*XpXi*xi' /*Columns of projection matrix P corresponding to cluster i*/ if (i==1) Ii = (I(rows(xi)) \ J(n-info[i,2],rows(xi),0)) /*Columns of identity matrix corresponding to cluster i*/ else if (i==nc) Ii = (J(info[i-1,2],rows(xi),0) \ I(rows(xi))) /*Columns of identity matrix corresponding to cluster i*/ else Ii = (J(info[i-1,2],rows(xi),0) \ I(rows(xi)) \ J(n-info[i,2],rows(xi),0)) /*Columns of identity matrix corresponding to cluster i*/ G = (G,(Ii-Pi)*lz2adj*xi*XpXi*e(j,p)') } lambdas = eigenvalues(G'*G)' df_t = (df_t, (colsum(lambdas))^2/(lambdas'*lambdas)) } st_matrix(df_tname, Re(df_t)) } st_matrix(vcename, M) st_numscalar(N_clust, nc) } end