/************************************************************************************************************* This ado-file performs a seasonal adjustment procedure. Data must be tsset. Options: generate: name of seasonally-adjusted variable multiplicative: seasonal adjustment in logs additive: seasonal adjustment in levels ar: number of AR coefficients, adjustment using arima model with seasonal categorical variables ma: number of MA coefficients, adjustment using arima model with seasonal categorical variables polynomial: include polynomial of order `polynomial' in the time variable, adjustment using arima or OLS model wtih seasonal categorical variables noregress: do not apply regression model x11: apply X-11 moving average adjustment. If noregress is not specified, the x11 adjustment is applied after the regression adjustment. The Henderson filter is set to a 13 term filter for monthly and a 5 term filter for quarterly. The current version does not allow for alternatives. See http://www.seasonaladjustment.com/henderson/ for details. holidays: include categorical variables for certain holidays. noregress must not be specified. "Labor day": categorical variable for year-periods in which labor day occurs on the 7th, i.e. during the BLS reference week. "Easter": categorical variables for year-periods in which Good Friday or the Monday after Easter occur during the BLS reference week. "Veteran's day": categorical variable for Veteran's Day falling on a Saturday, such that observed day does not fall during BLS reference week. "Columbus day": categorical variable for Columbus Day falling on October 14, such that observed day does not fall during BLS reference week. dummy(numlist): include categorical variables for user-specified periods, entered in Stata internal form (SIF). noregress must not be specified. outliers: automatically dummy observations greater than `outliers' from next largeset/smallest seriesbreak(numlist): include indicator variables occurring after user-specified period, entered in Stata internal form (SIF). noregress must not be specified. Example: specify seriesbreak(`=tm(2001m1)') if your series has a jump in Jan-2001. forecast/backcast: use arima model to forecast/backcast series before applying moving average filter. replace: replace variable in memory with new variable inreplace: replace observations in adjusted sampled only verbose: report regression results Note: if ar=ma=0 and noregress is not specified, model is a linear regression on the category dummies Sample syntax arma(1,1) model with holiday adjustments, in logs: seasonallyadjust var, gen(var_sa) ar(1) ma(1) multiplicative holidays("labor day" "easter" "veteran's day" "columbus day") replace x11 model with no prior adjustment, in logs: seasonallyadjust var, gen(var_sa) noreg multiplicative replace x11 model after arma(1,1) with holiday adjustments, in logs: seasonallyadjust var, gen(var_sa) ar(1) ma(1) x11 multiplicative holidays("labor day" "easter" "veteran's day" "columbus day") replace Written by Gabriel Chodorow-Reich and provided without any warranty This version: November 2017 *************************************************************************************************************/ capture program drop seasonallyadjust program define seasonallyadjust version 12.1 syntax varname [if] [in], GENerate(name) [ar(numlist >0 integer)] [ma(numlist >0 integer)] [NOREGress] [x11] [POLynomial(integer 0)] [MULtiplicative] [ADDitive] [holidays(string)] [dummy(numlist) outliers(real 0) savepredicted(name)] [seriesbreak(numlist)] [forecast backcast] [replace] [inreplace] [verbose] #delimit; set more off; /************************************************************************************************************* 1. Prepare data *************************************************************************************************************/ /*Drop any system temporary variables*/ cap drop __00*; marksample touse, novarlist; if `"`verbose'"'!=`"verbose"' local qui qui; /*Define whether adjustment is in levels or logs*/ tempvar depvar; if `"`multiplicative'"'==`""' & `"`additive'"'==`""' {; di in red `"Must specify multiplicative or additive"'; break; }; else if `"`multiplicative'"'==`"multiplicative"' qui gen `depvar' = log(`varlist'); else qui gen `depvar' = `varlist'; /*Initialize SA variable at NSA level*/ tempvar depvar_sa; qui gen `depvar_sa' = `depvar' if `touse'; /*Replace or inreplace*/ if `"`replace'"'==`"replace"' cap drop `generate'; if `"`inreplace'"'==`"inreplace"' {; local create replace; cap describe `generate'; if _rc!=0 qui gen `generate' = .; }; else local create generate; /*tsset data*/ `qui' tsset; if r(timevar)==`""' {; di as error `"Must tsset data"'; break; }; else {; local timevar = r(timevar); local panelvar = r(panelvar); local unit = r(unit); }; if `"`unit'"'==`"monthly"' local nunits = 12; else if `"`unit'"'==`"quarterly"' local nunits = 4; else {; di as error `"Must specify unit of time variable in tsset command or by formatting time variable. See help datetime_display_formats."'; error 1==1; }; /*Expand data if forecast/backcast specified*/ tempvar original; qui gen `original' = 1; if `"`backcast'"'!=`""' | `"`forecast'"'!=`""' {; if `"`panelvar'"'==`"."' {; tempvar panelvar; gen byte `panelvar' = 1; }; if `"`unit'"'==`"monthly"' local add = 3*`nunits'; /*Maximum span of filter below*/ else if `"`unit'"'==`"quarterly"' local add = 2*`nunits'; /*Maximum span of filter below*/ `qui' levelsof `panelvar', local(panelvars); sum `timevar', meanonly; foreach side in back fore {; if `"``side'cast'"'!=`""' {; foreach id of local panelvars {; qui set obs `=`=_N'+1'; if `"`side'"'==`"back"' qui replace `timevar' = r(min) - `add' in `=_N'; else if `"`side'"'==`"fore"' qui replace `timevar' = r(max) + `add' in `=_N'; qui replace `panelvar' = `id' in `=_N'; }; }; }; qui tsfill; qui replace `original' = 0 if missing(`original'); }; /*Define temporal unit*/ tempvar unitvar; qui gen `unitvar' = `=regexr(`"`unit'"',`"ly$"',`""')'(dof`=substr(`"`unit'"',1,1)'(`timevar')); /*I.e. month(dofm(`timevar')) or quarter(dofq(`timevar'))*/ /*Error if number of units is too small because of missing data*/ qui tab `unitvar' if `touse' & !missing(`depvar'); if r(r)!=`nunits' {; di as error `"Not enough observations to do seasonal adjustment"'; error; }; /*Generate polynomial in time variable if specified*/ if `polynomial'!=0 {; sum `timevar' if `touse', meanonly; forvalues j = 1/`polynomial' {; tempvar trend`j'; qui gen `trend`j'' = (1 + `timevar' - r(min))^`j'; local trendvars `"`trendvars' `trend`j''"'; }; }; /************************************************************************************************************* 2. Holiday and user-specified adjustments *************************************************************************************************************/ if regexm(`"`holidays'"',`"[Ll]abor [Dd]ay"') & `"`unit'"'==`"monthly"' {; tempvar LaborDay7th; /*Tag labor day occurring during BLS reference week*/ qui gen `LaborDay7th' = dow(mdy(`unitvar',7,year(dofm(`timevar'))))==1 & (`unitvar'==9); local holiday_adjustments `"`LaborDay7th'"'; }; if regexm(`"`holidays'"',`"[Ee]aster"') & `"`unit'"'==`"monthly"' {; easterdates `timevar'; /*Tag Easter Monday or Good Friday in reference week*/ tempvar GoodFriday; tempvar EasterMonday; qui gen `GoodFriday' = `unitvar'==4 & inrange(day(easterdate-2),11,17); qui gen `EasterMonday' = `unitvar'==4 & inrange(day(easterdate+1),7,13); qui drop easterdate; local holiday_adjustments `"`holiday_adjustments' `GoodFriday' `EasterMonday'"'; }; if regexm(`"`holidays'"',`"[Vv]eteran[\']?s [Dd]ay"') & `"`unit'"'==`"monthly"' {; tempvar VeteransDay; /*Tag Veteran's Day falling on a Saturday, s.t. observed day does not fall during BLS reference week*/ qui gen `VeteransDay' = dow(mdy(`unitvar',11,year(dofm(`timevar'))))==6 & (`unitvar'==11); local holiday_adjustments `"`holiday_adjustments' `VeteransDay'"'; }; if regexm(`"`holidays'"',`"[Cc]olumbus [Dd]ay"') & `"`unit'"'==`"monthly"' {; tempvar ColumbusDay; /*Tag Columbus Day falling on October 14, s.t. observed day does not fall during BLS reference week*/ qui gen `ColumbusDay' = dow(mdy(`unitvar',14,year(dofm(`timevar'))))==1 & (`unitvar'==10); local holiday_adjustments `"`holiday_adjustments' `ColumbusDay'"'; }; /*Remove holidays that don't appear during the reference week during the sample*/ foreach holiday in `holiday_adjustments' {; sum `holiday' if `touse', meanonly; if r(mean)==r(max) {; local holiday_adjustments = regexr(`"`holiday_adjustments'"',`"`holiday'"',`""'); }; }; /*User-specified adjustments*/ if `outliers'!=0 {; /*Identify observations more than X% from closest observation in the same seasonal period*/ sort `touse' `unitvar' `depvar'; tempvar outliersmall outlierlarge nunitvar noutliers; qui gen byte `outliersmall' = abs(`depvar'-`depvar'[_n+1])>`outliers' if `touse' & `unitvar'==`unitvar'[_n+1]; /*Potential outlier if less than X of next largest value*/ qui gen byte `outlierlarge' = abs(`depvar'-`depvar'[_n-1])>`outliers' if `touse' & `unitvar'==`unitvar'[_n-1]; /*Potential outlier if larger than X of next smallest value*/ qui replace `outliersmall' = 0 if `touse' & `outliersmall'[_n-1]==0 & `unitvar'==`unitvar'[_n-1]; /*Only outlier if all observations smaller are also outliers*/ gsort `touse' `unitvar' -`depvar'; qui replace `outlierlarge' = 0 if `touse' & `outlierlarge'[_n-1]==0 & `unitvar'==`unitvar'[_n-1]; /*Only outlier if all observations larger are also outliers*/ qui egen `nunitvar' = count(`depvar') if `touse', by(`unitvar'); qui egen `noutliers' = total(`outliersmall'+`outlierlarge') if `touse', by(`unitvar'); `qui' levelsof `timevar' if `touse' & `noutliers'==`nunitvar'; /*Seasonals where all are identified as outliers*/ `qui' levelsof `timevar' if `touse' & (`noutliers'!=`nunitvar') & ((`outliersmall'==1)|(`outlierlarge'==1)), local(outlierperiods); foreach period of local outlierperiods {; local dummy `dummy' `period'; }; qui tsset; }; if `"`dummy'"'!=`""' {; /*Manual observations to dummy out*/ foreach d of local dummy {; tempvar D_`d'; qui gen `D_`d'' = `timevar'==`d'; local dummies `"`dummies' `D_`d''"'; }; }; if `"`seriesbreak'"'!=`""' {; foreach d of local seriesbreak {; tempvar B_`d'; qui gen `B_`d'' = `timevar'>=`d'; local series_adjustments `"`series_adjustments' `B_`d''"'; }; }; /************************************************************************************************************* 3. Regression *************************************************************************************************************/ if `"`noregress'"'!=`"noregress"' | `"`backcast'"'!=`""' | `"`forecast'"'!=`""' {; qui xi, noomit prefix(_) i.`unitvar'; if `"`ar'"'==`""' & `"`ma'"'==`""' local regcmd reg `depvar' _`unitvar'_* `holiday_adjustments' `dummies' `series_adjustments' `trendvars' if `touse', noconstant; else local regcmd arima `depvar' _`unitvar'_* `holiday_adjustments' `dummies' `series_adjustments' `trendvars' if `touse', ar(`ar') ma(`ma') noconstant; `qui' `regcmd'; /************************************************************************************************************* 3.1 Apply adjustments *************************************************************************************************************/ tempvar predicted; qui predict `predicted' if `touse' | `original'==0, xb; if `"`savepredicted'"'!=`""' {; cap drop `savepredicted'; qui gen `savepredicted' = `predicted'; }; qui replace `depvar_sa' = `predicted' if missing(`depvar_sa'); /*Subtract month/quarter dummy from SA variable*/ local unitmean = 0; forvalues m = 1/`nunits' {; qui replace `depvar_sa' = `depvar_sa' - _b[_`unitvar'_`m'] if `unitvar'==`m'; local unitmean = `unitmean' + _b[_`unitvar'_`m']; }; local unitmean = `unitmean'/`nunits'; /*Add back mean of month/quarter dummies to SA variable*/ qui replace `depvar_sa' = `depvar_sa' + `unitmean'; /*Subtract holiday adjustments*/ if `"`holiday_adjustments'"'!=`""' {; foreach adjustment of local holiday_adjustments {; qui replace `depvar_sa' = `depvar_sa' - `adjustment'*_b[`adjustment']; }; }; /*Replace with NSA if forecast/backcast specified and noregress specified*/ if (`"`backcast'"'!=`""' | `"`forecast'"'!=`""') & `"`noregress'"'==`"noregress"' {; qui replace `depvar' = `predicted' if `original'==0; qui replace `depvar_sa' = `depvar'; }; qui drop _`unitvar'_*; }; /************************************************************************************************************* 4. x11. The terminology follows Appendix A, Findley et al., 1998, "New Capabilities and Methods of the X-12-ARIMA Seasonal-Adjustment Program," Journal of Business and Economic Statistics, Vol. 16, No. 2, pp. 127-152. *************************************************************************************************************/ if `"`x11'"'==`"x11"' {; if `nunits'==12 {; local weights_trend1 `"0.5 1 1 1 1 1 <1> 1 1 1 1 1 0.5"'; local weights_S1_preliminary `"1 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 <3> 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 1"'; local weights_S1 `"0.5 1 1 1 1 1 <1> 1 1 1 1 1 0.5"'; local weights_trend2 `"-0.01935 -0.02786 0 0.06549 0.14736 0.21434 <0.24006> 0.21434 0.14736 0.06549 0 -0.02786 -0.01935"'; /*Henderson 13 term weights*/ local weights_S2_preliminary `"1 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 <3> 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 1"'; local weights_S2 `"0.5 1 1 1 1 1 <1> 1 1 1 1 1 0.5"'; }; else if `nunits'==4 {; local weights_trend1 `"0.5 1 <1> 1 0.5"'; local weights_S1_preliminary `"1 0 0 0 2 0 0 0 <3> 0 0 0 2 0 0 0 1"'; local weights_S1 `"0.5 1 <1> 1 0.5"'; local weights_trend2 `"-0.07343 0.29371 <0.55944> 0.29371 -0.07343"'; /*Henderson 5 term weights*/ local weights_S2_preliminary `"1 0 0 0 2 0 0 0 3 0 0 0 <3> 0 0 0 3 0 0 0 2 0 0 0 1"'; local weights_S2 `"0.5 1 <1> 1 0.5"'; }; tempvar A0; qui gen `A0' = `depvar_sa'; forvalues stage = 1/2 {; tempvar trend`stage'; tempvar SI`stage'; tempvar S`stage'_preliminary; tempvar S`stage'; tempvar A`stage'; /*Trend estimate via centered moving average*/ qui tssmooth ma `trend`stage'' = `A`=`stage'-1'' if `touse', weights(`weights_trend`stage''); /*SI residual*/ qui gen `SI`stage'' = `depvar_sa' - `trend`stage''; /*Preliminary seasonal factor*/ qui tssmooth ma `S`stage'_preliminary' = `SI`stage'' if `touse', weights(`weights_S`stage'_preliminary'); /*Seasonal factor*/ qui tssmooth ma `S`stage'' = `S`stage'_preliminary' if `touse', weights(`weights_S`stage''); qui replace `S`stage'' = `S`stage'_preliminary' - `S`stage''; /*Seasonal adjustment*/ qui gen `A`stage'' = `depvar_sa' - `S`stage''; }; qui replace `depvar_sa' = `A2'; }; /************************************************************************************************************* 5 Create seasonally-adjusted variable with specified name *************************************************************************************************************/ qui replace `depvar_sa' = . if missing(`depvar'); if `"`multiplicative'"'==`"multiplicative"' {; qui `create' `generate' = exp(`depvar_sa') if `touse'; }; else {; qui `create' `generate' = `depvar_sa' if `touse'; }; qui drop if `original'==0; end; /************************************************************************************************************* 6. Program to load easter dates. List generated from: #delimit; qui insheet using http://tlarsen2.tripod.com/thomaslarsen/easterdates.html, clear; qui split v1, parse(" "); foreach var of varlist v11-v14 {; qui gen date`var' = date(`var',`"D#MY"'); }; qui drop if missing(datev14); qui stack date*, into(easterdate) clear; qui format easterdate %td; *************************************************************************************************************/ capture program drop easterdates; program define easterdates; syntax name(name=timevar); tempfile easter; tempfile data; qui save `data'; clear; local easterdates `" 06apr1947 28mar1948 17apr1949 09apr1950 25mar1951 13apr1952 05apr1953 18apr1954 10apr1955 01apr1956 21apr1957 06apr1958 29mar1959 17apr1960 02apr1961 22apr1962 14apr1963 29mar1964 18apr1965 10apr1966 26mar1967 14apr1968 06apr1969 29mar1970 11apr1971 02apr1972 22apr1973 14apr1974 30mar1975 18apr1976 10apr1977 26mar1978 15apr1979 06apr1980 19apr1981 11apr1982 03apr1983 22apr1984 07apr1985 30mar1986 19apr1987 03apr1988 26mar1989 15apr1990 31mar1991 19apr1992 11apr1993 03apr1994 16apr1995 07apr1996 30mar1997 12apr1998 04apr1999 23apr2000 15apr2001 31mar2002 20apr2003 11apr2004 27mar2005 16apr2006 08apr2007 23mar2008 12apr2009 04apr2010 24apr2011 08apr2012 31mar2013 20apr2014 05apr2015 27mar2016 16apr2017 01apr2018 21apr2019 12apr2020 04apr2021 17apr2022 09apr2023 31mar2024 20apr2025 05apr2026 28mar2027 16apr2028 01apr2029 21apr2030 13apr2031 28mar2032 17apr2033 09apr2034 25mar2035 13apr2036 05apr2037 "'; qui gen easterdate = .; foreach date of local easterdates {; qui set obs `=`=_N'+1'; qui replace easterdate = td(`date') in `=_N'; }; qui format easterdate %td; qui gen year = year(easterdate); qui save `easter'; use `data', clear; capture qui gen year = year(dofm(`timevar')); merge m:1 year using `easter', keep(matched) assert(matched using) nogenerate noreport keepusing(easterdate); if _rc==0 {; qui drop year; }; end;