// owen ozier and david mckenzie // Why to use ex-post standard errors and minimum detectable effects // but why NOT to use the erroneous idea of "ex-post power" // April-May 2019 // Program 1 of 2: simulate data generating process version 14.1 clear all // FOUR PARTS: // 1: PATH SETUP // 2: PARAMETERS OF THE SIMULATION // 3: BASIC DATA STRUCTURE // 4: SIMULATIONS // ------------------------------------------------------------------------- // PART 1: PATH SETUP global mydatadir="C:\Users\PUT_YOUR_DIRECTORY_HERE_BEFORE_USING_THIS_PROGRAM" // ------------------------------------------------------------------------- // PART 2: PARAMETERS OF THE SIMULATION //loopmax: the number of simulations // note that there is a dependency here: the visualization file needs to have // the same parameter value here, because it is used in the filename where // simulation results are stored. // // in terms of timing, on my laptop of the moment: // 100 takes a couple of seconds; // 1000 takes closer to 30 seconds; // 10000 takes about 250 seconds. local loopmax=1000 //obsno: the number of observations in total local obsno 800 //trueeffect: this is the larger effect we'll try to detect. local trueeffect=0.2 //tinyeffect: this is the smaller effect we'll try to detect. local tinyeffect=0.05 //halfobs: this is half the number of observations, which are the ones to be "treated" // currently automatically calculated from the total number of observations. local halfobs=`obsno'/2 // ------------------------------------------------------------------------- // PART 3: BASIC DATA STRUCTURE set obs `obsno' gen idno=_n gen t=cond(idno<=`halfobs',1,0) label var t "Treatment" version 14.1: set seed 20190426 // --- // MAKING SURE SEED IS WHAT WE THINK IT IS: // (we do this for replicability of results. // if you are interested to see what issues sometimes arise in random number // generation, and/or how generators have changed over time, read here: // http://documents.worldbank.org/curated/en/369471468333566549/Perils-of-simulation-parallel-streams-and-the-case-of-statas-rnormal-command ) // local seed_start=substr(c(seed),1,30) local planned_seed_start="XAA00000000013414da635d9c6be76" local seed_end=strreverse(substr(strreverse(c(seed)),1,30)) local planned_seed_end="6ab09bce15792e00010000013809e3" di "`seed_start'" _newline "`planned_seed_start'" assert "`seed_start'"=="`planned_seed_start'" di "`seed_end'" _newline "`planned_seed_end'" assert "`seed_end'"=="`planned_seed_end'" // --- // CALCULATING CRITICAL VALUE TO BE USED LATER: local normcrit5=invnorm(0.975) di `normcrit5' // For reference, though it will not be used in data simulation, // calculating the usual "2.8" number // as seen in slide 7 of this 2016 set of lecture notes: // http://economics.ozier.com/econ626/lec/econ626lecture7_handout.pdf local MDEcrit80=`normcrit5'+invnorm(0.8) di `MDEcrit80' // For reference, though it will not be used in data simulation, // thinking through anticipated standard errors and power // expected (approx) SE local expse=(sqrt((1/`halfobs')*1+(1/`halfobs')*1)) di `expse' // expected MDE: di `MDEcrit80'*`expse' // expected power local exppower=normal(`trueeffect'/`expse'-`normcrit5') di `exppower' // ------------------------------------------------------------------------- // PART 4: SIMULATIONS // Start timer timer clear 1 timer on 1 // move to the output directory. cd "${mydatadir}" // open the file that will record simulation results cap file close fp file open fp using "mde-not-expostpower-2019-`loopmax'.csv", write replace // write headers. // Iteration number // Standard error from regression // Minimum detectable effect using this standard error // first regression: beta hat // first regression: p value // first regression: erroneous "ex post power" // second regression: beta hat // second regression: p value // second regression: erroneous "ex post power" file write fp "iteration,se,mde,b1,p1,epp1,b2,p2,epp2" _newline // loop forvalues iter=1/`loopmax' { di "-----------iteration `iter'" // generate random error cap drop e1 qui gen e1=invnorm(uniform()) // generate outcomes incorporating error and effects cap drop y1 qui gen y1=e1+`trueeffect'*t cap drop y2 qui gen y2=e1+`tinyeffect'*t // regress outcome on treatment (scenario 1) qui reg y1 t // capture results from regression local b1=_b[t] mat rr1=r(table) local p1=rr1[4,1] // note that the stanard error will be the same in scenarios 1 and 2 with this data generating process, so only recording it once. local se=_se[t] // see the p values scroll by if you are interested di "`p1'" // MDE (depends on the SE, doesn't vary between scenarios 1 and 2 with this data generating process) local mde=`MDEcrit80'*_se[t] // erroneous "ex-post power" 1 local epp1=normal(abs(_b[t])/_se[t]-`normcrit5') + normal(-abs(_b[t])/_se[t]-`normcrit5') // regress outcome on treatment (scenario 2) qui reg y2 t // capture results from regression local b2=_b[t] mat rr2=r(table) local p2=rr2[4,1] // see the p values scroll by if you are interested di "`p2'" // erroneous "ex-post power" 2 local epp2=normal(abs(_b[t])/_se[t]-`normcrit5') + normal(-abs(_b[t])/_se[t]-`normcrit5') // WRITE ALL TO ONE LINE OF THE FILE file write fp "`iter',`se',`mde',`b1',`p1',`epp1',`b2',`p2',`epp2'" _newline } // close the file file close fp // report the time used timer off 1 timer list 1