/* UMD ECON-626 FALL 2019 Power, real data Power, PART 3: using real data to simulate power with different designs */ set more off /* CHANGE THIS FIRST LINE SO THAT IT IS APPROPRIATE TO WHERE YOUR FILES ARE STORED: */ cd "C:\Users\wb259971\Dropbox\econ-626-2019\lectures\L7 Power\activity" use KenyaPSDPtestdataStd3V3.dta, clear // 3.1.1 // use "summarize" or "sum" to see characteristics of the "engsep98" English test score variable. // how many observations are there total? // what is the standard deviation? sum engsep98 local thissd=r(sd) local thisN=r(N) // 3.1.2: the "loneway" command is a one-way anova analysis. // It reports several statistics, including the "Intra-class correlation." // Try it with the command below, and try it with other variables if you like // loneway engsep98 schid loneway engsep98 schid local thisrho=r(rho) // 3.1.3 // how many clusters (schid) are there? use "codebook schid" or other such commands. "tabulate" can also help. // thus what is the average number of observations per cluster? (total divided by number of clusters) codebook schid qui tab schid local thisNclust=r(r) di `thisN' di `thisNclust' local obsPerClust=`thisN'/`thisNclust' di `obsPerClust' // 3.2 // note how different the standard error is when clustered vs when not, // using either the "mean" or "reg" command on variable engsep98, // with or without the "cluster(schid)" option reg engsep98 mean engsep98 reg engsep98, cluster(schid) mean engsep98, cluster(schid) // 3.3 // simulate treatment effects where treatment is assigned by school. // you might do this by repeatedly (with capture drop in between as needed): // (setting the seed) // 1) generating and assigning such random numbers to one observation per school (tagsch) // 2) sorting by this random number, for these observations // 3) assigning treatment to the first 33 of 67 schools // 4) using egen to use this treatment variable for all observations within school // 5) adding a treatment effect to engsep98 to create the "outcome" // 6) running the regression of outcome on treatment // 7) counting how many times it is significant // (at least) two parameters to control local effectsize=1.5 local loopmax=100 // (setting the seed) version 11.2: set seed 98765 // initialize counter local rejcount=0 // repeatedly: forvalues i=1/`loopmax' { // report progress during the loop if(mod(`i',10)==0) { di "Loop `i'" } // quietly, so as not to fill the screen with output qui { // 1) generating and assigning such random numbers to one observation per school (tagsch) cap drop su gen su=uniform() if tagsch==1 // 2) sorting by this random number, for these observations sort tagsch su // 3) assigning treatment to the first 33 of 67 schools cap drop _t sort tagsch by tagsch: gen _t=cond(_n<=33,1,0) if tagsch==1 // 4) using egen to use this treatment variable for all observations within school cap drop t sort schid by schid: egen t=max(_t) // 5) adding a treatment effect to engsep98 to create the "outcome" cap drop outcome gen outcome=engsep98+`effectsize'*t } // 6) running the regression of outcome on treatment qui reg outcome t, vce(cluster schid) // 7) counting how many times it is significant if abs(_b[t]/_se[t])>1.96 { local rejcount=`rejcount'+1 } } di "Rej: `rejcount'" // 3.4 // how does this compare to the associated power calculation? // easiest thing is to try typing in the power from the rejection rate you saw, and use sampsi followed by sampclus. // you could also use the formula on slide 29, and rearrange, to solve more precisely. sampsi 0 1.5, sd1(5.657) power(0.52) sampclus, obsclus(45) rho(0.247) // using the formula on slide 29, but rearranging: // z+z = MDE * sqrt(p(1-p)) * sqrt(N/sigma^2) / sqrt(1+(ng-1)rho)) // z_(1-k) = MDE * sqrt(p(1-p)) * sqrt(N/sigma^2) / sqrt(1+(ng-1)rho)) - z_(alpha/2) // z_(1-k) = 1.5 * 0.5 * sqrt(3006/(5.65658^2)) / sqrt(1+(44.8358-1)0.24677)) - 1.96 // z_(1-k) = 0.75 * 9.6926 / sqrt(1+(43.8358)0.24677)) - 1.96 // z_(1-k) = 0.75 * 9.6926 / sqrt(11.81736) - 1.96 // z_(1-k) = 0.75 * 9.6926 / 3.4376387 - 1.96 // z_(1-k) = 2.11466 - 1.96 // z_(1-k) = 0.15466 // k = normal(0.15466) = 0.5615 // 3.5 // consider stratifying by a different year's mean test score at the school level. // these variables are already created for you, so: // re-do 3.3 above, but replacing four lines to stratify by "othertestblock" // and treat "numtreatblock" schools within each block: /* sort tagsch su becomes sort tagsch othertestblock su sort tagsch becomes sort tagsch othertestblock by tagsch: gen _t=cond(_n<=33,1,0) if tagsch==1 becomes by tagsch othertestblock: gen _t=cond(_n<=numtreatblock,1,0) if tagsch==1 qui reg outcome t, vce(cluster schid) becomes qui reg outcome t i.othertestblock, vce(cluster schid) */ // how do your results change? // (setting the seed) version 11.2: set seed 98765 // initialize counter local rejcount=0 // repeatedly: forvalues i=1/`loopmax' { // report progress during the loop if(mod(`i',10)==0) { di "Loop `i'" } // quietly, so as not to fill the screen with output qui { // 1) generating and assigning such random numbers to one observation per school (tagsch) cap drop su gen su=uniform() if tagsch==1 // 2) sorting by this random number, for these observations sort tagsch othertestblock su // 3) assigning treatment to the first 33 of 67 schools cap drop _t sort tagsch othertestblock by tagsch othertestblock: gen _t=cond(_n<=numtreatblock,1,0) if tagsch==1 // 4) using egen to use this treatment variable for all observations within school cap drop t sort schid by schid: egen t=max(_t) // 5) adding a treatment effect to engsep98 to create the "outcome" cap drop outcome gen outcome=engsep98+`effectsize'*t } // 6) running the regression of outcome on treatment qui reg outcome t i.othertestblock, vce(cluster schid) // 7) counting how many times it is significant if abs(_b[t]/_se[t])>1.96 { local rejcount=`rejcount'+1 } } di "Rej: `rejcount'"