/*
UMD ECON-626 FALL 2019
Power, real data
Power, PART 3: using real data to simulate power with different designs
*/
// Turn off the -more- pause so long output scrolls without waiting for a keypress.
set more off
/* CHANGE THE PATH ON THE NEXT LINE SO THAT IT POINTS TO WHERE YOUR FILES ARE STORED: */
cd "C:\Users\wb259971\Dropbox\econ-626-2019\lectures\L7 Power\activity"
// Load the test-score dataset, discarding any data currently in memory.
use "KenyaPSDPtestdataStd3V3.dta", clear
// 3.1.1
// use "summarize" or "sum" to see characteristics of the "engsep98" English test score variable.
// how many observations are there total?
// what is the standard deviation?
// 3.1.2: the "loneway" command is a one-way anova analysis.
// It reports several statistics, including the "Intra-class correlation."
// Try it with the command below, and try it with other variables if you like
// loneway engsep98 schid
// 3.1.3
// how many clusters (schid) are there? use "codebook schid" or other such commands.
// thus what is the average number of observations per cluster? (total divided by number of clusters)
// 3.2
// note how different the standard error is when clustered vs when not,
// using either the "mean" or "reg" command on variable engsep98,
// with or without the "cluster(schid)" option
// 3.3
// simulate treatment effects where treatment is assigned by school.
// you might do this by repeatedly (with capture drop in between as needed):
// (setting the seed)
// 1) generating a uniform random number for one observation per school (the observations flagged by tagsch)
// 2) sorting by this random number, for these observations
// 3) assigning treatment to the first 33 of 67 schools
// 4) using egen to use this treatment variable for all observations within school
// 5) adding a treatment effect to engsep98 to create the "outcome"
// 6) running the regression of outcome on treatment
// 7) counting how many times it is significant
exit // remove this exit line once you are ready to work on this question:

/*
Simulated power with treatment assigned at the school level.

Each pass through the loop:
  - draws a fresh random assignment of schools to treatment,
  - builds "outcome" as engsep98 plus effectsize for treated schools,
  - regresses outcome on treatment with standard errors clustered by schid,
  - increments rejcount when |b/se| exceeds 1.96.
After the loop, the number of rejections (and the share out of loopmax) is displayed.

Variables created/overwritten in the dataset: su, _t, t, outcome.
Variables read from the dataset: tagsch, schid, engsep98.
*/

// (at least) two parameters to control
local effectsize=1.5   // amount added to engsep98 for treated schools
local loopmax=100      // number of simulated randomizations
// (setting the seed)
// run under version control so the same seed reproduces the same draws
version 11.2: set seed 98765
// initialize counter
local rejcount=0
// repeatedly:
forvalues i=1/`loopmax' {
    // report progress during the loop
    if(mod(`i',10)==0) {
        di "Loop `i'"
    }
    // quietly, so as not to fill the screen with output
    qui {
        // 1) generating a uniform random number for one observation per school (tagsch==1)
        cap drop su
        gen su=uniform() if tagsch==1
        // 2) sorting by this random number, for these observations
        sort tagsch su
        // 3) assigning treatment to the first 33 of 67 schools
        cap drop _t
        // The stable option is essential: a plain sort on tagsch alone is free to
        // reorder observations that tie on tagsch, which would discard the
        // ordering by su from step 2. With stable, the su ordering is kept
        // within each tagsch group, so _n<=33 picks the 33 smallest draws.
        sort tagsch, stable
        by tagsch: gen _t=cond(_n<=33,1,0) if tagsch==1
        // 4) using egen to use this treatment variable for all observations within school
        //    (_t is missing for non-tagged observations; max() ignores missings)
        cap drop t
        sort schid
        by schid: egen t=max(_t)
        // 5) adding a treatment effect to engsep98 to create the "outcome"
        cap drop outcome
        gen outcome=engsep98+`effectsize'*t
    }
    // 6) running the regression of outcome on treatment
    qui reg outcome t, vce(cluster schid)
    // 7) counting how many times it is significant
    //    (1.96 is the normal-approximation critical value for a two-sided 5% test)
    if abs(_b[t]/_se[t])>1.96 {
        local rejcount=`rejcount'+1
    }
}
di "Rej: `rejcount'"
// share of simulations that rejected, i.e. the simulated power
di "Rejection rate: " %5.3f `rejcount'/`loopmax'
// 3.4
// how does this compare to the associated power calculation?
// the easiest approach is to enter the rejection rate you observed as the power in sampsi, followed by sampclus, and compare the implied design to the one simulated.
// you could also use the formula on slide 29, and rearrange, to solve more precisely.
// 3.5
// consider stratifying by a different year's mean test score at the school level.
// these variables are already created for you, so:
// re-do 3.3 above, but replacing four lines to stratify by "othertestblock"
// and treat "numtreatblock" schools within each block:
/*
sort tagsch su
becomes
sort tagsch othertestblock su
sort tagsch
becomes
sort tagsch othertestblock
by tagsch: gen _t=cond(_n<=33,1,0) if tagsch==1
becomes
by tagsch othertestblock: gen _t=cond(_n<=numtreatblock,1,0) if tagsch==1
qui reg outcome t, vce(cluster schid)
becomes
qui reg outcome t i.othertestblock, vce(cluster schid)
*/
// how do your results change?