// ECON 626 FALL 2019 // L9. Multiple Hypothesis Testing // A2. Implementing Anderson's Benjamini-Hochberg q-values clear all set seed 12345 set scheme s1mono *cd "E:\Dropbox\econ-626-2019\lectures\L9 Multiple Testing\activities" cd "C:\Users\pj\Dropbox\econ-626-2019\lectures\L9 Multiple Testing\activities" // Redo Q2 with genuine treatment effects // 2a. Generate a data set with 1000 observations and 100 outcome variables: // forvalues i = 1/100 {gen y`i' = rnormal()} set obs 1000 gen id = _n forvalues i = 1/100 { qui gen y`i' = rnormal() } gen treatment = (_n<=500) forvalues i = 1/100 { qui replace y`i' = y`i' + 0.18*treatment } // 2b. calculate p-values from 100 regressions of yi = t gen pval = . forvalues i = 1/100 { qui reg y`i' t mat V = r(table) qui replace pval = V[4,1] in `i' } gen p_reject = pval<=0.05 if pval!=. // 2c. Calculate Bonferroni-adjusted p-values gen bval = min(1,pval*100) if pval!=. // 2d. Calculate Benjamini-Hochberg q-values count if pval!=. // number of hypotheses being tested local k = r(N) sort pval qui gen rank = _n if pval!=. qui gen temp_q = pval * (`k'/rank) qui gen qval = temp_q // will be replaced as needed local j = `k' - 1 qui sum temp_q in `k' local thisqval = r(mean) forvalues i = 1/`j' { local temprank = `k' - `i' qui sum temp_q in `temprank' local tempqval = r(mean) if `tempqval' > `thisqval' { qui replace qval = `thisqval' in `temprank' } if `tempqval' < `thisqval' { local thisqval = `tempqval' } } // 2e. Calculate Romano-Wolf adjusted p-values rwolf y1-y100, indepvar(treatment) sort id gen rwval = "" forvalues i = 1/100 { replace rwval = "`e(rw_y`i')'" in `i' } destring rwval, replace force // 2f. Plot the four sets of adjusted p-values sort rank twoway /// (scatter pval rank, mcolor(vermillion) msymbol(oh) mlwidth(thin)) /// (scatter bval rank, mcolor(orangebrown) msymbol(oh) mlwidth(thin)) /// (scatter qval rank, mcolor(sea) msymbol(oh) mlwidth(thin)) /// (scatter rwval rank, mcolor(turquoise) msymbol(oh) mlwidth(thin)), /// legend(cols(1) label(1 "Unadjusted p-values") /// label(2 "Bonferroni correction") /// label(3 "Benjamini-Hochberg") /// label(4 "Romano-Wolf") size(small) ring(0) pos(11)) /// xtitle(" " "Rank of unadjusted p-value", size(small)) /// ytitle("Adjusted p-value" " ", size(small))