Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Vancouver_application_8-8-2018/submitted JASA data/Bryan Graham Stata software/bandwidth_selection_600/iptate_loop_simulation_bw_select_600_July202018_500reps.do
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1057 lines (456 sloc)
25.2 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
****&&&&& MUST RUN THE FOLLOWING BEFORE RUNNING:
****&&&& ssc install iptATE (this is case-sensitive)
****&&&& ssc install estout
****&&&& change directory to be the same one where the data are located, for all instances of import, export, and cd throughout this code
****&&&& Be sure Microsoft Word 2007 or later is installed, so that .docx format will be recognized for making tables
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600"
*log the whole session; "replace" overwrites the log left by any earlier run
log using vanrichlon600, text replace
*expand matsize (mkmat later builds 600-row matrices of true ATEs)
set matsize 600
*number of Monte Carlo samples used for bandwidth selection
local repetitions=100
*number of Monte Carlo samples used for the main GIPT/IPT/DID comparison
local repetitions_600=500
*total rows in the stacked simulation data: 600 observations x 500 samples
local observationsnum=600*500
*first do optimal bandwidth selection based on 100 repetitions, then later use the optimal bandwidth to estimate GIPT using 500 repetitions
*divide bandwidths by 100 later, but here keep the way they are for ease of creating directories
*======================================================================
* STEP 1: bandwidth selection by grid search.  Candidate bandwidths are
* stored times 100 (75 -> 0.75) so they can double as directory names;
* they are divided by 100 where used in the kernel.  For each candidate,
* run `repetitions' Monte Carlo samples of 600 observations, estimate
* the GIPT ATE at every observation, and record each sample's average
* squared error (ASE) as a scalar for later averaging into the MASE.
*======================================================================
foreach bw in 75 85 95 105 115 125 135 145 {
mkdir "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_`bw'"
set more off
forvalues j=1/`repetitions' {
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600"
import delimited using cross_section_data600100100_7-14-18.csv, clear
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_`bw'"
*per-sample output file of observation number, ATE, SE, and true ATE
file open applic_file_`bw'_`j' using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_`bw'\myfile_`bw'_`j'.txt", write
****the next 2 lines create a block of 600 observations, so the first 600 have a=1, the next 600 have a=2, etc
**** 100 repetitions for bandwidth selection, times 600 observations, gives the following command of set obs 60000
set obs 60000
egen a = seq(), b(600)
label data bwselect_600_data
local datasample= _N
gen id= _n
gen Lat=g1
gen Long=g2
gen x1=x
gen x12=x^2
*d_treatnew is the treatment dummy, i.e., properties in the city (d_treat) after the storm (d_time)
gen d_treatnew=dtreat*dtime
*keep only the `j'-th Monte Carlo sample of 600 observations
keep if `j' == a
*labels are loop-invariant, so set them once per sample (hoisted out of the i-loop)
label variable y "y"
label variable x1 "x"
label variable d_treatnew "Treatment Dummy"
*estimate a separate, locally-weighted ATE at every observation i
forvalues i = 1/600 {
*Gaussian kernel weight of every observation relative to observation `i'
gen dist`i' = ((Long-Long[`i'])^2+(Lat-Lat[`i'])^2)^0.5
gen paren`i'=exp(-0.5*(dist`i'/(`bw'/100))^2)
gen w_w_`i'=(paren`i')^0.5
*create w times x
gen x1_w_w_`i' = w_w_`i'*x1
gen x12_w_w_`i' =((w_w_`i')*x1)^2
gen dum_treat_w_w_`i' =(w_w_`i')*d_treatnew
*run iptATE
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600"
iptATE y dum_treat_w_w_`i' x1_w_w_`i' x12_w_w_`i' , optroutine(e2)
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_`bw'"
*save coefficients and V-cov matrix
mat b_w_w_`i'=e(b)
mat v_w_w_`i'=e(V)
*note h_b and h_v is the column number where the ate is listed, in both the parameter vector and the vcov vector
scalar h_b=colnumb(b_w_w_`i', "ate:gamma")
scalar h_v=colnumb(v_w_w_`i', "ate:gamma")
*here use `i' because the ATE varies with each observation in GIPT
scalar v_ATE`i'=v_w_w_`i'[h_v,h_v]
scalar b_ate`i'=b_w_w_`i'[1,h_b]
scalar trueate`i'=trueate[`i']
*take the square root of the ate variance to get the standard error (se) of the ate
scalar se_ate`i'=(v_ATE`i')^0.5
*write b and se to txt file
file write applic_file_`bw'_`j' ("`i'") _tab (b_ate`i') _tab (se_ate`i') _tab (trueate`i') _n
*drop the observation-`i' working variables before the next pass
drop dist`i'
drop paren`i'
drop w_w_`i'
drop x1_w_w_`i'
drop x12_w_w_`i'
drop dum_treat_w_w_`i'
}
file close applic_file_`bw'_`j'
*BUG FIX: added "replace" so a re-run does not stop with "file already exists"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_`bw'\bwselect_600_July2018_with_weights_`bw'_`j'.dta", replace
label data bw_select_600_July2018_with_weights_`bw'.dta
**********************************************
*re-import this sample's txt results and merge them back onto the saved data
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_`bw'\myfile_`bw'_`j'.txt"
*since the id is 1 thru 600 in all iterations of myfile_`bw'_`j' but id is the actual number from the original data file in bwselect_600_July2018_with_weights_`bw'_`j'.dta we need to make the transformation in the next line
gen id=v1 +(600*(`j'-1))
gen b_ate=v2
gen v_ATE=v3
label variable b_ate "ATE"
label variable v_ATE "Standard Errors of ATE"
drop v1 v2 v3
merge 1:1 id using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_`bw'\bwselect_600_July2018_with_weights_`bw'_`j'.dta"
*now calculate MASE, noting that since the 100 reps are all in one, calculating the ASE for all obs is the same as doing the ASE for each sample, then taking the mean of the ASEs
gen difference_`bw'=b_ate-trueate
gen difference2_`bw'=difference_`bw'^2
sum difference2_`bw'
mat ASE_mat_`bw'_`j'=r(mean)
scalar ASE_`bw'_`j'=ASE_mat_`bw'_`j'[1,1]
drop id
drop b_ate
drop v_ATE
drop _merge
}
*average the per-sample ASEs into the MASE for this candidate bandwidth
scalar MASE_sum_`bw'=0
scalar MASE_`bw'=0
forvalues j=1/`repetitions' {
scalar MASE_sum_`bw' = ASE_`bw'_`j' + MASE_`bw'
scalar MASE_`bw' = MASE_sum_`bw'
}
scalar MASE_`bw' = MASE_sum_`bw'/`repetitions'
*rename applic_file so that there won't be problems the next time this program runs
*BUG FIX: the original issued a single "shell ren" after the j-loop had
*closed, when the local `j' was empty, so the per-sample files were never
*renamed.  It now loops over all samples.  Windows REN also requires the
*target to be a bare filename (no path) and takes no ", replace" option.
forvalues j=1/`repetitions' {
shell ren "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_`bw'\myfile_`bw'_`j'.txt" "testfile_`bw'_`j'.txt"
}
}
*BUG FIX: display needs literal separators between the scalar expressions;
*juxtaposing the names runs the values together (or fails to parse)
di MASE_75 " " MASE_85 " " MASE_95 " " MASE_105 " " MASE_115 " " MASE_125 " " MASE_135 " " MASE_145
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600"
*pick the candidate whose MASE is smallest; divide by 100 to convert from
*the directory-name scale (75) to the kernel scale (0.75)
scalar optimal_b=.
foreach bw in 75 85 95 105 115 125 135 145 {
if MASE_`bw' == min(MASE_75, MASE_85, MASE_95, MASE_105, MASE_115, MASE_125, MASE_135, MASE_145) {
scalar optimal_b=`bw'/100
}
}
di optimal_b
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\"
*******************************************NOW USE THIS OPTIMAL BANDWIDTH TO RUN GIPT WITH THE 500 REPETITIONS********************
*STEP 2: estimate GIPT at the selected bandwidth (scalar optimal_b) on
*`repetitions_600' Monte Carlo samples of 600 observations each
mkdir "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps"
forvalues j=1/`repetitions_600' {
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\data_600"
import delimited using cross_section_data600100.csv, clear
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps"
*per-sample output file of observation number, ATE, SE, and true ATE
file open applic_file_600_`j' using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\myfile_600_`j'.txt", write
****the next 2 lines create a block of 600 observations, so the first 600 have a=1, the next 600 have a=2, etc
set obs `observationsnum'
egen a = seq(), b(600)
label data data_600
local datasample= _N
gen id= _n
*g1/g2 appear to hold the spatial coordinates in the simulated data
gen Lat=g1
gen Long=g2
gen y1=y
gen x1=x
gen x12=x^2
*d_treatnew is the treatment dummy, i.e., properties in the city (d_treat) after the storm (d_time)
gen d_treat=dtreat
gen d_time=dtime
gen d_treatnew=d_treat*d_time
*keep only the `j'-th Monte Carlo sample of 600 observations
keep if `j' == a
*header row for this sample's results file
file write applic_file_600_`j' "number" _tab "b_ate" _tab "se_ate" _tab "b_trueate" _n
*estimate a separate, locally-weighted ATE at every observation i
forvalues i = 1/600 {
*Gaussian kernel weight based on distance to observation `i', using the selected bandwidth
gen dist`i' = ((Long-Long[`i'])^2+(Lat-Lat[`i'])^2)^0.5
gen paren`i'=exp(-0.5*(dist`i'/(optimal_b))^2)
gen w_w_`i'=(paren`i')^0.5
label variable y1 "y"
label variable x1 "x"
label variable d_treatnew "Treatment Dummy"
*create w times x
gen x1_w_w_`i' = w_w_`i'*x1
gen x12_w_w_`i' =((w_w_`i')*x1)^2
gen dum_treat_w_w_`i' =(w_w_`i')*d_treatnew
*run iptATE
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600"
iptATE y dum_treat_w_w_`i' x1_w_w_`i' x12_w_w_`i' , optroutine(e2)
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps"
*save coefficients and V-cov matrix
mat b_w_w_`i'=e(b)
mat v_w_w_`i'=e(V)
*note h_b and h_v is the column number where the ate is listed, in both the parameter vector and the vcov vector
scalar h_b=colnumb(b_w_w_`i', "ate:gamma")
scalar h_v=colnumb(v_w_w_`i', "ate:gamma")
*here use `i' because the ATE varies with each observation in GIPT
scalar v_ATE`i'=v_w_w_`i'[h_v,h_v]
scalar b_ate`i'=b_w_w_`i'[1,h_b]
scalar trueate`i' = trueate[`i']
*take the square root of the ate variance to get the standard error (se) of the ate
scalar se_ate`i'=(v_ATE`i')^0.5
*write b and se to txt file
file write applic_file_600_`j' ("`i'") _tab (b_ate`i') _tab (se_ate`i') _tab (trueate`i') _n
*drop the observation-`i' working variables before the next pass
drop dist`i'
drop paren`i'
drop w_w_`i'
drop x1_w_w_`i'
drop x12_w_w_`i'
drop dum_treat_w_w_`i'
}
file close applic_file_600_`j'
*NOTE(review): no "replace" here, so a re-run stops with "file already exists" — confirm intended
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\data_600_`j'.dta"
label data bw_select_600_July2018_with_weights_600.dta
**********************************************
*re-import this sample's txt results and merge them back onto the saved data
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\myfile_600_`j'.txt"
*since the id is 1 thru 600 in all iterations of myfile_`bw'_`j' but id is the actual number from the original data file in bwselect_600_July2018_with_weights_`bw'_`j'.dta we need to make the transformation in the next line
gen id=number +(600*(`j'-1))
label variable b_ate "ATE"
label variable se_ate "Standard Errors of ATE"
label variable b_trueate "True ATE"
merge 1:1 id using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\data_600_`j'.dta"
*now calculate MASE, noting that since the 100 reps are all in one, calculating the ASE for all obs is the same as doing the ASE for each sample, then taking the mean of the ASEs
gen difference_600=b_ate-trueate
gen difference2_600=difference_600^2
sum difference2_600
*store this sample's average squared error and average bias as scalars
mat ASE_mat_600_`j'=r(mean)
scalar ASE_600_`j'=ASE_mat_600_`j'[1,1]
sum difference_600
mat bias_mat_600_`j'=r(mean)
scalar bias_600_`j'=bias_mat_600_`j'[1,1]
drop id
drop b_ate
drop se_ate
drop b_trueate
drop _merge
}
*----------------------------------------------------------------------
* Aggregate the GIPT results: mean ASE (MASE) and mean bias across the
* `repetitions_600' Monte Carlo samples.
*----------------------------------------------------------------------
scalar MASE_sum_600=0
scalar MASE_600=0
scalar bias_600=0
scalar bias_sum_600=0
forvalues j=1/`repetitions_600' {
scalar MASE_sum_600 = MASE_sum_600 + ASE_600_`j'
scalar bias_sum_600 = bias_sum_600 + bias_600_`j'
}
scalar MASE_600 = MASE_sum_600/`repetitions_600'
scalar bias_600 = bias_sum_600/`repetitions_600'
*combine the per-sample txt files into one dataset of individual b_ate
*and trueate estimates, then export the stack as a single txt file
forvalues j=1/`repetitions_600' {
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\myfile_600_`j'.txt"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\stata_myfile_600_`j'", replace
}
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\stata_myfile_600_1"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\myfile_600_all", replace
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\myfile_600_all"
forvalues j=2/`repetitions_600' {
append using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\stata_myfile_600_`j'"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\myfile_600_all", replace
}
*BUG FIX: added "replace" so a re-run does not fail on the existing file
export delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\all_data_600.txt", delimiter(tab) replace
*rename applic_file so that there won't be problems the next time this program runs
*BUG FIX: loop over all samples (the original single "shell ren" ran after
*the j-loop ended, when `j' was empty); Windows REN needs a bare filename
*target and has no ", replace" option
forvalues j=1/`repetitions_600' {
shell ren "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_600_500reps\myfile_600_`j'.txt" "testfile_600_`j'.txt"
}
*******************************NOW DO REGULAR IPT***************************************************************
*STEP 3: standard (unweighted) IPT on the same 500 samples, for comparison
mkdir "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps"
forvalues j=1/`repetitions_600' {
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\data_600"
import delimited using cross_section_data600100.csv, clear
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps"
*per-sample output file of observation number, ATE, SE, and true ATE
file open applic_file_ipt_600_`j' using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\myfile_ipt_600_`j'.txt", write
****the next 2 lines create a block of 600 observations, so the first 600 have a=1, the next 600 have a=2, etc
set obs `observationsnum'
egen a = seq(), b(600)
label data data_ipt_600
local datasample= _N
gen id= _n
gen Lat=g1
gen Long=g2
gen x1=x
gen x12=x^2
*d_treatnew is the treatment dummy, i.e., properties in the city (d_treat) after the storm (d_time)
gen d_treat=dtreat
gen d_time=dtime
gen d_treatnew=d_treat*d_time
*keep only the `j'-th Monte Carlo sample of 600 observations
keep if `j' == a
label variable y "y"
label variable x1 "x"
label variable d_treatnew "Treatment Dummy"
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600"
*run iptATE
iptATE y d_treatnew x1 x12 , optroutine(e2)
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps"
*save coefficients and V-cov matrix
mat bmat`j'=e(b)
mat vmat`j'=e(V)
set matsize 600
*store this sample's 600 true ATEs as a column vector for per-observation lookup
mkmat trueate, mat(trueatemat1_`j')
*header row for this sample's results file
file write applic_file_ipt_600_`j' "number" _tab "b_ate" _tab "se_ate" _tab "b_trueate" _n
*IPT gives one ATE per sample; write the same estimate on all 600 rows
forvalues i=1/600 {
*note h_b and h_v is the column number where the ate is listed, in both the parameter vector and the vcov vector
scalar h_b=colnumb(bmat`j', "ate:gamma")
scalar h_v=colnumb(vmat`j', "ate:gamma")
*here use `j' because the ATE does not vary with each observation in IPT
scalar v_ATE`i'=vmat`j'[h_v,h_v]
scalar b_ate`i'=bmat`j'[1,h_b]
scalar trueate`i'=trueatemat1_`j'[`i',1]
*take the square root of the ate variance to get the standard error (se) of the ate
scalar se_ate`i'=(v_ATE`i')^0.5
*write b and se to txt file
file write applic_file_ipt_600_`j' ("`i'") _tab (b_ate`i') _tab (se_ate`i') _tab (trueate`i') _n
}
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\data_ipt_600_`j'.dta" , replace
label data ipt_600_July2018_with_weights_600.dta
file close applic_file_ipt_600_`j'
**********************************************
*re-import this sample's txt results and compute its ASE and bias
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\myfile_ipt_600_`j'.txt"
*since the id is 1 thru 600 in all iterations of myfile_`bw'_`j' but id is the actual number from the original data file in bwselect_600_July2018_with_weights_`bw'_`j'.dta we need to make the transformation in the next line
gen id=number +(600*(`j'-1))
*now calculate MASE, noting that since the 100 reps are all in one, calculating the ASE for all obs is the same as doing the ASE for each sample, then taking the mean of the ASEs
gen difference_ipt_600=b_ate-b_trueate
gen difference2_ipt_600=difference_ipt_600^2
sum difference2_ipt_600
*store this sample's average squared error and average bias as scalars
mat ASE_mat_ipt_600_`j'=r(mean)
scalar ASE_ipt_600_`j'=ASE_mat_ipt_600_`j'[1,1]
sum difference_ipt_600
mat bias_mat_ipt_600_`j'=r(mean)
scalar bias_ipt_600_`j'=bias_mat_ipt_600_`j'[1,1]
drop id
drop b_ate
drop se_ate
drop difference_ipt_600
drop difference2_ipt_600
}
*----------------------------------------------------------------------
* Aggregate the IPT results: mean ASE (MASE) and mean bias across the
* `repetitions_600' Monte Carlo samples.
*----------------------------------------------------------------------
scalar MASE_sum_ipt_600=0
scalar MASE_ipt_600=0
scalar bias_ipt_600=0
scalar bias_sum_ipt_600=0
forvalues j=1/`repetitions_600' {
scalar MASE_sum_ipt_600 = MASE_sum_ipt_600 + ASE_ipt_600_`j'
scalar bias_sum_ipt_600 = bias_sum_ipt_600 + bias_ipt_600_`j'
}
scalar MASE_ipt_600 = MASE_sum_ipt_600/`repetitions_600'
scalar bias_ipt_600 = bias_sum_ipt_600/`repetitions_600'
*combine multiple txt files into one, so that we can get individual b_ate and trueate estimates
forvalues j=1/`repetitions_600' {
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\myfile_ipt_600_`j'.txt"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\stata_myfile_ipt_600_`j'.dta", replace
}
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\stata_myfile_ipt_600_1.dta"
*BUG FIX: added "replace" so a re-run does not stop with "file already exists"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\myfile_ipt_600_all.dta", replace
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\myfile_ipt_600_all.dta"
forvalues j=2/`repetitions_600' {
append using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\stata_myfile_ipt_600_`j'.dta"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\myfile_ipt_600_all.dta", replace
}
*BUG FIX: added "replace" so a re-run does not fail on the existing file
export delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\all_data_600_ipt.txt", delimiter(tab) replace
*rename applic_file so that there won't be problems the next time this program runs
*BUG FIX: loop over all samples (the original single "shell ren" ran after
*the j-loop ended, when `j' was empty); Windows REN needs a bare filename
*target and has no ", replace" option
forvalues j=1/`repetitions_600' {
shell ren "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_ipt_600_500reps\myfile_ipt_600_`j'.txt" "testfile_ipt_600_`j'.txt"
}
**************************NOW DO DIFFERENCE-IN-DIFFERENCES*************************************************
*STEP 4: OLS with the treatment-interaction dummy on the same 500 samples
*NOTE(review): labeled difference-in-differences, but implemented as a
*single cross-section regression of y on d_treatnew, x1, x12 — confirm
*this matches the intended DID specification
mkdir "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps"
forvalues j=1/`repetitions_600' {
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\data_600"
import delimited using cross_section_data600100.csv, clear
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps"
*per-sample output file of observation number, ATE, SE, and true ATE
file open applic_file_DID_600_`j' using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\myfile_DID_600_`j'.txt", write
****the next 2 lines create a block of 600 observations, so the first 600 have a=1, the next 600 have a=2, etc
set obs `observationsnum'
egen a = seq(), b(600)
label data data_DID_600
local datasample= _N
gen id= _n
gen x1=x
gen x12=x^2
*d_treatnew is the treatment dummy, i.e., properties in the city (d_treat) after the storm (d_time)
gen d_treat=dtreat
gen d_time=dtime
gen d_treatnew=d_treat*d_time
*keep only the `j'-th Monte Carlo sample of 600 observations
keep if `j' == a
label variable y "y"
label variable x1 "x"
label variable d_treatnew "Treatment Dummy"
*d_treatnew is the first regressor, so its coefficient/variance sit at [1,1] below
reg y d_treatnew x1 x12
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps"
*save coefficients and V-cov matrix
mat bmat`j'=e(b)
mat vmat`j'=e(V)
set matsize 600
*store this sample's 600 true ATEs as a column vector for per-observation lookup
mkmat trueate, mat(trueatemat2_`j')
*header row for this sample's results file
file write applic_file_DID_600_`j' "number" _tab "b_ate" _tab "se_ate" _tab "b_trueate" _n
forvalues i=1/600 {
*here use `j' because the ATE does not vary with each observation in DID
scalar v_ATE`i'=vmat`j'[1,1]
scalar b_ate`i'=bmat`j'[1,1]
scalar trueate`i'=trueatemat2_`j'[`i',1]
*take the square root of the ate variance to get the standard error (se) of the ate
scalar se_ate`i'=(v_ATE`i')^0.5
*write b and se to txt file
file write applic_file_DID_600_`j' ("`i'") _tab (b_ate`i') _tab (se_ate`i') _tab (trueate`i') _n
}
file close applic_file_DID_600_`j'
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\data_DID_600_`j'" , replace
label data DID_600_July2018_with_weights_600.dta
**********************************************
*re-import this sample's txt results and compute its ASE and bias
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\myfile_DID_600_`j'.txt"
*since the id is 1 thru 600 in all iterations of myfile_`bw'_`j' but id is the actual number from the original data file in bwselect_600_July2018_with_weights_`bw'_`j'.dta we need to make the transformation in the next line
gen id=number +(600*(`j'-1))
*now calculate MASE, noting that since the 100 reps are all in one, calculating the ASE for all obs is the same as doing the ASE for each sample, then taking the mean of the ASEs
gen difference_DID_600=b_ate-b_trueate
gen difference2_DID_600=difference_DID_600^2
sum difference2_DID_600
*store this sample's average squared error and average bias as scalars
mat ASE_mat_DID_600_`j'=r(mean)
scalar ASE_DID_600_`j'=ASE_mat_DID_600_`j'[1,1]
sum difference_DID_600
mat bias_mat_DID_600_`j'=r(mean)
scalar bias_DID_600_`j'=bias_mat_DID_600_`j'[1,1]
drop b_ate
drop se_ate
drop difference_DID_600
drop difference2_DID_600
}
*----------------------------------------------------------------------
* Aggregate the DID results: mean ASE (MASE) and mean bias across the
* `repetitions_600' Monte Carlo samples.
*----------------------------------------------------------------------
scalar MASE_sum_DID_600=0
scalar MASE_DID_600=0
scalar bias_DID_600=0
scalar bias_sum_DID_600=0
forvalues j=1/`repetitions_600' {
scalar MASE_sum_DID_600 = MASE_sum_DID_600 + ASE_DID_600_`j'
scalar bias_sum_DID_600 = bias_sum_DID_600 + bias_DID_600_`j'
}
scalar MASE_DID_600 = MASE_sum_DID_600/`repetitions_600'
scalar bias_DID_600 = bias_sum_DID_600/`repetitions_600'
*combine multiple txt files into one, so that we can get individual b_ate and trueate estimates
forvalues j=1/`repetitions_600' {
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\myfile_DID_600_`j'.txt"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\stata_myfile_DID_600_`j'", replace
}
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\stata_myfile_DID_600_1"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\myfile_DID_600_all", replace
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\myfile_DID_600_all"
forvalues j=2/`repetitions_600' {
append using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\stata_myfile_DID_600_`j'"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\myfile_DID_600_all", replace
}
*BUG FIX: added "replace" so a re-run does not fail on the existing file
export delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\all_data_600_DID.txt", delimiter(tab) replace
*rename applic_file so that there won't be problems the next time this program runs
*BUG FIX: loop over all samples (the original single "shell ren" ran after
*the j-loop ended, when `j' was empty); Windows REN needs a bare filename
*target and has no ", replace" option
forvalues j=1/`repetitions_600' {
shell ren "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\dir_DID_600_500reps\myfile_DID_600_`j'.txt" "testfile_DID_600_`j'.txt"
}
*collect the summary statistics and write them to a tab-delimited file
scalar bias_GIPT_600=bias_600
scalar MASE_GIPT_600=MASE_600
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\"
*BUG FIX: "replace" lets the summary file be rewritten on re-runs
file open output_600 using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\bias_and_MASE_600.txt", write replace
*BUG FIX: the header row was missing the "bias GIPT 600 obs" column, so its
*6 titles were misaligned against the 7 values written on the next row
file write output_600 "optimal bandwidth" _tab "bias GIPT 600 obs" _tab "bias ipt 600 obs" _tab "bias DID 600 obs" _tab "MASE GIPT 600 obs" _tab "MASE ipt 600 obs" _tab "MASE DID 600 obs" _n
file write output_600 (optimal_b) _tab (bias_GIPT_600) _tab (bias_ipt_600) _tab (bias_DID_600) _tab (MASE_GIPT_600) _tab (MASE_ipt_600) _tab (MASE_DID_600) _n
file close output_600
clear
*NOTE(review): the txt file written above contains a header row; with an
*explicit varlist, import delimited may read that row as an observation —
*check the first observation, or add the varnames() option as appropriate
import delimited optimal_bandwidth bias_GIPT bias_IPT bias_DID MASE_GIPT MASE_IPT MASE_DID using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\bias_and_MASE_600.txt"
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_600\"
save output_data_600, replace
*build the Word (.docx) summary document from the dataset in memory
putdocx begin
putdocx paragraph
putdocx text ("Table xx: Estimates of Bias and Mean of the Average Squared Errors, Simulations for N=600, 500 Iterations")
putdocx paragraph
*one table per statistic group: biases, MASEs, and the selected bandwidth
putdocx table tbl1 = data("bias_GIPT bias_IPT bias_DID"), varnames
putdocx paragraph
putdocx table tbl2 = data("MASE_GIPT MASE_IPT MASE_DID"), varnames
putdocx paragraph
putdocx table tbl3 = data("optimal_bandwidth"), varnames
putdocx paragraph
putdocx save mytable_600.docx, replace
*close the session log opened at the top of the do-file
log close