Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Vancouver_application_8-8-2018/submitted JASA data/Bryan Graham Stata software/bandwidth_selection_300/iptate_loop_simulation_bw_select_300_July202018_500reps.do
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1040 lines (452 sloc)
25.1 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
****&&&&& MUST RUN THE FOLLOWING BEFORE RUNNING:
****&&&& ssc install iptATE (this is case-sensitive)
****&&&& ssc install estout
****&&&& change directory to be the same one where the data are located, for all instances of import, export, and cd throughout this code
****&&&& Be sure Microsoft Word 2007 or later is installed, so that .docx format will be recognized for making tables
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300"
log using vanrichlon300, text replace
*number of Monte Carlo samples used for bandwidth selection
local repetitions=100
*number of Monte Carlo samples used for the final GIPT/IPT/DID estimates
local repetitions_300=500
*total observations in the 500-repetition data set: 300 observations per sample x 500 samples
local observationsnum=300*500
*first do optimal bandwidth selection based on 100 repetitions, then later use the optimal bandwidth to estimate GIPT using 500 repetitions
*divide bandwidths by 100 later, but here keep the way they are for ease of creating directories
*loop over the candidate bandwidths; each value is 100x the actual bandwidth so that directory names stay integer-valued
foreach bw in 75 85 95 105 115 125 135 145 {
mkdir "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'"
set more off
forvalues j=1/`repetitions' {
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300"
import delimited using cross_section_data300100100_7-14-18.csv, clear
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'"
file open applic_file_`bw'_`j' using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'\myfile_`bw'_`j'.txt", write
****the next 2 lines create a block of 300 observations, so the first 300 have a=1, the next 300 have a=2, etc
**** 100 repetitions for bandwidth selection, times 300 observations, gives the following command of set obs 30000
set obs 30000
egen a = seq(), b(300)
label data bwselect_300_data
local datasample= _N
gen id= _n
gen Lat=g1
gen Long=g2
gen x1=x
gen x12=x^2
*d_treatnew is the treatment dummy, i.e., properties in the city (d_treat) after the storm (d_time)
gen d_treatnew=d_treat*d_time
*keep only the block of 300 observations belonging to Monte Carlo sample j
keep if `j' == a
forvalues i = 1/300 {
*Euclidean distance from every observation to observation i
gen dist`i' = ((Long-Long[`i'])^2+(Lat-Lat[`i'])^2)^0.5
*Gaussian kernel weight; divide `bw' by 100 to undo the x100 scaling used for directory names
gen paren`i'=exp(-0.5*(dist`i'/(`bw'/100))^2)
gen w_w_`i'=(paren`i')^0.5
label variable y "y"
label variable x1 "x"
label variable d_treatnew "Treatment Dummy"
*create w times x
gen x1_w_w_`i' = w_w_`i'*x1
gen x12_w_w_`i' =((w_w_`i')*x1)^2
gen dum_treat_w_w_`i' =(w_w_`i')*d_treatnew
*run iptATE
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300"
iptATE y dum_treat_w_w_`i' x1_w_w_`i' x12_w_w_`i' , optroutine(e2)
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'"
*save coefficients and V-cov matrix
mat b_w_w_`i'=e(b)
mat v_w_w_`i'=e(V)
*note h_b and h_v is the column number where the ate is listed, in both the parameter vector and the vcov vector
scalar h_b=colnumb(b_w_w_`i', "ate:gamma")
scalar h_v=colnumb(v_w_w_`i', "ate:gamma")
*here use `i' because the ATE varies with each observation in GIPT
scalar v_ATE`i'=v_w_w_`i'[h_v,h_v]
scalar b_ate`i'=b_w_w_`i'[1,h_b]
scalar trueate`i'=trueate[`i']
*take the square root of the ate variance to get the standard error (se) of the ate
scalar se_ate`i'=(v_ATE`i')^0.5
*write b and se to txt file
file write applic_file_`bw'_`j' ("`i'") _tab (b_ate`i') _tab (se_ate`i') _tab (trueate`i') _n
drop dist`i'
drop paren`i'
drop w_w_`i'
drop x1_w_w_`i'
drop x12_w_w_`i'
drop dum_treat_w_w_`i'
}
file close applic_file_`bw'_`j'
*label the data BEFORE saving so the label is actually stored in the .dta file
label data "bw_select_300_July2018_with_weights_`bw'"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'\bwselect_300_July2018_with_weights_`bw'_`j'.dta"
**********************************************
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'\myfile_`bw'_`j'.txt"
*since the id is 1 thru 300 in all iterations of myfile_`bw'_`j' but id is the actual number from the original data file in bwselect_300_July2018_with_weights_`bw'_`j'.dta we need to make the transformation in the next line
gen id=v1 +(300*(`j'-1))
gen b_ate=v2
gen v_ATE=v3
label variable b_ate "ATE"
label variable v_ATE "Standard Errors of ATE"
drop v1 v2 v3
merge 1:1 id using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'\bwselect_300_July2018_with_weights_`bw'_`j'.dta"
*now calculate MASE, noting that since the 100 reps are all in one, calculating the ASE for all obs is the same as doing the ASE for each sample, then taking the mean of the ASEs
gen difference_`bw'=b_ate-trueate
gen difference2_`bw'=difference_`bw'^2
sum difference2_`bw'
mat ASE_mat_`bw'_`j'=r(mean)
scalar ASE_`bw'_`j'=ASE_mat_`bw'_`j'[1,1]
drop id
drop b_ate
drop v_ATE
drop _merge
}
*average the per-sample ASEs over the repetitions to get the MASE for this bandwidth
scalar MASE_sum_`bw'=0
scalar MASE_`bw'=0
forvalues j=1/`repetitions' {
scalar MASE_sum_`bw' = ASE_`bw'_`j' + MASE_`bw'
scalar MASE_`bw' = MASE_sum_`bw'
}
scalar MASE_`bw' = MASE_sum_`bw'/`repetitions'
*rename applic_file so that there won't be problems the next time this program runs
*NOTE: this must loop over j -- the local `j' is empty once the forvalues loop above has completed.
*Also use "move /Y" rather than "ren": cmd's ren does not accept a path in the destination nor a ", replace" option.
forvalues j=1/`repetitions' {
shell move /Y "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'\myfile_`bw'_`j'.txt" "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_`bw'\testfile_`bw'_`j'.txt"
}
}
*display each candidate bandwidth's MASE on its own line, labeled
*(a single di of all eight scalars would run the numbers together with no separators)
foreach bw in 75 85 95 105 115 125 135 145 {
di "MASE_`bw' = " MASE_`bw'
}
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300"
scalar optimal_b=.
*select the candidate bandwidth with the smallest MASE
foreach bw in 75 85 95 105 115 125 135 145 {
if MASE_`bw' == min(MASE_75, MASE_85, MASE_95, MASE_105, MASE_115, MASE_125, MASE_135, MASE_145) {
*divide optimal bandwidth by 100 because originally we multiplied bandwidth by 100 to facilitate making directories
scalar optimal_b=`bw'/100
}
}
di optimal_b
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\"
*******************************************NOW USE THIS OPTIMAL BANDWIDTH TO RUN GIPT WITH THE 500 REPETITIONS********************
mkdir "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps"
forvalues j=1/`repetitions_300' {
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\data_300"
import delimited using cross_section_data300100.csv, clear
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps"
file open applic_file_300_`j' using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\myfile_300_`j'.txt", write
****the next 2 lines create a block of 300 observations, so the first 300 have a=1, the next 300 have a=2, etc
set obs `observationsnum'
egen a = seq(), b(300)
label data data_300
local datasample= _N
gen id= _n
gen Lat=g1
gen Long=g2
gen y1=y
gen x1=x
gen x12=x^2
*d_treatnew is the treatment dummy, i.e., properties in the city (d_treat) after the storm (d_time)
gen d_treat=dtreat
gen d_time=dtime
gen d_treatnew=d_treat*d_time
*keep only the block of 300 observations belonging to Monte Carlo sample j
keep if `j' == a
*header row for this sample's results file
file write applic_file_300_`j' "number" _tab "b_ate" _tab "se_ate" _tab "b_trueate" _n
forvalues i = 1/300 {
*Euclidean distance from every observation to observation i
gen dist`i' = ((Long-Long[`i'])^2+(Lat-Lat[`i'])^2)^0.5
*Gaussian kernel weight using the optimal bandwidth selected above
gen paren`i'=exp(-0.5*(dist`i'/(optimal_b))^2)
gen w_w_`i'=(paren`i')^0.5
label variable y1 "y"
label variable x1 "x"
label variable d_treatnew "Treatment Dummy"
gen x1_w_w_`i' = w_w_`i'*x1
gen x12_w_w_`i' =((w_w_`i')*x1)^2
gen dum_treat_w_w_`i' =(w_w_`i')*d_treatnew
*run iptATE
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300"
iptATE y dum_treat_w_w_`i' x1_w_w_`i' x12_w_w_`i' , optroutine(e2)
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps"
*save coefficients and V-cov matrix
mat b_w_w_`i'=e(b)
mat v_w_w_`i'=e(V)
*note h_b and h_v is the column number where the ate is listed, in both the parameter vector and the vcov vector
scalar h_b=colnumb(b_w_w_`i', "ate:gamma")
scalar h_v=colnumb(v_w_w_`i', "ate:gamma")
*here use `i' because the ATE varies with each observation in GIPT
scalar v_ATE`i'=v_w_w_`i'[h_v,h_v]
scalar b_ate`i'=b_w_w_`i'[1,h_b]
scalar trueate`i' = trueate[`i']
*take the square root of the ate variance to get the standard error (se) of the ate
scalar se_ate`i'=(v_ATE`i')^0.5
*write b and se to txt file
file write applic_file_300_`j' ("`i'") _tab (b_ate`i') _tab (se_ate`i') _tab (trueate`i') _n
drop dist`i'
drop paren`i'
drop w_w_`i'
drop x1_w_w_`i'
drop x12_w_w_`i'
drop dum_treat_w_w_`i'
}
file close applic_file_300_`j'
*label the data BEFORE saving so the label is actually stored in the .dta file
label data "bw_select_300_July2018_with_weights_300"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\data_300_`j'.dta"
**********************************************
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\myfile_300_`j'.txt"
*the id is 1 thru 300 within each myfile_300_`j', so convert it to the id from the full data set
gen id=number +(300*(`j'-1))
label variable b_ate "ATE"
label variable se_ate "Standard Errors of ATE"
label variable b_trueate "True ATE"
merge 1:1 id using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\data_300_`j'.dta"
*calculate the ASE (mean squared deviation of estimated from true ATE) and bias for sample j
gen difference_300=b_ate-trueate
gen difference2_300=difference_300^2
sum difference2_300
mat ASE_mat_300_`j'=r(mean)
scalar ASE_300_`j'=ASE_mat_300_`j'[1,1]
sum difference_300
mat bias_mat_300_`j'=r(mean)
scalar bias_300_`j'=bias_mat_300_`j'[1,1]
drop id
drop b_ate
drop se_ate
drop b_trueate
drop _merge
}
*average the per-sample ASEs and biases over the 500 repetitions
scalar MASE_sum_300=0
scalar MASE_300=0
scalar bias_300=0
scalar bias_sum_300=0
forvalues j=1/`repetitions_300' {
scalar MASE_sum_300 = ASE_300_`j' + MASE_300
scalar MASE_300 = MASE_sum_300
scalar bias_sum_300 = bias_300_`j' + bias_300
scalar bias_300 = bias_sum_300
}
scalar MASE_300 = MASE_sum_300/`repetitions_300'
scalar bias_300 = bias_sum_300/`repetitions_300'
*combine the per-sample txt files into one dataset, so that we can get individual b_ate and trueate estimates
forvalues j=1/`repetitions_300' {
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\myfile_300_`j'.txt"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\stata_myfile_300_`j'", replace
}
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\stata_myfile_300_1"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\myfile_300_all", replace
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\myfile_300_all"
forvalues j=2/`repetitions_300' {
append using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\stata_myfile_300_`j'"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\myfile_300_all", replace
}
export delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\all_data_300.txt", delimiter(tab)
*rename applic_file so that there won't be problems the next time this program runs
*NOTE: this must loop over j -- the local `j' is empty once the loops above have completed.
*Also use "move /Y" rather than "ren": cmd's ren does not accept a path in the destination nor a ", replace" option.
forvalues j=1/`repetitions_300' {
shell move /Y "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\myfile_300_`j'.txt" "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_300_500reps\testfile_300_`j'.txt"
}
*******************************NOW DO REGULAR IPT***************************************************************
mkdir "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps"
forvalues j=1/`repetitions_300' {
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\data_300"
import delimited using cross_section_data300100.csv, clear
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps"
file open applic_file_ipt_300_`j' using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\myfile_ipt_300_`j'.txt", write
****the next 2 lines create a block of 300 observations, so the first 300 have a=1, the next 300 have a=2, etc
set obs `observationsnum'
egen a = seq(), b(300)
label data data_ipt_300
local datasample= _N
gen id= _n
gen Lat=g1
gen Long=g2
gen x1=x
gen x12=x^2
*d_treatnew is the treatment dummy, i.e., properties in the city (d_treat) after the storm (d_time)
gen d_treat=dtreat
gen d_time=dtime
gen d_treatnew=d_treat*d_time
*keep only the block of 300 observations belonging to Monte Carlo sample j
keep if `j' == a
label variable y "y"
label variable x1 "x"
label variable d_treatnew "Treatment Dummy"
*change the directory to the user's local path
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300"
*run iptATE
iptATE y d_treatnew x1 x12 , optroutine(e2)
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps"
*save coefficients and V-cov matrix
mat bmat`j'=e(b)
mat vmat`j'=e(V)
mkmat trueate, mat(trueatemat1_`j')
file write applic_file_ipt_300_`j' "number" _tab "b_ate" _tab "se_ate" _tab "b_trueate" _n
*note h_b and h_v is the column number where the ate is listed, in both the parameter vector and the vcov vector
scalar h_b=colnumb(bmat`j', "ate:gamma")
scalar h_v=colnumb(vmat`j', "ate:gamma")
*the ATE does not vary with each observation in IPT, so compute the estimate and its
*standard error once here, rather than recomputing them inside the 300-iteration loop
scalar v_ATE=vmat`j'[h_v,h_v]
scalar b_ate=bmat`j'[1,h_b]
*take the square root of the ate variance to get the standard error (se) of the ate
scalar se_ate=(v_ATE)^0.5
forvalues i=1/300 {
*only the true ATE varies by observation; write one row per observation
scalar trueate`i'=trueatemat1_`j'[`i',1]
file write applic_file_ipt_300_`j' ("`i'") _tab (b_ate) _tab (se_ate) _tab (trueate`i') _n
}
file close applic_file_ipt_300_`j'
*label the data BEFORE saving so the label is actually stored in the .dta file
label data "ipt_300_July2018_with_weights_300"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\data_ipt_300_`j'.dta" , replace
**********************************************
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\myfile_ipt_300_`j'.txt"
*the id is 1 thru 300 within each myfile_ipt_300_`j', so convert it to the id from the full data set
gen id=number +(300*(`j'-1))
*calculate the ASE (mean squared deviation of estimated from true ATE) and bias for sample j
gen difference_ipt_300=b_ate-b_trueate
gen difference2_ipt_300=difference_ipt_300^2
sum difference2_ipt_300
mat ASE_mat_ipt_300_`j'=r(mean)
scalar ASE_ipt_300_`j'=ASE_mat_ipt_300_`j'[1,1]
sum difference_ipt_300
mat bias_mat_ipt_300_`j'=r(mean)
scalar bias_ipt_300_`j'=bias_mat_ipt_300_`j'[1,1]
drop id
drop b_ate
drop se_ate
drop difference_ipt_300
drop difference2_ipt_300
}
*average the per-sample ASEs and biases over the 500 repetitions
scalar MASE_sum_ipt_300=0
scalar MASE_ipt_300=0
scalar bias_ipt_300=0
scalar bias_sum_ipt_300=0
forvalues j=1/`repetitions_300' {
scalar MASE_sum_ipt_300 = ASE_ipt_300_`j' + MASE_ipt_300
scalar MASE_ipt_300 = MASE_sum_ipt_300
scalar bias_sum_ipt_300 = bias_ipt_300_`j' + bias_ipt_300
scalar bias_ipt_300 = bias_sum_ipt_300
}
scalar MASE_ipt_300 = MASE_sum_ipt_300/`repetitions_300'
scalar bias_ipt_300 = bias_sum_ipt_300/`repetitions_300'
*combine multiple txt files into one, so that we can get individual b_ate and trueate estimates
forvalues j=1/`repetitions_300' {
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\myfile_ipt_300_`j'.txt"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\stata_myfile_ipt_300_`j'.dta", replace
}
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\stata_myfile_ipt_300_1.dta"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\myfile_ipt_300_all.dta"
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\myfile_ipt_300_all.dta"
forvalues j=2/`repetitions_300' {
append using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\stata_myfile_ipt_300_`j'.dta"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\myfile_ipt_300_all.dta", replace
}
export delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\all_data_300_ipt.txt", delimiter(tab)
*rename applic_file so that there won't be problems the next time this program runs
*NOTE: this must loop over j -- the local `j' is empty once the loops above have completed.
*Also use "move /Y" rather than "ren": cmd's ren does not accept a path in the destination nor a ", replace" option.
forvalues j=1/`repetitions_300' {
shell move /Y "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\myfile_ipt_300_`j'.txt" "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_ipt_300_500reps\testfile_ipt_300_`j'.txt"
}
**************************NOW DO DIFFERENCE-IN-DIFFERENCES*************************************************
mkdir "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps"
forvalues j=1/`repetitions_300' {
****&&&& NOTE: MUST CHANGE THE DIRECTORY BELOW DEPENDING ON THE PATH OF THE USER
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\data_300"
import delimited using cross_section_data300100.csv, clear
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps"
file open applic_file_DID_300_`j' using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\myfile_DID_300_`j'.txt", write
****the next 2 lines create a block of 300 observations, so the first 300 have a=1, the next 300 have a=2, etc
set obs `observationsnum'
egen a = seq(), b(300)
label data data_DID_300
local datasample= _N
gen id= _n
gen x1=x
gen x12=x^2
*d_treatnew is the treatment dummy, i.e., properties in the city (d_treat) after the storm (d_time)
gen d_treat=dtreat
gen d_time=dtime
gen d_treatnew=d_treat*d_time
*keep only the block of 300 observations belonging to Monte Carlo sample j
keep if `j' == a
label variable y "y"
label variable x1 "x"
label variable d_treatnew "Treatment Dummy"
*difference-in-differences via OLS; the coefficient on d_treatnew is the DID estimate of the ATE
reg y d_treatnew x1 x12
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps"
*save coefficients and V-cov matrix
mat bmat`j'=e(b)
mat vmat`j'=e(V)
mkmat trueate, mat(trueatemat2_`j')
file write applic_file_DID_300_`j' "number" _tab "b_ate" _tab "se_ate" _tab "b_trueate" _n
*the ATE does not vary with each observation in DID, so compute the estimate and its
*standard error once here, rather than recomputing them inside the 300-iteration loop
*(column 1 of e(b)/e(V) is d_treatnew, the first regressor in the reg command above)
scalar v_ATE=vmat`j'[1,1]
scalar b_ate=bmat`j'[1,1]
*take the square root of the ate variance to get the standard error (se) of the ate
scalar se_ate=(v_ATE)^0.5
forvalues i=1/300 {
*only the true ATE varies by observation; write one row per observation
scalar trueate`i'=trueatemat2_`j'[`i',1]
file write applic_file_DID_300_`j' ("`i'") _tab (b_ate) _tab (se_ate) _tab (trueate`i') _n
}
file close applic_file_DID_300_`j'
*label the data BEFORE saving so the label is actually stored in the .dta file
label data "DID_300_July2018_with_weights_300"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\data_DID_300_`j'" , replace
**********************************************
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\myfile_DID_300_`j'.txt"
*the id is 1 thru 300 within each myfile_DID_300_`j', so convert it to the id from the full data set
gen id=number +(300*(`j'-1))
*calculate the ASE (mean squared deviation of estimated from true ATE) and bias for sample j
gen difference_DID_300=b_ate-b_trueate
gen difference2_DID_300=difference_DID_300^2
sum difference2_DID_300
mat ASE_mat_DID_300_`j'=r(mean)
scalar ASE_DID_300_`j'=ASE_mat_DID_300_`j'[1,1]
sum difference_DID_300
mat bias_mat_DID_300_`j'=r(mean)
scalar bias_DID_300_`j'=bias_mat_DID_300_`j'[1,1]
drop b_ate
drop se_ate
drop difference_DID_300
drop difference2_DID_300
}
*average the per-sample ASEs and biases over the 500 repetitions
scalar MASE_sum_DID_300=0
scalar MASE_DID_300=0
scalar bias_DID_300=0
scalar bias_sum_DID_300=0
forvalues j=1/`repetitions_300' {
scalar MASE_sum_DID_300 = ASE_DID_300_`j' + MASE_DID_300
scalar MASE_DID_300 = MASE_sum_DID_300
scalar bias_sum_DID_300 = bias_DID_300_`j' + bias_DID_300
scalar bias_DID_300 = bias_sum_DID_300
}
scalar MASE_DID_300 = MASE_sum_DID_300/`repetitions_300'
scalar bias_DID_300 = bias_sum_DID_300/`repetitions_300'
*combine multiple txt files into one, so that we can get individual b_ate and trueate estimates
forvalues j=1/`repetitions_300' {
clear
import delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\myfile_DID_300_`j'.txt"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\stata_myfile_DID_300_`j'", replace
}
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\stata_myfile_DID_300_1"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\myfile_DID_300_all", replace
clear
use "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\myfile_DID_300_all"
forvalues j=2/`repetitions_300' {
append using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\stata_myfile_DID_300_`j'"
save "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\myfile_DID_300_all", replace
}
export delimited "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\all_data_300_DID.txt", delimiter(tab)
*rename applic_file so that there won't be problems the next time this program runs
*NOTE: this must loop over j -- the local `j' is empty once the loops above have completed.
*Also use "move /Y" rather than "ren": cmd's ren does not accept a path in the destination nor a ", replace" option.
forvalues j=1/`repetitions_300' {
shell move /Y "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\myfile_DID_300_`j'.txt" "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\dir_DID_300_500reps\testfile_DID_300_`j'.txt"
}
*collect the GIPT results under the names used in the output table
scalar bias_GIPT_300=bias_300
scalar MASE_GIPT_300=MASE_300
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\"
file open output_300 using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\bias_and_MASE_300.txt", write
*header row: must contain SEVEN column names (including "bias GIPT 300 obs") so the headers
*line up with the seven values written on the data row below
file write output_300 "optimal bandwidth" _tab "bias GIPT 300 obs" _tab "bias ipt 300 obs" _tab "bias DID 300 obs" _tab "MASE GIPT 300 obs" _tab "MASE ipt 300 obs" _tab "MASE DID 300 obs" _n
file write output_300 (optimal_b) _tab (bias_GIPT_300) _tab (bias_ipt_300) _tab (bias_DID_300) _tab (MASE_GIPT_300) _tab (MASE_ipt_300) _tab (MASE_DID_300) _n
file close output_300
clear
*variable names are supplied explicitly, so skip the file's header row with rowrange(2);
*otherwise the header text would be read in as a (string) data row
import delimited optimal_bandwidth bias_GIPT bias_IPT bias_DID MASE_GIPT MASE_IPT MASE_DID using "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\bias_and_MASE_300.txt", rowrange(2)
cd "C:\Users\jcohen2\Desktop\Bryan Graham Stata software\bandwidth_selection_300\"
save output_data_300, replace
*build the Word table of bias, MASE, and optimal bandwidth
putdocx begin
putdocx paragraph
putdocx text ("Table x: Estimates of Bias and Mean of the Average Squared Errors, Simulations for N=300, 500 Iterations")
putdocx paragraph
putdocx table tbl1 = data("bias_GIPT bias_IPT bias_DID"), varnames
putdocx paragraph
putdocx table tbl2 = data("MASE_GIPT MASE_IPT MASE_DID"), varnames
putdocx paragraph
putdocx table tbl3 = data("optimal_bandwidth"), varnames
putdocx paragraph
putdocx save mytable_300.docx, replace
log close