From 03394069608ec34838ef67b93eb8d503a370f05a Mon Sep 17 00:00:00 2001 From: Jeffrey Cohen Date: Fri, 24 Aug 2018 02:31:51 -0400 Subject: [PATCH] Add files via upload Data generating processes; NOT FOR REPRODUCTION --- .../dgp_cross_section_v2_rep100.g | 192 +++++++++++++++++ .../dgp_cross_section_v2_rep500.g | 193 ++++++++++++++++++ 2 files changed, 385 insertions(+) create mode 100644 submitted JASA data/Bryan Graham Stata software/dgp_cross_section_v2_rep100.g create mode 100644 submitted JASA data/Bryan Graham Stata software/dgp_cross_section_v2_rep500.g diff --git a/submitted JASA data/Bryan Graham Stata software/dgp_cross_section_v2_rep100.g b/submitted JASA data/Bryan Graham Stata software/dgp_cross_section_v2_rep100.g new file mode 100644 index 0000000..7048f66 --- /dev/null +++ b/submitted JASA data/Bryan Graham Stata software/dgp_cross_section_v2_rep100.g @@ -0,0 +1,192 @@ +/* +Title: Generating data from a treatment effect model in which 1) coefficient depends on spacial location; 2)The treatment selection depends on covariates. +Author: Ke Yang +date created: 3/1/2017 +last change: 4/13/2018 + The selection process for the treatment group. The treatment group is selected based on a funciton of covariates; see Freeman and Berk, 2012 +Note:This program generates data using the following model: +The location variable, X=[x1,x2], both of which follow a positive-half-normal distribution; +Y = f0(x) + f1(x).DB 2 + u; with var(u)=1. There are n subjects. +DB = (a + b.z + V > 0 ) where V is a r.v. following N(0,1). +*/ + +new; +library pgraph; + +/*Section 1: Defines the parameter values in the DGP & estimation procedure*/ + +rep=100; +wind=0.15; /*This is the window size used in gwr_knn() estimation*/ +bandk=2; /*This is the bandwidth used in K()*/ + +@mux=0; mean of x@ +/*varx={0.08 0.17}; variance of x*/ +varx={14042 18883}; +nj={600 300}; /* 600 number of individuals, n*/ +var={100}; /*variance of error * 100 */ +cor=0; /*correlation of errors terms from the same individual*/ +roh=0; /*spatial dependence*/ +nei= 3; /* number of neighborhoods */ +periods=20; + + + +/*Section 2: DGP cycle starts here*/ + +/*cyCount=1;*/ + + cyc_nj=1; + do while cyc_nj<=cols(nj); + + cyc_var=1; + do while cyc_var<=cols(var); + + + + field = 1; + prec = 0; + fmat = "%*.*lf"; + index_nj = ftos(nj[cyc_nj],fmat,field,prec); + index_var= ftos(var[cyc_var],fmat,field,prec); + index_rep= ftos(rep,fmat,field,prec); + + index=index_nj$+index_var$+index_rep; + outfile="C:/Users/ke/GoogleDrive/keyang/research/vancouvor_housing/mc/cross_section_data"$+index$+".txt"; + output file = ^outfile reset; + + /*Section 2: Defines the vectors to store the regression results*/ + d=zeros(nj[cyc_nj]*rep,8); + fgwr=zeros(nj[cyc_nj]*rep, 7); + fols=zeros(rep,7); + colname= zeros(5,4*cols(nj)*cols(var)*cols(cor)*cols(roh)); + + rep_cyc=1; + do while rep_cyc<=rep; + + /*read parameter values in this cycle*/ + nc=nj[cyc_nj]; /*sample size = nj*/ + varc=var[cyc_var]/100; /*variance of the error term*/ + corc=cor/100; /*correlations for observations in the same panel*/ + rohc=roh/100; /*spacial AR parameter*/ + + /*x follows a half normal (right) distribution; Then we truncate the data outside of 2 stdev above the mean + x=rndn(nc,2); + x=abs(x); / + flag1=(x[.,1].>2); + flag2=(x[.,2].>2); + x[.,1]=x[.,1]-flag1*2; + x[.,2]=x[.,2]-flag2*2; + */ + /*x=x.*varx; */ + + x=rndu(nc, 2).*2; + + y=zeros(nc,1); + nhood=ones(nc/nei,1).*.seqa(1,1,nei); /* a category variable that resembles different neiborhoods */ + times=ones(nc/periods,1).*.seqa(1,1,periods);/* a category variable that resembles different time periods */ + imr=rndn(nc,1)*3; + /*Dtreat=(rndn(nc,1)+nhood.*sin(-x)+imr.*cos(-x)+0.3).gt 0; treatment dummy: Dtreat = (error + b1.nhood + b2.imr + V > 0) */ + /*Dtreat=(rndn(nc,1)+nhood.*(0.25*x[.,1].^2-0.5*x[.,2])+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/ + /*Dtreat=(rndn(nc,1)+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/ + /*Dtreat=sqrt((x[.,1]).^2+(x[.,2]).^2).le 0.78; Dummy based on distance from the center: 0.78 for 30%; 1.12 for 50%; 1.4 for 70%*/ + Dtreat=(x[.,1]-1+0.25*x[.,2]-.25).gt 0; + Dtime=times.>=11; + + dist=zeros(nc,nc);/*matrix gives distances between units*/ + i=1; + do while i<=nc;/*print ((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)'~sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)';stop;*/ + dis=sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)/(stdc(sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)')); + dist[i,.]=inv(sqrt(2*pi))*exp(-0.5.*dis.^2/1.5); /*gaussion weight*/ + i=i+1; + endo; + /*dist=diagrv(dist,zeros(nc,1)); replace diaganal element with zero*/ + + + /*error process: alpha-unit specific error with spatial dependence*/ + + alpha=rndn(nc,1)*sqrt(corc*varc); + epsilon=alpha+rndn(nc,1)*sqrt((1-corc)*varc);/*one-way error, grouped by unit*/ + alpha=rohc*dist*alpha;/*first order spatial autoregressive process*/ + + + /*regression model with difference-in-differnce structure */ + /*y=(x^2-2*x)+sin(6*x).*Dtreat +epsilon/8; */ + /*y=(3-x) .*Dtreat + epsilon/8; */ + /*trueATE=10*inv(2*pi)*exp(-0.5*(x[.,1].^2+x[.,2].^2)); */ + trueATE=1/5*(-x[.,1].^0.3-x[.,2]^0.3); + y=trueATE.*Dtreat.*Dtime + 0.2*imr +epsilon/8; + + d[nc*(rep_cyc-1)+1:nc*rep_cyc,.] = y~x~Dtreat~Dtime~imr~epsilon/8~trueATE; /*data matrix to be generated */ + + /*Estimate all four parameter functions with gwr_knn() procedure GWR-"" OLS-""*/ + /*{proj_y, bgwr,weits,Lmat}=gwr_knn(y, Dpost~Dtreat~(Dtreat.*Dpost) ,(x-x'),zeros(njc,njc), wind*njc,wind*njc);*/ /*using zeros(nj,nj) for year_dt to ignore that weight scheme*/ + /*{proj_y, bgwr}=gwr(y, dpost~Dtreat~(Dtreat.*Dpost)~nhood~imr, (x-x'), zeros(njc, njc), (maxc(x)-minc(x))/8, (maxc(x)-minc(x))/8); + fgwr[nJc*(rep_cyc-1)+1:nJc*rep_cyc,.]=bgwr;*/ + /* + {nam,m,bols,stb,vc,std,sig,cx,rsq,resid,dbw } = ols(0,y,Dpost~Dtreat~(Dtreat.*Dpost~nhood~imr)); + fols[rep_cyc,.]= bols'; + */ + rep_cyc=rep_cyc+1; + endo; +/* + colname[1,CyCount]=nj[cycle_nj]; + colname[2,cyCount]=var[cycle_var]/100; + colname[3,CyCount]=cor[cycle_cor]/100; + colname[4,CyCount]=roh[cycle_roh]/100; + + cyCount=cyCount+1; + +*/ +@------------------DATA CHECK---------------------------------------------- +dev=eye(nJ[cycle_nJ]/j[cycle_J]).*.(eye(J[cycle_J])-ones(J[cycle_J],1)*ones(J[cycle_J],1)'./J[cycle_J]); +ahat=(1/(nJ[cycle_nJ]-nJ[cycle_nJ]/J[cycle_J]))*(dev*(y-mx))'*(dev*(y-mx)); +bhat=(1/nJ[cycle_nJ])*(y-mx-dev*(y-mx))'*(y-mx-dev*(y-mx))-ahat/J[cycle_J]; +print "variance of epsilon" (1-cor[cycle_cor]/100)*var[cycle_var]/100 "variance of alpha" var[cycle_var]*cor[cycle_cor]/10000; +print "variance of epsilon" ahat "variance of alpha" bhat; +--------------------------------------------------------------------------@ + + + cyc_var=cyc_var+1; + endo; + + cyc_nj=cyc_nj+1; + endo; + +/*print colname|bgwr;*/ + +print "y~g1~g2~Dtreat~Dtime~x~epsilon~trueATE"; +print d; + + +stop; +print " ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"; +print "Kernel weight using stdev as bandwidth"; +print dist; + +stop; + +print "average bias & ase"; +truf=(d[.,2].^2-2.*d[.,2])~sin(6*d[.,2])~sin(-d[.,2])~cos(-d[.,2])~ones(rows(d),1)~0.5*ones(rows(d),1); +print (meanc(fgwr-truf))'|(meanc((fgwr-truf).^2))'; +/* +print (bols-meanc(truf))'; +print (meanc((ones(rows(d),1)*bols'-truf).^2))'; +*/ + +@--------------GRAPHS------------------------------------------------------@ +z1=x~y~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)~ones(rows(d),1)~0.5*ones(rows(d),1); +z1=sortc(z1,1); + +/*print "x~y~bgwr~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)";print z1;*/ + +library pgraph; +graphset; +_pdate=""; +ylabel("fgwr(x) and y"); +xlabel("x"); +title("Regression"); +_pltype = {6 }; +_plctrl = { -1,0}; +xy(z1[.,1],z1[., 8 14]); + + diff --git a/submitted JASA data/Bryan Graham Stata software/dgp_cross_section_v2_rep500.g b/submitted JASA data/Bryan Graham Stata software/dgp_cross_section_v2_rep500.g new file mode 100644 index 0000000..4e1617d --- /dev/null +++ b/submitted JASA data/Bryan Graham Stata software/dgp_cross_section_v2_rep500.g @@ -0,0 +1,193 @@ +/* +Title: Generating data from a treatment effect model in which 1) coefficient depends on spacial location; 2)The treatment selection depends on covariates. +Author: Ke Yang +date created: 3/1/2017 +last change: 4/13/2018 + The selection process for the treatment group. The treatment group is selected based on a funciton of covariates; see Freeman and Berk, 2012 +Note:This program generates data using the following model: +The location variable, X=[x1,x2], both of which follow a positive-half-normal distribution; +Y = f0(x) + f1(x).DB 2 + u; with var(u)=1. There are n subjects. +DB = (a + b.z + V > 0 ) where V is a r.v. following N(0,1). +*/ + +new; +library pgraph; + +/*Section 1: Defines the parameter values in the DGP & estimation procedure*/ + +rep=500; +wind=0.15; /*This is the window size used in gwr_knn() estimation*/ +bandk=2; /*This is the bandwidth used in K()*/ + +@mux=0; mean of x@ +/*varx={0.08 0.17}; variance of x*/ +varx={14042 18883}; +nj={600 300}; /* 600 number of individuals, n*/ +var={100}; /*variance of error * 100 */ +cor=0; /*correlation of errors terms from the same individual*/ +roh=0; /*spatial dependence*/ +nei= 3; /* number of neighborhoods */ +periods=20; + + + +/*Section 2: DGP cycle starts here*/ + +/*cyCount=1;*/ + + cyc_nj=1; + do while cyc_nj<=cols(nj); + + cyc_var=1; + do while cyc_var<=cols(var); + + + + field = 1; + prec = 0; + fmat = "%*.*lf"; + index_nj = ftos(nj[cyc_nj],fmat,field,prec); + index_var= ftos(var[cyc_var],fmat,field,prec); + index_rep= ftos(rep,fmat,field,prec); + + index=index_nj$+index_var$+index_rep; + outfile="C:/Users/ke/GoogleDrive/keyang/research/vancouvor_housing/mc/cross_section_data"$+index$+".txt"; + output file = ^outfile reset; + + /*Section 2: Defines the vectors to store the regression results*/ + d=zeros(nj[cyc_nj]*rep,8); + fgwr=zeros(nj[cyc_nj]*rep, 7); + fols=zeros(rep,7); + colname= zeros(5,4*cols(nj)*cols(var)*cols(cor)*cols(roh)); + + rep_cyc=1; + do while rep_cyc<=rep; + + /*read parameter values in this cycle*/ + nc=nj[cyc_nj]; /*sample size = nj*/ + varc=var[cyc_var]/100; /*variance of the error term*/ + corc=cor/100; /*correlations for observations in the same panel*/ + rohc=roh/100; /*spacial AR parameter*/ + + /*x follows a half normal (right) distribution; Then we truncate the data outside of 2 stdev above the mean + x=rndn(nc,2); + x=abs(x); / + flag1=(x[.,1].>2); + flag2=(x[.,2].>2); + x[.,1]=x[.,1]-flag1*2; + x[.,2]=x[.,2]-flag2*2; + */ + /*x=x.*varx; */ + + x=rndu(nc, 2).*2; + + y=zeros(nc,1); + nhood=ones(nc/nei,1).*.seqa(1,1,nei); /* a category variable that resembles different neiborhoods */ + times=ones(nc/periods,1).*.seqa(1,1,periods);/* a category variable that resembles different time periods */ + imr=rndn(nc,1)*3; + /*Dtreat=(rndn(nc,1)+nhood.*sin(-x)+imr.*cos(-x)+0.3).gt 0; treatment dummy: Dtreat = (error + b1.nhood + b2.imr + V > 0) */ + /*Dtreat=(rndn(nc,1)+nhood.*(0.25*x[.,1].^2-0.5*x[.,2])+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/ + /*Dtreat=(rndn(nc,1)+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/ + /*Dtreat=sqrt((x[.,1]).^2+(x[.,2]).^2).le 0.78; Dummy based on distance from the center: 0.78 for 30%; 1.12 for 50%; 1.4 for 70%*/ + Dtreat=(x[.,1]-1+0.25*x[.,2]-.25).gt 0; + Dtime=times.>=11; + + dist=zeros(nc,nc);/*matrix gives distances between units*/ + i=1; + do while i<=nc;/*print ((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)'~sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)';stop;*/ + dis=sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)/(stdc(sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)')); + dist[i,.]=inv(sqrt(2*pi))*exp(-0.5.*dis.^2/1.5); /*gaussion weight*/ + i=i+1; + endo; + /*dist=diagrv(dist,zeros(nc,1)); replace diaganal element with zero*/ + + + /*error process: alpha-unit specific error with spatial dependence*/ + + alpha=rndn(nc,1)*sqrt(corc*varc); + epsilon=alpha+rndn(nc,1)*sqrt((1-corc)*varc);/*one-way error, grouped by unit*/ + alpha=rohc*dist*alpha;/*first order spatial autoregressive process*/ + + + /*regression model with difference-in-differnce structure */ + /*y=(x^2-2*x)+sin(6*x).*Dtreat +epsilon/8; */ + /*y=(3-x) .*Dtreat + epsilon/8; */ + /*trueATE=10*inv(2*pi)*exp(-0.5*(x[.,1].^2+x[.,2].^2)); */ + trueATE=1/5*(-x[.,1].^0.3-x[.,2]^0.3); + y=trueATE.*Dtreat.*Dtime + 0.2*imr +epsilon/8; + + d[nc*(rep_cyc-1)+1:nc*rep_cyc,.] = y~x~Dtreat~Dtime~imr~epsilon/8~trueATE; /*data matrix to be generated */ + + /*Estimate all four parameter functions with gwr_knn() procedure GWR-"" OLS-""*/ + /*{proj_y, bgwr,weits,Lmat}=gwr_knn(y, Dpost~Dtreat~(Dtreat.*Dpost) ,(x-x'),zeros(njc,njc), wind*njc,wind*njc);*/ /*using zeros(nj,nj) for year_dt to ignore that weight scheme*/ + /*{proj_y, bgwr}=gwr(y, dpost~Dtreat~(Dtreat.*Dpost)~nhood~imr, (x-x'), zeros(njc, njc), (maxc(x)-minc(x))/8, (maxc(x)-minc(x))/8); + fgwr[nJc*(rep_cyc-1)+1:nJc*rep_cyc,.]=bgwr;*/ + /* + {nam,m,bols,stb,vc,std,sig,cx,rsq,resid,dbw } = ols(0,y,Dpost~Dtreat~(Dtreat.*Dpost~nhood~imr)); + fols[rep_cyc,.]= bols'; + */ + rep_cyc=rep_cyc+1; + endo; +/* + colname[1,CyCount]=nj[cycle_nj]; + colname[2,cyCount]=var[cycle_var]/100; + colname[3,CyCount]=cor[cycle_cor]/100; + colname[4,CyCount]=roh[cycle_roh]/100; + + cyCount=cyCount+1; + +*/ +@------------------DATA CHECK---------------------------------------------- +dev=eye(nJ[cycle_nJ]/j[cycle_J]).*.(eye(J[cycle_J])-ones(J[cycle_J],1)*ones(J[cycle_J],1)'./J[cycle_J]); +ahat=(1/(nJ[cycle_nJ]-nJ[cycle_nJ]/J[cycle_J]))*(dev*(y-mx))'*(dev*(y-mx)); +bhat=(1/nJ[cycle_nJ])*(y-mx-dev*(y-mx))'*(y-mx-dev*(y-mx))-ahat/J[cycle_J]; +print "variance of epsilon" (1-cor[cycle_cor]/100)*var[cycle_var]/100 "variance of alpha" var[cycle_var]*cor[cycle_cor]/10000; +print "variance of epsilon" ahat "variance of alpha" bhat; +--------------------------------------------------------------------------@ + + + cyc_var=cyc_var+1; + endo; + + cyc_nj=cyc_nj+1; + endo; + +/*print colname|bgwr;*/ + +print "y~g1~g2~Dtreat~Dtime~x~epsilon~trueATE"; +print d; + + +stop; + +print " ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"; +print "Kernel weight using stdev as bandwidth"; +print dist; + +stop; + +print "average bias & ase"; +truf=(d[.,2].^2-2.*d[.,2])~sin(6*d[.,2])~sin(-d[.,2])~cos(-d[.,2])~ones(rows(d),1)~0.5*ones(rows(d),1); +print (meanc(fgwr-truf))'|(meanc((fgwr-truf).^2))'; +/* +print (bols-meanc(truf))'; +print (meanc((ones(rows(d),1)*bols'-truf).^2))'; +*/ + +@--------------GRAPHS------------------------------------------------------@ +z1=x~y~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)~ones(rows(d),1)~0.5*ones(rows(d),1); +z1=sortc(z1,1); + +/*print "x~y~bgwr~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)";print z1;*/ + +library pgraph; +graphset; +_pdate=""; +ylabel("fgwr(x) and y"); +xlabel("x"); +title("Regression"); +_pltype = {6 }; +_plctrl = { -1,0}; +xy(z1[.,1],z1[., 8 14]); + +