Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add files via upload
Data generating processes; NOT FOR REPRODUCTION
  • Loading branch information
jec14018 committed Aug 24, 2018
1 parent 815fed4 commit 0339406
Show file tree
Hide file tree
Showing 2 changed files with 385 additions and 0 deletions.
@@ -0,0 +1,192 @@
/*
Title: Generating data from a treatment effect model in which 1) coefficient depends on spacial location; 2)The treatment selection depends on covariates.
Author: Ke Yang
date created: 3/1/2017
last change: 4/13/2018
The selection process for the treatment group. The treatment group is selected based on a funciton of covariates; see Freeman and Berk, 2012
Note:This program generates data using the following model:
The location variable, X=[x1,x2], both of which follow a positive-half-normal distribution;
Y = f0(x) + f1(x).DB 2 + u; with var(u)=1. There are n subjects.
DB = (a + b.z + V > 0 ) where V is a r.v. following N(0,1).
*/

new;
library pgraph;

/*Section 1: Defines the parameter values in the DGP & estimation procedure*/

rep=100;
wind=0.15; /*This is the window size used in gwr_knn() estimation*/
bandk=2; /*This is the bandwidth used in K()*/

@mux=0; mean of x@
/*varx={0.08 0.17}; variance of x*/
varx={14042 18883};
nj={600 300}; /* 600 number of individuals, n*/
var={100}; /*variance of error * 100 */
cor=0; /*correlation of errors terms from the same individual*/
roh=0; /*spatial dependence*/
nei= 3; /* number of neighborhoods */
periods=20;



/*Section 2: DGP cycle starts here*/

/*cyCount=1;*/

cyc_nj=1;
do while cyc_nj<=cols(nj);

cyc_var=1;
do while cyc_var<=cols(var);



field = 1;
prec = 0;
fmat = "%*.*lf";
index_nj = ftos(nj[cyc_nj],fmat,field,prec);
index_var= ftos(var[cyc_var],fmat,field,prec);
index_rep= ftos(rep,fmat,field,prec);

index=index_nj$+index_var$+index_rep;
outfile="C:/Users/ke/GoogleDrive/keyang/research/vancouvor_housing/mc/cross_section_data"$+index$+".txt";
output file = ^outfile reset;

/*Section 2: Defines the vectors to store the regression results*/
d=zeros(nj[cyc_nj]*rep,8);
fgwr=zeros(nj[cyc_nj]*rep, 7);
fols=zeros(rep,7);
colname= zeros(5,4*cols(nj)*cols(var)*cols(cor)*cols(roh));

rep_cyc=1;
do while rep_cyc<=rep;

/*read parameter values in this cycle*/
nc=nj[cyc_nj]; /*sample size = nj*/
varc=var[cyc_var]/100; /*variance of the error term*/
corc=cor/100; /*correlations for observations in the same panel*/
rohc=roh/100; /*spacial AR parameter*/

/*x follows a half normal (right) distribution; Then we truncate the data outside of 2 stdev above the mean
x=rndn(nc,2);
x=abs(x); /
flag1=(x[.,1].>2);
flag2=(x[.,2].>2);
x[.,1]=x[.,1]-flag1*2;
x[.,2]=x[.,2]-flag2*2;
*/
/*x=x.*varx; */

x=rndu(nc, 2).*2;

y=zeros(nc,1);
nhood=ones(nc/nei,1).*.seqa(1,1,nei); /* a category variable that resembles different neiborhoods */
times=ones(nc/periods,1).*.seqa(1,1,periods);/* a category variable that resembles different time periods */
imr=rndn(nc,1)*3;
/*Dtreat=(rndn(nc,1)+nhood.*sin(-x)+imr.*cos(-x)+0.3).gt 0; treatment dummy: Dtreat = (error + b1.nhood + b2.imr + V > 0) */
/*Dtreat=(rndn(nc,1)+nhood.*(0.25*x[.,1].^2-0.5*x[.,2])+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/
/*Dtreat=(rndn(nc,1)+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/
/*Dtreat=sqrt((x[.,1]).^2+(x[.,2]).^2).le 0.78; Dummy based on distance from the center: 0.78 for 30%; 1.12 for 50%; 1.4 for 70%*/
Dtreat=(x[.,1]-1+0.25*x[.,2]-.25).gt 0;
Dtime=times.>=11;

dist=zeros(nc,nc);/*matrix gives distances between units*/
i=1;
do while i<=nc;/*print ((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)'~sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)';stop;*/
dis=sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)/(stdc(sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)'));
dist[i,.]=inv(sqrt(2*pi))*exp(-0.5.*dis.^2/1.5); /*gaussion weight*/
i=i+1;
endo;
/*dist=diagrv(dist,zeros(nc,1)); replace diaganal element with zero*/
/*error process: alpha-unit specific error with spatial dependence*/
alpha=rndn(nc,1)*sqrt(corc*varc);
epsilon=alpha+rndn(nc,1)*sqrt((1-corc)*varc);/*one-way error, grouped by unit*/
alpha=rohc*dist*alpha;/*first order spatial autoregressive process*/
/*regression model with difference-in-differnce structure */
/*y=(x^2-2*x)+sin(6*x).*Dtreat +epsilon/8; */
/*y=(3-x) .*Dtreat + epsilon/8; */
/*trueATE=10*inv(2*pi)*exp(-0.5*(x[.,1].^2+x[.,2].^2)); */
trueATE=1/5*(-x[.,1].^0.3-x[.,2]^0.3);
y=trueATE.*Dtreat.*Dtime + 0.2*imr +epsilon/8;
d[nc*(rep_cyc-1)+1:nc*rep_cyc,.] = y~x~Dtreat~Dtime~imr~epsilon/8~trueATE; /*data matrix to be generated */
/*Estimate all four parameter functions with gwr_knn() procedure GWR-"" OLS-""*/
/*{proj_y, bgwr,weits,Lmat}=gwr_knn(y, Dpost~Dtreat~(Dtreat.*Dpost) ,(x-x'),zeros(njc,njc), wind*njc,wind*njc);*/ /*using zeros(nj,nj) for year_dt to ignore that weight scheme*/
/*{proj_y, bgwr}=gwr(y, dpost~Dtreat~(Dtreat.*Dpost)~nhood~imr, (x-x'), zeros(njc, njc), (maxc(x)-minc(x))/8, (maxc(x)-minc(x))/8);
fgwr[nJc*(rep_cyc-1)+1:nJc*rep_cyc,.]=bgwr;*/
/*
{nam,m,bols,stb,vc,std,sig,cx,rsq,resid,dbw } = ols(0,y,Dpost~Dtreat~(Dtreat.*Dpost~nhood~imr));
fols[rep_cyc,.]= bols';
*/
rep_cyc=rep_cyc+1;
endo;
/*
colname[1,CyCount]=nj[cycle_nj];
colname[2,cyCount]=var[cycle_var]/100;
colname[3,CyCount]=cor[cycle_cor]/100;
colname[4,CyCount]=roh[cycle_roh]/100;

cyCount=cyCount+1;

*/
@------------------DATA CHECK----------------------------------------------
dev=eye(nJ[cycle_nJ]/j[cycle_J]).*.(eye(J[cycle_J])-ones(J[cycle_J],1)*ones(J[cycle_J],1)'./J[cycle_J]);
ahat=(1/(nJ[cycle_nJ]-nJ[cycle_nJ]/J[cycle_J]))*(dev*(y-mx))'*(dev*(y-mx));
bhat=(1/nJ[cycle_nJ])*(y-mx-dev*(y-mx))'*(y-mx-dev*(y-mx))-ahat/J[cycle_J];
print "variance of epsilon" (1-cor[cycle_cor]/100)*var[cycle_var]/100 "variance of alpha" var[cycle_var]*cor[cycle_cor]/10000;
print "variance of epsilon" ahat "variance of alpha" bhat;
--------------------------------------------------------------------------@
cyc_var=cyc_var+1;
endo;
cyc_nj=cyc_nj+1;
endo;
/*print colname|bgwr;*/
print "y~g1~g2~Dtreat~Dtime~x~epsilon~trueATE";
print d;
stop;
print " ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
print "Kernel weight using stdev as bandwidth";
print dist;
stop;
print "average bias & ase";
truf=(d[.,2].^2-2.*d[.,2])~sin(6*d[.,2])~sin(-d[.,2])~cos(-d[.,2])~ones(rows(d),1)~0.5*ones(rows(d),1);
print (meanc(fgwr-truf))'|(meanc((fgwr-truf).^2))';
/*
print (bols-meanc(truf))';
print (meanc((ones(rows(d),1)*bols'-truf).^2))';
*/

@--------------GRAPHS------------------------------------------------------@
z1=x~y~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)~ones(rows(d),1)~0.5*ones(rows(d),1);
z1=sortc(z1,1);

/*print "x~y~bgwr~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)";print z1;*/

library pgraph;
graphset;
_pdate="";
ylabel("fgwr(x) and y");
xlabel("x");
title("Regression");
_pltype = {6 };
_plctrl = { -1,0};
xy(z1[.,1],z1[., 8 14]);


@@ -0,0 +1,193 @@
/*
Title: Generating data from a treatment effect model in which 1) coefficient depends on spacial location; 2)The treatment selection depends on covariates.
Author: Ke Yang
date created: 3/1/2017
last change: 4/13/2018
The selection process for the treatment group. The treatment group is selected based on a funciton of covariates; see Freeman and Berk, 2012
Note:This program generates data using the following model:
The location variable, X=[x1,x2], both of which follow a positive-half-normal distribution;
Y = f0(x) + f1(x).DB 2 + u; with var(u)=1. There are n subjects.
DB = (a + b.z + V > 0 ) where V is a r.v. following N(0,1).
*/

new;
library pgraph;

/*Section 1: Defines the parameter values in the DGP & estimation procedure*/

rep=500;
wind=0.15; /*This is the window size used in gwr_knn() estimation*/
bandk=2; /*This is the bandwidth used in K()*/

@mux=0; mean of x@
/*varx={0.08 0.17}; variance of x*/
varx={14042 18883};
nj={600 300}; /* 600 number of individuals, n*/
var={100}; /*variance of error * 100 */
cor=0; /*correlation of errors terms from the same individual*/
roh=0; /*spatial dependence*/
nei= 3; /* number of neighborhoods */
periods=20;



/*Section 2: DGP cycle starts here*/

/*cyCount=1;*/

cyc_nj=1;
do while cyc_nj<=cols(nj);

cyc_var=1;
do while cyc_var<=cols(var);



field = 1;
prec = 0;
fmat = "%*.*lf";
index_nj = ftos(nj[cyc_nj],fmat,field,prec);
index_var= ftos(var[cyc_var],fmat,field,prec);
index_rep= ftos(rep,fmat,field,prec);

index=index_nj$+index_var$+index_rep;
outfile="C:/Users/ke/GoogleDrive/keyang/research/vancouvor_housing/mc/cross_section_data"$+index$+".txt";
output file = ^outfile reset;

/*Section 2: Defines the vectors to store the regression results*/
d=zeros(nj[cyc_nj]*rep,8);
fgwr=zeros(nj[cyc_nj]*rep, 7);
fols=zeros(rep,7);
colname= zeros(5,4*cols(nj)*cols(var)*cols(cor)*cols(roh));

rep_cyc=1;
do while rep_cyc<=rep;

/*read parameter values in this cycle*/
nc=nj[cyc_nj]; /*sample size = nj*/
varc=var[cyc_var]/100; /*variance of the error term*/
corc=cor/100; /*correlations for observations in the same panel*/
rohc=roh/100; /*spacial AR parameter*/

/*x follows a half normal (right) distribution; Then we truncate the data outside of 2 stdev above the mean
x=rndn(nc,2);
x=abs(x); /
flag1=(x[.,1].>2);
flag2=(x[.,2].>2);
x[.,1]=x[.,1]-flag1*2;
x[.,2]=x[.,2]-flag2*2;
*/
/*x=x.*varx; */

x=rndu(nc, 2).*2;

y=zeros(nc,1);
nhood=ones(nc/nei,1).*.seqa(1,1,nei); /* a category variable that resembles different neiborhoods */
times=ones(nc/periods,1).*.seqa(1,1,periods);/* a category variable that resembles different time periods */
imr=rndn(nc,1)*3;
/*Dtreat=(rndn(nc,1)+nhood.*sin(-x)+imr.*cos(-x)+0.3).gt 0; treatment dummy: Dtreat = (error + b1.nhood + b2.imr + V > 0) */
/*Dtreat=(rndn(nc,1)+nhood.*(0.25*x[.,1].^2-0.5*x[.,2])+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/
/*Dtreat=(rndn(nc,1)+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/
/*Dtreat=sqrt((x[.,1]).^2+(x[.,2]).^2).le 0.78; Dummy based on distance from the center: 0.78 for 30%; 1.12 for 50%; 1.4 for 70%*/
Dtreat=(x[.,1]-1+0.25*x[.,2]-.25).gt 0;
Dtime=times.>=11;

dist=zeros(nc,nc);/*matrix gives distances between units*/
i=1;
do while i<=nc;/*print ((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)'~sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)';stop;*/
dis=sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)/(stdc(sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)'));
dist[i,.]=inv(sqrt(2*pi))*exp(-0.5.*dis.^2/1.5); /*gaussion weight*/
i=i+1;
endo;
/*dist=diagrv(dist,zeros(nc,1)); replace diaganal element with zero*/
/*error process: alpha-unit specific error with spatial dependence*/
alpha=rndn(nc,1)*sqrt(corc*varc);
epsilon=alpha+rndn(nc,1)*sqrt((1-corc)*varc);/*one-way error, grouped by unit*/
alpha=rohc*dist*alpha;/*first order spatial autoregressive process*/
/*regression model with difference-in-differnce structure */
/*y=(x^2-2*x)+sin(6*x).*Dtreat +epsilon/8; */
/*y=(3-x) .*Dtreat + epsilon/8; */
/*trueATE=10*inv(2*pi)*exp(-0.5*(x[.,1].^2+x[.,2].^2)); */
trueATE=1/5*(-x[.,1].^0.3-x[.,2]^0.3);
y=trueATE.*Dtreat.*Dtime + 0.2*imr +epsilon/8;
d[nc*(rep_cyc-1)+1:nc*rep_cyc,.] = y~x~Dtreat~Dtime~imr~epsilon/8~trueATE; /*data matrix to be generated */
/*Estimate all four parameter functions with gwr_knn() procedure GWR-"" OLS-""*/
/*{proj_y, bgwr,weits,Lmat}=gwr_knn(y, Dpost~Dtreat~(Dtreat.*Dpost) ,(x-x'),zeros(njc,njc), wind*njc,wind*njc);*/ /*using zeros(nj,nj) for year_dt to ignore that weight scheme*/
/*{proj_y, bgwr}=gwr(y, dpost~Dtreat~(Dtreat.*Dpost)~nhood~imr, (x-x'), zeros(njc, njc), (maxc(x)-minc(x))/8, (maxc(x)-minc(x))/8);
fgwr[nJc*(rep_cyc-1)+1:nJc*rep_cyc,.]=bgwr;*/
/*
{nam,m,bols,stb,vc,std,sig,cx,rsq,resid,dbw } = ols(0,y,Dpost~Dtreat~(Dtreat.*Dpost~nhood~imr));
fols[rep_cyc,.]= bols';
*/
rep_cyc=rep_cyc+1;
endo;
/*
colname[1,CyCount]=nj[cycle_nj];
colname[2,cyCount]=var[cycle_var]/100;
colname[3,CyCount]=cor[cycle_cor]/100;
colname[4,CyCount]=roh[cycle_roh]/100;

cyCount=cyCount+1;

*/
@------------------DATA CHECK----------------------------------------------
dev=eye(nJ[cycle_nJ]/j[cycle_J]).*.(eye(J[cycle_J])-ones(J[cycle_J],1)*ones(J[cycle_J],1)'./J[cycle_J]);
ahat=(1/(nJ[cycle_nJ]-nJ[cycle_nJ]/J[cycle_J]))*(dev*(y-mx))'*(dev*(y-mx));
bhat=(1/nJ[cycle_nJ])*(y-mx-dev*(y-mx))'*(y-mx-dev*(y-mx))-ahat/J[cycle_J];
print "variance of epsilon" (1-cor[cycle_cor]/100)*var[cycle_var]/100 "variance of alpha" var[cycle_var]*cor[cycle_cor]/10000;
print "variance of epsilon" ahat "variance of alpha" bhat;
--------------------------------------------------------------------------@
cyc_var=cyc_var+1;
endo;
cyc_nj=cyc_nj+1;
endo;
/*print colname|bgwr;*/
print "y~g1~g2~Dtreat~Dtime~x~epsilon~trueATE";
print d;
stop;
print " ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
print "Kernel weight using stdev as bandwidth";
print dist;
stop;
print "average bias & ase";
truf=(d[.,2].^2-2.*d[.,2])~sin(6*d[.,2])~sin(-d[.,2])~cos(-d[.,2])~ones(rows(d),1)~0.5*ones(rows(d),1);
print (meanc(fgwr-truf))'|(meanc((fgwr-truf).^2))';
/*
print (bols-meanc(truf))';
print (meanc((ones(rows(d),1)*bols'-truf).^2))';
*/

@--------------GRAPHS------------------------------------------------------@
z1=x~y~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)~ones(rows(d),1)~0.5*ones(rows(d),1);
z1=sortc(z1,1);

/*print "x~y~bgwr~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)";print z1;*/

library pgraph;
graphset;
_pdate="";
ylabel("fgwr(x) and y");
xlabel("x");
title("Regression");
_pltype = {6 };
_plctrl = { -1,0};
xy(z1[.,1],z1[., 8 14]);


0 comments on commit 0339406

Please sign in to comment.