Skip to content
Permalink
0339406960
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
193 lines (148 sloc) 8.26 KB
/*
Title: Generating data from a treatment effect model in which 1) coefficient depends on spacial location; 2)The treatment selection depends on covariates.
Author: Ke Yang
date created: 3/1/2017
last change: 4/13/2018
The selection process for the treatment group. The treatment group is selected based on a funciton of covariates; see Freeman and Berk, 2012
Note:This program generates data using the following model:
The location variable, X=[x1,x2], both of which follow a positive-half-normal distribution;
Y = f0(x) + f1(x).DB 2 + u; with var(u)=1. There are n subjects.
DB = (a + b.z + V > 0 ) where V is a r.v. following N(0,1).
*/
new;
library pgraph;
/*Section 1: Defines the parameter values in the DGP & estimation procedure*/
rep=500;
wind=0.15; /*This is the window size used in gwr_knn() estimation*/
bandk=2; /*This is the bandwidth used in K()*/
@mux=0; mean of x@
/*varx={0.08 0.17}; variance of x*/
varx={14042 18883};
nj={600 300}; /* 600 number of individuals, n*/
var={100}; /*variance of error * 100 */
cor=0; /*correlation of errors terms from the same individual*/
roh=0; /*spatial dependence*/
nei= 3; /* number of neighborhoods */
periods=20;
/*Section 2: DGP cycle starts here*/
/*cyCount=1;*/
cyc_nj=1;
do while cyc_nj<=cols(nj);
cyc_var=1;
do while cyc_var<=cols(var);
field = 1;
prec = 0;
fmat = "%*.*lf";
index_nj = ftos(nj[cyc_nj],fmat,field,prec);
index_var= ftos(var[cyc_var],fmat,field,prec);
index_rep= ftos(rep,fmat,field,prec);
index=index_nj$+index_var$+index_rep;
outfile="C:/Users/ke/GoogleDrive/keyang/research/vancouvor_housing/mc/cross_section_data"$+index$+".txt";
output file = ^outfile reset;
/*Section 2: Defines the vectors to store the regression results*/
d=zeros(nj[cyc_nj]*rep,8);
fgwr=zeros(nj[cyc_nj]*rep, 7);
fols=zeros(rep,7);
colname= zeros(5,4*cols(nj)*cols(var)*cols(cor)*cols(roh));
rep_cyc=1;
do while rep_cyc<=rep;
/*read parameter values in this cycle*/
nc=nj[cyc_nj]; /*sample size = nj*/
varc=var[cyc_var]/100; /*variance of the error term*/
corc=cor/100; /*correlations for observations in the same panel*/
rohc=roh/100; /*spacial AR parameter*/
/*x follows a half normal (right) distribution; Then we truncate the data outside of 2 stdev above the mean
x=rndn(nc,2);
x=abs(x); /
flag1=(x[.,1].>2);
flag2=(x[.,2].>2);
x[.,1]=x[.,1]-flag1*2;
x[.,2]=x[.,2]-flag2*2;
*/
/*x=x.*varx; */
x=rndu(nc, 2).*2;
y=zeros(nc,1);
nhood=ones(nc/nei,1).*.seqa(1,1,nei); /* a category variable that resembles different neiborhoods */
times=ones(nc/periods,1).*.seqa(1,1,periods);/* a category variable that resembles different time periods */
imr=rndn(nc,1)*3;
/*Dtreat=(rndn(nc,1)+nhood.*sin(-x)+imr.*cos(-x)+0.3).gt 0; treatment dummy: Dtreat = (error + b1.nhood + b2.imr + V > 0) */
/*Dtreat=(rndn(nc,1)+nhood.*(0.25*x[.,1].^2-0.5*x[.,2])+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/
/*Dtreat=(rndn(nc,1)+imr.*(-0.5*x[.,1]+x[.,2])).gt 0;*/
/*Dtreat=sqrt((x[.,1]).^2+(x[.,2]).^2).le 0.78; Dummy based on distance from the center: 0.78 for 30%; 1.12 for 50%; 1.4 for 70%*/
Dtreat=(x[.,1]-1+0.25*x[.,2]-.25).gt 0;
Dtime=times.>=11;
dist=zeros(nc,nc);/*matrix gives distances between units*/
i=1;
do while i<=nc;/*print ((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)'~sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)';stop;*/
dis=sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)/(stdc(sqrt((x[i,1]-x[.,1]').^2+(x[i,2]-x[.,2]').^2)'));
dist[i,.]=inv(sqrt(2*pi))*exp(-0.5.*dis.^2/1.5); /*gaussion weight*/
i=i+1;
endo;
/*dist=diagrv(dist,zeros(nc,1)); replace diaganal element with zero*/
/*error process: alpha-unit specific error with spatial dependence*/
alpha=rndn(nc,1)*sqrt(corc*varc);
epsilon=alpha+rndn(nc,1)*sqrt((1-corc)*varc);/*one-way error, grouped by unit*/
alpha=rohc*dist*alpha;/*first order spatial autoregressive process*/
/*regression model with difference-in-differnce structure */
/*y=(x^2-2*x)+sin(6*x).*Dtreat +epsilon/8; */
/*y=(3-x) .*Dtreat + epsilon/8; */
/*trueATE=10*inv(2*pi)*exp(-0.5*(x[.,1].^2+x[.,2].^2)); */
trueATE=1/5*(-x[.,1].^0.3-x[.,2]^0.3);
y=trueATE.*Dtreat.*Dtime + 0.2*imr +epsilon/8;
d[nc*(rep_cyc-1)+1:nc*rep_cyc,.] = y~x~Dtreat~Dtime~imr~epsilon/8~trueATE; /*data matrix to be generated */
/*Estimate all four parameter functions with gwr_knn() procedure GWR-"" OLS-""*/
/*{proj_y, bgwr,weits,Lmat}=gwr_knn(y, Dpost~Dtreat~(Dtreat.*Dpost) ,(x-x'),zeros(njc,njc), wind*njc,wind*njc);*/ /*using zeros(nj,nj) for year_dt to ignore that weight scheme*/
/*{proj_y, bgwr}=gwr(y, dpost~Dtreat~(Dtreat.*Dpost)~nhood~imr, (x-x'), zeros(njc, njc), (maxc(x)-minc(x))/8, (maxc(x)-minc(x))/8);
fgwr[nJc*(rep_cyc-1)+1:nJc*rep_cyc,.]=bgwr;*/
/*
{nam,m,bols,stb,vc,std,sig,cx,rsq,resid,dbw } = ols(0,y,Dpost~Dtreat~(Dtreat.*Dpost~nhood~imr));
fols[rep_cyc,.]= bols';
*/
rep_cyc=rep_cyc+1;
endo;
/*
colname[1,CyCount]=nj[cycle_nj];
colname[2,cyCount]=var[cycle_var]/100;
colname[3,CyCount]=cor[cycle_cor]/100;
colname[4,CyCount]=roh[cycle_roh]/100;
cyCount=cyCount+1;
*/
@------------------DATA CHECK----------------------------------------------
dev=eye(nJ[cycle_nJ]/j[cycle_J]).*.(eye(J[cycle_J])-ones(J[cycle_J],1)*ones(J[cycle_J],1)'./J[cycle_J]);
ahat=(1/(nJ[cycle_nJ]-nJ[cycle_nJ]/J[cycle_J]))*(dev*(y-mx))'*(dev*(y-mx));
bhat=(1/nJ[cycle_nJ])*(y-mx-dev*(y-mx))'*(y-mx-dev*(y-mx))-ahat/J[cycle_J];
print "variance of epsilon" (1-cor[cycle_cor]/100)*var[cycle_var]/100 "variance of alpha" var[cycle_var]*cor[cycle_cor]/10000;
print "variance of epsilon" ahat "variance of alpha" bhat;
--------------------------------------------------------------------------@
cyc_var=cyc_var+1;
endo;
cyc_nj=cyc_nj+1;
endo;
/*print colname|bgwr;*/
print "y~g1~g2~Dtreat~Dtime~x~epsilon~trueATE";
print d;
stop;
print " ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
print "Kernel weight using stdev as bandwidth";
print dist;
stop;
print "average bias & ase";
truf=(d[.,2].^2-2.*d[.,2])~sin(6*d[.,2])~sin(-d[.,2])~cos(-d[.,2])~ones(rows(d),1)~0.5*ones(rows(d),1);
print (meanc(fgwr-truf))'|(meanc((fgwr-truf).^2))';
/*
print (bols-meanc(truf))';
print (meanc((ones(rows(d),1)*bols'-truf).^2))';
*/
@--------------GRAPHS------------------------------------------------------@
z1=x~y~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)~ones(rows(d),1)~0.5*ones(rows(d),1);
z1=sortc(z1,1);
/*print "x~y~bgwr~(x^2-2*x)~sin(6*x)~sin(-x)~cos(-x)";print z1;*/
library pgraph;
graphset;
_pdate="";
ylabel("fgwr(x) and y");
xlabel("x");
title("Regression");
_pltype = {6 };
_plctrl = { -1,0};
xy(z1[.,1],z1[., 8 14]);