
Commit

fixes for parallel
yuz12012 committed Apr 29, 2017
1 parent 654c07a commit 27b72ed
Showing 1 changed file with 34 additions and 21 deletions.
55 changes: 34 additions & 21 deletions parallel/para_gibbs.cu
@@ -155,29 +155,37 @@ int main(int argc, char **argv){
     seqMetroProcess<<<nBlocks,nThreads>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,dev_a_out,dev_b_out,trials);

     int *tDot;
-    CUDA_CALL(cudaMalloc((void **)&tDot,trials*sizeof(int)));
+    CUDA_CALL(cudaMalloc((void **)&tDot,nThreads*nBlocks*sizeof(int)));

     float *h;
     CUDA_CALL(cudaMalloc((void **)&h,trials*sizeof(float)));

     sampleTdot<<<nBlocks,nThreads>>>(trials, tDot,devStates);

-    int numThreads = trials / nBlocks; // TODO: This should be a whole number
-    mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot,devStates, nBlocks, nThreads);
+    //int numThreads = trials / nBlocks; // TODO: This should be a whole number
+    mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot,devStates, nBlocks, nThreads, h, results);

     /*------ Free Memory -------------------------------------------*/

     free(y);
     free(n);

     CUDA_CALL(cudaFree(devStates));
     CUDA_CALL(cudaFree(dev_theta));
     CUDA_CALL(cudaFree(dev_log_theta));
     CUDA_CALL(cudaFree(dev_y));
     CUDA_CALL(cudaFree(dev_n));
+    /* CUDA_CALL(cudaFree(results));
+    CUDA_CALL(cudaFree(h));
+    CUDA_CALL(cudaFree(tDot));
+    CUDA_CALL(cudaFree(dev_a_out));
+    CUDA_CALL(cudaFree(dev_b_out));
+    */

     return EXIT_SUCCESS;
 }
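The cudaFree calls added here for results, h, tDot, dev_a_out, and dev_b_out are commented out, so those device allocations are never released. If they were re-enabled, a common pattern is to synchronize first so any error from mergePosterior surfaces before cleanup; a minimal sketch, assuming results was allocated with cudaMalloc earlier in main (that allocation is not visible in this diff):

    // Hypothetical cleanup, not part of the commit; assumes results, h, tDot,
    // dev_a_out, and dev_b_out were all allocated with cudaMalloc.
    CUDA_CALL(cudaDeviceSynchronize()); // let mergePosterior finish and report errors
    CUDA_CALL(cudaFree(results));
    CUDA_CALL(cudaFree(h));
    CUDA_CALL(cudaFree(tDot));
    CUDA_CALL(cudaFree(dev_a_out));
    CUDA_CALL(cudaFree(dev_b_out));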
@@ -202,30 +210,35 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int


     int M = nBlocks * nThreads;
     int id = threadIdx.x + blockIdx.x * blockDim.x;
-    float h = id^(-1/(4+2));

-    for (int i=0; i < nThreads; i++) {
+    for (int i=0; i < trials; i++) {
+        h[i] = powf(i,(-1/(4+2)));
         for (int m=0; m < M; m++) {
-            int *cDot;
-            CUDA_CALL(cudaMemcpy(cDot, tDot, sizeof(int) * trials, cudaMemcpyDeviceToDevice));
+            int *cDot = (int*) malloc(M*sizeof(int));
+            printf("%d\n",m);
+            memcpy(cDot, tDot, sizeof(int) * M);
             cDot[m] = (curand(&state[id]) % (trials-1)) + 1;
             int u = curand_uniform(&state[id]);
-            float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, nThreads, h[i]);
-            float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, nThreads, h[i]);
-            if (u < (wcDot/ wtDot) )
-                CUDA_CALL(cudaMemcpy(tDot, cDot, sizeof(int) * trials, cudaMemcpyDeviceToDevice));
+            float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, trials, h[i]);
+            float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]);
+            if (u < (wcDot/ wtDot) ){
+                memcpy(tDot, cDot, sizeof(int) * trials);
+            }
+            free(cDot);
         }
         // TODO: Draw from Multivariate Normal, and Save into the results
-        float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, nThreads);
-        float posterior_mean_b = posteriorMean(tDot, dev_b_out, M, nThreads);
-        float variance = (h[i]^2)/M;
-        printf('%f %f' , norm(state, posterior_mean_a, variance), norm(state, posterior_mean_b, variance));
+        float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, trials);
+        float posterior_mean_b = posteriorMean(tDot, dev_b_out, M, trials);
+        float variance = (h[i]*h[i])/M;
+        printf("%d\n",trials);
+        printf("%f, %f\n" , general_normoral(state, posterior_mean_a, variance), general_normoral(state, posterior_mean_b, variance));
     }
 }
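Two lines in this hunk still rely on integer arithmetic. In h[i] = powf(i,(-1/(4+2))), the exponent -1/(4+2) is integer division and truncates to 0, so powf returns 1 for every i and all bandwidths come out equal. Likewise, int u = curand_uniform(&state[id]) truncates the uniform draw to 0, so the acceptance test u < (wcDot/wtDot) passes whenever the weight ratio is positive. A floating-point sketch, assuming the intended bandwidth is h_i = i^(-1/(4+2)) (an assumption; with that formula i would have to start at 1, since a negative power of zero is infinite):

    // Sketch only, not part of the commit: floating-point bandwidth and acceptance draw.
    h[i] = powf((float)i, -1.0f / (4.0f + 2.0f)); // i^(-1/6); requires i >= 1
    float u = curand_uniform(&state[id]);         // uniform draw in (0, 1]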




 __device__ float general_normoral(curandState *state, float mean, float variance) {
+    int id = threadIdx.x + blockIdx.x * blockDim.x;
     return curand_normal(&state[id]) * variance + mean;
 }
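curand_normal returns a standard normal draw, so multiplying it by variance produces a sample whose standard deviation, not variance, equals that argument. If the parameter really is a variance, a draw from N(mean, variance) would scale by its square root; a sketch under that assumption (hypothetical helper, not in the commit):

    // Hypothetical variant that treats the third argument as a variance.
    __device__ float general_normal_var(curandState *state, float mean, float variance) {
        int id = threadIdx.x + blockIdx.x * blockDim.x;
        return curand_normal(&state[id]) * sqrtf(variance) + mean;
    }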


@@ -250,14 +263,14 @@ __device__ float computeW(int *tDot, float *dev_x_out, float *dev_y_out, int M,
         int index = tDot[i] + i * nThreads; // trial m of posterior m (note: i = blockId)
         posterior_m_tm_x = dev_x_out[index]; // posterior_m_tm of x
         posterior_m_tm_y = dev_y_out[index]; // posterio_m_tm_ of y
-        product *= normPDF(posterior_m_tm_x, posterior_mean_x, h^2) * normPDF(posterior_m_tm_y, posterior_mean_y, h^2);
+        product *= normPDF(posterior_m_tm_x, posterior_mean_x, h*h) * normPDF(posterior_m_tm_y, posterior_mean_y, h*h);
     }
     return product;
 }

 __device__ float normPDF(float x, float mean, float variance) {
-    float denominator = sqrtf(2*PI*(variance^2));
-    float numerator = expf( -1 * (x-mean)^2 / (2*variance^2) );
+    float denominator = sqrtf(2*PI*(variance*variance));
+    float numerator = expf( -1 * (x-mean)*(x-mean) / (2*variance*variance) );
     return numerator/denominator;
 }
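The ^ operator is bitwise XOR in C/CUDA, which is what this hunk fixes by replacing h^2 and variance^2 with explicit multiplications. Note that normPDF still squares its variance argument in both the normalizing constant and the exponent, so the value passed in is effectively treated as a standard deviation, while computeW passes h*h. A sketch that interprets the argument directly as a variance, using the file's PI constant (hypothetical helper, not part of the commit):

    // Hypothetical density of N(mean, sigma2), with sigma2 a variance.
    __device__ float normPDF_var(float x, float mean, float sigma2) {
        float diff = x - mean;
        return expf(-diff * diff / (2.0f * sigma2)) / sqrtf(2.0f * PI * sigma2);
    }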



