From 27b72ed6bce069f216b90f80d114d00cba16aa4c Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Fri, 28 Apr 2017 23:23:19 -0400 Subject: [PATCH] fixes for parallel --- parallel/para_gibbs.cu | 55 ++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/parallel/para_gibbs.cu b/parallel/para_gibbs.cu index f6407f2..6361bf4 100644 --- a/parallel/para_gibbs.cu +++ b/parallel/para_gibbs.cu @@ -155,16 +155,15 @@ int main(int argc, char **argv){ seqMetroProcess<<>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,dev_a_out,dev_b_out,trials); int *tDot; - CUDA_CALL(cudaMalloc((void **)&tDot,trials*sizeof(int))); + CUDA_CALL(cudaMalloc((void **)&tDot,nThreads*nBlocks*sizeof(int))); float *h; CUDA_CALL(cudaMalloc((void **)&h,trials*sizeof(float))); sampleTdot<<>>(trials, tDot,devStates); - int numThreads = trials / nBlocks; // TODO: This should be a whole number - mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot,devStates, nBlocks, nThreads); - + //int numThreads = trials / nBlocks; // TODO: This should be a whole number + mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot,devStates, nBlocks, nThreads, h, results); /*------ Free Memory -------------------------------------------*/ @@ -172,12 +171,21 @@ int main(int argc, char **argv){ free(y); free(n); - + CUDA_CALL(cudaFree(devStates)); CUDA_CALL(cudaFree(dev_theta)); CUDA_CALL(cudaFree(dev_log_theta)); CUDA_CALL(cudaFree(dev_y)); CUDA_CALL(cudaFree(dev_n)); +/* CUDA_CALL(cudaFree(results)); + CUDA_CALL(cudaFree(h)); + CUDA_CALL(cudaFree(tDot)); + CUDA_CALL(cudaFree(dev_a_out)); + CUDA_CALL(cudaFree(dev_b_out)); +*/ + + + return EXIT_SUCCESS; } @@ -202,30 +210,35 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int int M = nBlocks * nThreads; int id = threadIdx.x + blockIdx.x * blockDim.x; - float h = id^(-1/(4+2)); - for (int i=0; i < nThreads; i++) { + for (int i=0; i < trials; i++) { + h[i] = powf(i,(-1/(4+2))); for (int m=0; m < M; m++) { - int *cDot; - CUDA_CALL(cudaMemcpy(cDot, tDot, sizeof(int) * trials, cudaMemcpyDeviceToDevice)); + int *cDot = (int*) malloc(M*sizeof(int)); + printf("%d\n",m); + + memcpy(cDot, tDot, sizeof(int) * M); cDot[m] = (curand(&state[id]) % (trials-1)) + 1; int u = curand_uniform(&state[id]); - float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, nThreads, h[i]); - float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, nThreads, h[i]); - if (u < (wcDot/ wtDot) ) - CUDA_CALL(cudaMemcpy(tDot, cDot, sizeof(int) * trials, cudaMemcpyDeviceToDevice)); + float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, trials, h[i]); + float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); + if (u < (wcDot/ wtDot) ){ + memcpy(tDot, cDot, sizeof(int) * trials); + } + free(cDot); } // TODO: Draw from Multivariate Normal, and Save into the results - float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, nThreads); - float posterior_mean_b = posteriorMean(tDot, dev_b_out, M, nThreads); - float variance = (h[i]^2)/M; - - printf('%f %f' , norm(state, posterior_mean_a, variance), norm(state, posterior_mean_b, variance)); + float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, trials); + float posterior_mean_b = posteriorMean(tDot, dev_b_out, M, trials); + float variance = (h[i]*h[i])/M; + printf("%d\n",trials); + printf("%f, %f\n" , general_normoral(state, posterior_mean_a, variance), general_normoral(state, posterior_mean_b, variance)); } } __device__ float general_normoral(curandState *state, float mean, float variance) { + int id = threadIdx.x + blockIdx.x * blockDim.x; return curand_normal(&state[id]) * variance + mean; } @@ -250,14 +263,14 @@ __device__ float computeW(int *tDot, float *dev_x_out, float *dev_y_out, int M, int index = tDot[i] + i * nThreads; // trial m of posterior m (note: i = blockId) posterior_m_tm_x = dev_x_out[index]; // posterior_m_tm of x posterior_m_tm_y = dev_y_out[index]; // posterio_m_tm_ of y - product *= normPDF(posterior_m_tm_x, posterior_mean_x, h^2) * normPDF(posterior_m_tm_y, posterior_mean_y, h^2); + product *= normPDF(posterior_m_tm_x, posterior_mean_x, h*h) * normPDF(posterior_m_tm_y, posterior_mean_y, h*h); } return product; } __device__ float normPDF(float x, float mean, float variance) { - float denominator = sqrtf(2*PI*(variance^2)); - float numerator = expf( -1 * (x-mean)^2 / (2*variance^2) ); + float denominator = sqrtf(2*PI*(variance*variance)); + float numerator = expf( -1 * (x-mean)*(x-mean) / (2*variance*variance) ); return numerator/denominator; }