From 3e8a6419373ca515df0d132a48423089834df64a Mon Sep 17 00:00:00 2001 From: Reynaldo Morillo Date: Sun, 30 Apr 2017 13:49:24 -0400 Subject: [PATCH] Made a performance enhancement to para_gibbs by removing the copying of tDot to make cDot. Instead, just replace the one value that differs between them when necessary. --- parallel/para_gibbs.cu | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/parallel/para_gibbs.cu b/parallel/para_gibbs.cu index f9a1f5b..3d5e144 100644 --- a/parallel/para_gibbs.cu +++ b/parallel/para_gibbs.cu @@ -176,7 +176,7 @@ int main(int argc, char **argv){ free(y); free(n); - + CUDA_CALL(cudaFree(devStates)); CUDA_CALL(cudaFree(dev_theta)); CUDA_CALL(cudaFree(dev_log_theta)); @@ -216,21 +216,27 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int int M = nBlocks * nThreads; int id = threadIdx.x + blockIdx.x * blockDim.x; - for (int i=0; i < trials; i++) { + for (int i=0; i < trials; i++) { h[i] = powf(i,(-1/(4+2))); for (int m=0; m < M; m++) { int *cDot = (int*) malloc(M*sizeof(int)); - //printf("%d\n",m); - - memcpy(cDot, tDot, sizeof(int) * M); - cDot[m] = (curand(&state[id]) % (trials-1)) + 1; + + // Performance modification made + // We are not going to make a copy of tDot to make cDot; + // rather, just replace the one value that differs between them when necessary + + int tDot_val_at_m = tDot[m]; + int cDot_val_at_m = (curand(&state[id]) % (trials-1)) + 1; + tDot[m] = cDot_val_at_m; // At this point tDot is actually cDot int u = curand_uniform(&state[id]); - float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, trials, h[i]); - float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); + float wcDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // Note: tDot is actually cDot + // Switch back to tDot + tDot[m] = tDot_val_at_m; + float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // This is using tDot if (u < (wcDot/ wtDot) ){ - 
memcpy(tDot, cDot, sizeof(int) * trials); + // Return tDot to the cDot configuration + tDot[m] = cDot_val_at_m; } - free(cDot); } // TODO: Draw from Multivariate Normal, and Save into the results float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, trials); @@ -281,7 +287,7 @@ __device__ float computeW(int *tDot, float *dev_x_out, float *dev_y_out, int M, } __device__ float normPDF(float x, float mean, float variance) { - + float denominator = sqrtf(2*PI*(variance*variance)); //printf("the denominator is %f\n",PI); float numerator = expf( -1 * (x-mean)*(x-mean) / (2*variance*variance) );