Skip to content
Permalink
Browse files

Made a performance enhancement to para_gibbs, by removing the copying…

… of tDot to make cDot. Instead, just replace the values that differ between them when necessary.
  • Loading branch information
rjm11010 committed Apr 30, 2017
1 parent 6cd1212 commit 3e8a6419373ca515df0d132a48423089834df64a
Showing with 17 additions and 11 deletions.
  1. +17 −11 parallel/para_gibbs.cu
@@ -176,7 +176,7 @@ int main(int argc, char **argv){

free(y);
free(n);

CUDA_CALL(cudaFree(devStates));
CUDA_CALL(cudaFree(dev_theta));
CUDA_CALL(cudaFree(dev_log_theta));
@@ -216,21 +216,27 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int
int M = nBlocks * nThreads;
int id = threadIdx.x + blockIdx.x * blockDim.x;

for (int i=0; i < trials; i++) {
for (int i=0; i < trials; i++) {
h[i] = powf(i,(-1/(4+2)));
for (int m=0; m < M; m++) {
int *cDot = (int*) malloc(M*sizeof(int));
//printf("%d\n",m);

memcpy(cDot, tDot, sizeof(int) * M);
cDot[m] = (curand(&state[id]) % (trials-1)) + 1;

// Performance modification made
// We are not going to make a copy of tDot to make cDot; rather, we
// just replace the one value that differs between them when necessary

int tDot_val_at_m = tDot[m];
int cDot_val_at_m = (curand(&state[id]) % (trials-1)) + 1;
tDot[m] = cDot_val_at_m; // At this point tDot is actually cDot
int u = curand_uniform(&state[id]);
float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, trials, h[i]);
float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]);
float wcDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // Note: tDot is actually cDot
// Switch back to tDot
tDot[m] = tDot_val_at_m;
float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // This is using tDot
if (u < (wcDot/ wtDot) ){
memcpy(tDot, cDot, sizeof(int) * trials);
// Return tDot to the cDot configuration
tDot[m] = cDot_val_at_m;
}
free(cDot);
}
// TODO: Draw from Multivariate Normal, and Save into the results
float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, trials);
@@ -281,7 +287,7 @@ __device__ float computeW(int *tDot, float *dev_x_out, float *dev_y_out, int M,
}

__device__ float normPDF(float x, float mean, float variance) {

float denominator = sqrtf(2*PI*(variance*variance));
//printf("the denominator is %f\n",PI);
float numerator = expf( -1 * (x-mean)*(x-mean) / (2*variance*variance) );

0 comments on commit 3e8a641

Please sign in to comment.
You can’t perform that action at this time.