Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Made a performance enhancement to para_gibbs, by removing the copying…
… of tDot to make cDot. Instead, just replace the values that differ between them when necessary.
  • Loading branch information
rjm11010 committed Apr 30, 2017
1 parent 6cd1212 commit 3e8a641
Showing 1 changed file with 17 additions and 11 deletions.
28 changes: 17 additions & 11 deletions parallel/para_gibbs.cu
Expand Up @@ -176,7 +176,7 @@ int main(int argc, char **argv){


free(y); free(y);
free(n); free(n);

CUDA_CALL(cudaFree(devStates)); CUDA_CALL(cudaFree(devStates));
CUDA_CALL(cudaFree(dev_theta)); CUDA_CALL(cudaFree(dev_theta));
CUDA_CALL(cudaFree(dev_log_theta)); CUDA_CALL(cudaFree(dev_log_theta));
Expand Down Expand Up @@ -216,21 +216,27 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int
int M = nBlocks * nThreads; int M = nBlocks * nThreads;
int id = threadIdx.x + blockIdx.x * blockDim.x; int id = threadIdx.x + blockIdx.x * blockDim.x;


for (int i=0; i < trials; i++) { for (int i=0; i < trials; i++) {
h[i] = powf(i,(-1/(4+2))); h[i] = powf(i,(-1/(4+2)));
for (int m=0; m < M; m++) { for (int m=0; m < M; m++) {
int *cDot = (int*) malloc(M*sizeof(int)); int *cDot = (int*) malloc(M*sizeof(int));
//printf("%d\n",m);

// Performance modification made
memcpy(cDot, tDot, sizeof(int) * M); // We are not going to make a copy of tDot to make cDot, rather
cDot[m] = (curand(&state[id]) % (trials-1)) + 1; // rather just replace the one value that differes them when necessary

int tDot_val_at_m = tDot[m];
int cDot_val_at_m = (curand(&state[id]) % (trials-1)) + 1;
tDot[m] = cDot_val_at_m; // At this point tDot is actually cDot
int u = curand_uniform(&state[id]); int u = curand_uniform(&state[id]);
float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, trials, h[i]); float wcDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // Note: tDot is actually cDot
float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // Switch back to tDot
tDot[m] = tDot_val_at_m;
float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // This is using tDot
if (u < (wcDot/ wtDot) ){ if (u < (wcDot/ wtDot) ){
memcpy(tDot, cDot, sizeof(int) * trials); // Retrun tDot to the cDot configuration
tDot[m] = cDot_val_at_m;
} }
free(cDot);
} }
// TODO: Draw from Multivariate Normal, and Save into the results // TODO: Draw from Multivariate Normal, and Save into the results
float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, trials); float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, trials);
Expand Down Expand Up @@ -281,7 +287,7 @@ __device__ float computeW(int *tDot, float *dev_x_out, float *dev_y_out, int M,
} }


__device__ float normPDF(float x, float mean, float variance) { __device__ float normPDF(float x, float mean, float variance) {

float denominator = sqrtf(2*PI*(variance*variance)); float denominator = sqrtf(2*PI*(variance*variance));
//printf("the denominator is %f\n",PI); //printf("the denominator is %f\n",PI);
float numerator = expf( -1 * (x-mean)*(x-mean) / (2*variance*variance) ); float numerator = expf( -1 * (x-mean)*(x-mean) / (2*variance*variance) );
Expand Down

0 comments on commit 3e8a641

Please sign in to comment.