|
@@ -176,7 +176,7 @@ int main(int argc, char **argv){ |
|
|
|
|
|
free(y); |
|
|
free(n); |
|
|
|
|
|
|
|
|
CUDA_CALL(cudaFree(devStates)); |
|
|
CUDA_CALL(cudaFree(dev_theta)); |
|
|
CUDA_CALL(cudaFree(dev_log_theta)); |
|
@@ -216,21 +216,27 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int |
|
|
int M = nBlocks * nThreads; |
|
|
int id = threadIdx.x + blockIdx.x * blockDim.x; |
|
|
|
|
|
for (int i=0; i < trials; i++) { |
|
|
for (int i=0; i < trials; i++) { |
|
|
h[i] = powf(i,(-1/(4+2))); |
|
|
for (int m=0; m < M; m++) { |
|
|
int *cDot = (int*) malloc(M*sizeof(int)); |
|
|
//printf("%d\n",m); |
|
|
|
|
|
memcpy(cDot, tDot, sizeof(int) * M); |
|
|
cDot[m] = (curand(&state[id]) % (trials-1)) + 1; |
|
|
|
|
|
// Performance modification made |
|
|
// We are not going to make a copy of tDot to make cDot, rather |
|
|
// just replace the one value that differs between them when necessary |
|
|
|
|
|
int tDot_val_at_m = tDot[m]; |
|
|
int cDot_val_at_m = (curand(&state[id]) % (trials-1)) + 1; |
|
|
tDot[m] = cDot_val_at_m; // At this point tDot is actually cDot |
|
|
int u = curand_uniform(&state[id]); |
|
|
float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, trials, h[i]); |
|
|
float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); |
|
|
float wcDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // Note: tDot is actually cDot |
|
|
// Switch back to tDot |
|
|
tDot[m] = tDot_val_at_m; |
|
|
float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, trials, h[i]); // This is using tDot |
|
|
if (u < (wcDot/ wtDot) ){ |
|
|
memcpy(tDot, cDot, sizeof(int) * trials); |
|
|
// Return tDot to the cDot configuration |
|
|
tDot[m] = cDot_val_at_m; |
|
|
} |
|
|
free(cDot); |
|
|
} |
|
|
// TODO: Draw from Multivariate Normal, and Save into the results |
|
|
float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, trials); |
|
@@ -281,7 +287,7 @@ __device__ float computeW(int *tDot, float *dev_x_out, float *dev_y_out, int M, |
|
|
} |
|
|
|
|
|
__device__ float normPDF(float x, float mean, float variance) { |
|
|
|
|
|
|
|
|
float denominator = sqrtf(2*PI*(variance*variance)); |
|
|
//printf("the denominator is %f\n",PI); |
|
|
float numerator = expf( -1 * (x-mean)*(x-mean) / (2*variance*variance) ); |
|
|