|
@@ -91,7 +91,11 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int |
|
|
|
|
|
/*
 * Forward declarations for the kernel and device helpers used in this file.
 */

/* Kernel taking a trial count, an int buffer tDot and cuRAND state. */
__global__ void sampleTdot(int trials, int *tDot,curandState *state);

/*
 * Mean over the M entries dev_x_out[tDot[i] + i * nThreads], i = 0..M-1.
 * NOTE(review): description taken from the visible part of the definition;
 * confirm against the full body.
 */
__device__ float posteriorMean(int *tDot, float *dev_x_out, int M, int nThreads);

/*
 * Product over i = 0..M-1 of normPDF evaluated at
 * dev_x_out[tDot[i] + i * nThreads], centred on posteriorMean(...).
 */
__device__ float computeW(int *tDot, float *dev_x_out, int M, int nThreads, float h);

/* Normal density at x for the given mean and standard deviation sigma. */
__device__ float normPDF(float x, float mean, float sigma);
|
|
|
|
|
int main(int argc, char **argv){ |
|
|
|
|
@@ -205,7 +209,7 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int |
|
|
} |
|
|
|
|
|
__device__ float posteriorMean(int *tDot, float *dev_x_out, int M, int nThreads) { |
|
|
float sum; |
|
|
float sum = 0; |
|
|
for (int i=0; i < M; i++) { |
|
|
int index = tDot[i] + i * nThreads; // trial m of posterior m (note: i = blockId) |
|
|
sum += dev_x_out[index]; // posterior_m_tm |
|
@@ -214,7 +218,24 @@ __device__ float posteriorMean(int *tDot, float *dev_x_out, int M, int nThreads) |
|
|
return mean; |
|
|
} |
|
|
|
|
|
/*
 * Multiply together, over the M posteriors, the normal density of the draw
 * selected by tDot, centred on the mean of the selected draws.
 *
 * tDot      - M indices; tDot[i] selects one draw of posterior i
 * dev_x_out - draws; posterior i starts at offset i * nThreads
 * M         - number of posteriors (loop trip count)
 * nThreads  - stride between consecutive posteriors in dev_x_out
 * h         - bandwidth; h * h is forwarded to normPDF as its sigma argument
 *
 * Returns the product of the M density values (1 when M <= 0).
 */
__device__ float computeW(int *tDot, float *dev_x_out, int M, int nThreads, float h) {
    float posterior_mean = posteriorMean(tDot, dev_x_out, M, nThreads);

    // In C/CUDA `^` is bitwise XOR, not exponentiation, and is ill-formed on
    // float operands; square explicitly instead.
    // NOTE(review): normPDF squares its third argument again, so the kernel
    // variance here is h^4. If variance h^2 is intended, pass `h` instead.
    float h_squared = h * h;

    float product = 1.0f;

    for (int i = 0; i < M; i++) {
        int index = tDot[i] + i * nThreads; // trial m of posterior m (note: i = blockId)
        float posterior_m_tm = dev_x_out[index];
        product *= normPDF(posterior_m_tm, posterior_mean, h_squared);
    }

    return product;
}
|
|
|
|
|
/*
 * Normal probability density evaluated at x.
 *
 * x     - point at which to evaluate the density
 * mean  - mean of the distribution
 * sigma - standard deviation (squared internally); must be non-zero
 *
 * Returns exp(-(x - mean)^2 / (2 sigma^2)) / sqrt(2 * PI * sigma^2).
 */
__device__ float normPDF(float x, float mean, float sigma) {
    // `^` is bitwise XOR in C/CUDA (and invalid on floats), so squares are
    // written out as explicit products.
    float variance = sigma * sigma;
    float diff = x - mean;

    float denominator = sqrtf(2.0f * (float)PI * variance);
    float numerator = expf(-(diff * diff) / (2.0f * variance));

    return numerator / denominator;
}
|
|
|
|
|
/* |
|
|
* Sample each theta from the appropriate gamma distribution |
|
|