Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added computeW, general_normal functions. mergePosterior first draft …
…is done, and ready to be tested.
  • Loading branch information
rjm11010 committed Apr 29, 2017
1 parent d270e97 commit e4f18f0
Showing 1 changed file with 46 additions and 22 deletions.
68 changes: 46 additions & 22 deletions parallel/para_gibbs.cu
Expand Up @@ -87,20 +87,22 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
__device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n,
float a, float b, int K, curandState *state);

__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot,curandState *state, int nBlocks, int nThreads);
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot,curandState *state, int nBlocks, int nThreads, float *h, float *results);

__global__ void sampleTdot(int trials, int *tDot,curandState *state);

__device__ float posteriorMean(int *tDot, float *dev_x_out, int M, int nThreads);

__device__ float computeW(int *tDot, float *dev_x_out, int M, int nThreads, float h);
__device__ float computeW(int *tDot, float *dev_x_out, float *dev_y_out, int M, int nThreads, float h);

__device__ float normPDF(float x, float mean, float sigma);

__device__ float general_normoral(curandState *state, float mean, float variance);

int main(int argc, char **argv){

curandState *devStates;
float a, b, *n, *dev_n, *dev_theta, *dev_log_theta, *dev_a_out, *dev_b_out;
float a, b, *n, *dev_n, *dev_theta, *dev_log_theta, *dev_a_out, *dev_b_out, *results;
int K, *y, *dev_y, nBlocks,nThreads, trials = 1000;

if(argc>4){
Expand Down Expand Up @@ -137,8 +139,9 @@ int main(int argc, char **argv){
/* Allocate space for random states on device */
CUDA_CALL(cudaMalloc((void **)&devStates, K * sizeof(curandState)));
/////////////////////////allocate space on dev memory to save a and b
CUDA_CALL(cudaMalloc((void **)&dev_a_out,nBlocks*nThreads*trials*sizeof(float)));
CUDA_CALL(cudaMalloc((void **)&dev_b_out,nBlocks*nThreads*trials*sizeof(float)));
CUDA_CALL(cudaMalloc((void **)&dev_a_out, nBlocks*nThreads*trials*sizeof(float)));
CUDA_CALL(cudaMalloc((void **)&dev_b_out, nBlocks*nThreads*trials*sizeof(float)));
CUDA_CALL(cudaMalloc((void **)&results, nThreads*sizeof(float)));

/*------ Setup random number generators (one per thread) ---------*/

Expand All @@ -161,8 +164,12 @@ int main(int argc, char **argv){

int numThreads = trials / nBlocks; // TODO: This should be a whole number
mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot,devStates, nBlocks, nThreads);


/*------ Free Memory -------------------------------------------*/



free(y);
free(n);

Expand All @@ -183,7 +190,7 @@ __global__ void sampleTdot(int trials, int *tDot,curandState *state){
}


__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot,curandState *state, int nBlocks, int nThreads){
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot,curandState *state, int nBlocks, int nThreads, float *h, float *results){

// printf("\n all blocks finished\n");
// for(int j = 0; j < nBlocks*2 ; j++) {
Expand All @@ -197,17 +204,31 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int
int id = threadIdx.x + blockIdx.x * blockDim.x;
float h = id^(-1/(4+2));

for (int i=0; i < M; i++) {
int *cDot;
CUDA_CALL(cudaMemcpy(cDot, tDot, sizeof(int) * trials, cudaMemcpyDeviceToDevice));
int cm = (curand(&state[id]) % (trials-1)) + 1;
int u = curand_uniform(&state[id]);
// TODO: Compute wcDot
// TODO: Compute WtDot
if ()
for (int i=0; i < nThreads; i++) {
for (int m=0; m < M; m++) {
int *cDot;
CUDA_CALL(cudaMemcpy(cDot, tDot, sizeof(int) * trials, cudaMemcpyDeviceToDevice));
cDot[m] = (curand(&state[id]) % (trials-1)) + 1;
int u = curand_uniform(&state[id]);
float wcDot = computeW(cDot, dev_a_out, dev_b_out, M, nThreads, h[i]);
float wtDot = computeW(tDot, dev_a_out, dev_b_out, M, nThreads, h[i]);
if (u < (wcDot/ wtDot) )
CUDA_CALL(cudaMemcpy(tDot, cDot, sizeof(int) * trials, cudaMemcpyDeviceToDevice));
}
// TODO: Draw from Multivariate Normal, and Save into the results
float posterior_mean_a = posteriorMean(tDot, dev_a_out, M, nThreads);
float posterior_mean_b = posteriorMean(tDot, dev_b_out, M, nThreads);
float variance = (h[i]^2)/M;

printf('%f %f' , norm(state, posterior_mean_a, variance), norm(state, posterior_mean_b, variance));
}
}


__device__ float general_normoral(curandState *state, float mean, float variance) {
return curand_normal(&state[id]) * variance + mean;
}

__device__ float posteriorMean(int *tDot, float *dev_x_out, int M, int nThreads) {
float sum = 0;
for (int i=0; i < M; i++) {
Expand All @@ -218,22 +239,25 @@ __device__ float posteriorMean(int *tDot, float *dev_x_out, int M, int nThreads)
return mean;
}

__device__ float computeW(int *tDot, float *dev_x_out, int M, int nThreads, float h) {
float posterior_mean = posteriorMean(tDot, dev_x_out, M, nThreads);
__device__ float computeW(int *tDot, float *dev_x_out, float *dev_y_out, int M, int nThreads, float h) {
float posterior_mean_x = posteriorMean(tDot, dev_x_out, M, nThreads);
float posterior_mean_y = posteriorMean(tDot, dev_y_out, M, nThreads);
float product = 1;
float posterior_m_tm;
float posterior_m_tm_x;
float posterior_m_tm_y;

for (int i=0; i < M; i++) {
int index = tDot[i] + i * nThreads; // trial m of posterior m (note: i = blockId)
posterior_m_tm = dev_x_out[index]; // posterior_m_tm
product *= normPDF(posterior_m_tm, posterior_mean, h^2);
posterior_m_tm_x = dev_x_out[index]; // posterior_m_tm of x
posterior_m_tm_y = dev_y_out[index]; // posterio_m_tm_ of y
product *= normPDF(posterior_m_tm_x, posterior_mean_x, h^2) * normPDF(posterior_m_tm_y, posterior_mean_y, h^2);
}
return product;
}

__device__ float normPDF(float x, float mean, float sigma) {
float denominator = sqrtf(2*PI*(sigma^2));
float numerator = expf( -1 * (x-mean)^2 / (2*sigma^2) );
__device__ float normPDF(float x, float mean, float variance) {
float denominator = sqrtf(2*PI*(variance^2));
float numerator = expf( -1 * (x-mean)^2 / (2*variance^2) );
return numerator/denominator;
}

Expand Down

0 comments on commit e4f18f0

Please sign in to comment.