Skip to content
Permalink
Browse files

partial data done

  • Loading branch information
yuz12012 committed Apr 22, 2017
1 parent 0c917d7 commit 69daa2766a5ae25cc4ed68ddf3847aaf3fd03b2e
Showing with 36 additions and 19 deletions.
  1. +36 −19 parallel/para_gibbs.cu
@@ -124,10 +124,13 @@ int main(int argc, char **argv){
/*------ Setup random number generators (one per thread) ---------*/ /*------ Setup random number generators (one per thread) ---------*/


//nBlocks = (K + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; //nBlocks = (K + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
nBlocks = 500; nBlocks = 1;
setup_kernel<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, 0, K); setup_kernel<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, 0, K);
rnorm();

printf("alpha, beta\n"); printf("alpha, beta\n");
seqMetroProcess<<<nBlocks,1>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials); //seqMetroProcess<<<nBlocks,1>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials);
seqMetroProcess<<<nBlocks,128>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials);


//mergePosterior<<<nBlocks,1>>>(); //mergePosterior<<<nBlocks,1>>>();
/*------ Free Memory -------------------------------------------*/ /*------ Free Memory -------------------------------------------*/
@@ -172,10 +175,13 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
float a, float b, int trials){ float a, float b, int trials){
/*------ MCMC ----------------------------------------------------*/ /*------ MCMC ----------------------------------------------------*/
int i; int i;
printf("K: %d \n ",&K); //printf("K: %d \n ",K);

int id = threadIdx.x + blockIdx.x * blockDim.x;
int start = blockIdx.x * K/nBlocks;
int lengthPerBlock = K/nBlocks; int nTasks = nBlocks*blockDim.x;
//int start = blockIdx.x * K/nBlocks;
int start = id * K/nTasks;
int lengthPerBlock = K/nTasks;
//partition the data //partition the data
int *yy; int *yy;
yy = y+start; yy = y+start;
@@ -186,15 +192,15 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
sTheta = theta+start; sTheta = theta+start;
float *sLogTheta; float *sLogTheta;
sLogTheta = log_theta+start; sLogTheta = log_theta+start;

/*
printf("length per block:%d\n",&lengthPerBlock); printf("length per block:%d\n",lengthPerBlock);
printf("start is:%d\n",&start); printf("start is:%d\n",start);
printf("partial data under block id: %d \n ",blockIdx.x); printf("partial data under block id: %d \n ",blockIdx.x);
for(int j = 0; j < lengthPerBlock ; j++) { for(int j = 0; j < lengthPerBlock ; j++) {
printf(" %d \n ", yy); printf(" %d \n ", *yy);
yy++; yy++;
} }

*/






@@ -219,8 +225,8 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
sum_logs = sum_logs + *sLogTheta; sum_logs = sum_logs + *sLogTheta;
sLogTheta++; sLogTheta++;
} }
printf("sum of theta is %f\n",flat_sum); //printf("sum of theta is %f\n",flat_sum);
printf("sum of logtheta is %f\n",sum_logs); //printf("sum of logtheta is %f\n",sum_logs);


/* Sample hyperparameters. */ /* Sample hyperparameters. */
a = sample_a(state,threadIdx.x, a, b, lengthPerBlock, sum_logs); a = sample_a(state,threadIdx.x, a, b, lengthPerBlock, sum_logs);
@@ -277,17 +283,20 @@ __host__ void load_data(int argc, char **argv, int *K, int **y, float **n){


__device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs){ __device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs){


static float sigma = 2; float sigma = 2.0;
float U, log_acceptance_ratio, proposal = rnorm(state,id) * sigma + a; float norm = curand_normal(&state[id]);

float U, log_acceptance_ratio, proposal = norm * sigma + a;
//printf("rnorm result:%f \n",norm*sigma);
if(proposal <= 0) if(proposal <= 0)
return a; return a;


log_acceptance_ratio = (proposal - a) * sum_logs + log_acceptance_ratio = (proposal - a) * sum_logs +
K * (proposal - a) * log(b) - K * (proposal - a) * log(b) -
K * (lgamma(proposal) - lgamma(a)); K * (lgamma(proposal) - lgamma(a));


U = curand(&state[id]) / float(RAND_MAX); U = curand_normal(&state[id]);
//printf("log_acceptance result:%f \n",log_acceptance_ratio);
//printf("log U result:%f \n",log(U));


if(log(U) < log_acceptance_ratio){ if(log(U) < log_acceptance_ratio){
sigma *= 1.1; sigma *= 1.1;
@@ -326,6 +335,9 @@ __host__ float rnorm(){


float U1 = rand() / float(RAND_MAX); float U1 = rand() / float(RAND_MAX);
float U2 = rand() / float(RAND_MAX); float U2 = rand() / float(RAND_MAX);
/* printf("host U1 result:%f \n",U1); printf("host U2 result:%f \n",U2);
printf("host RANDMAX result:%f \n",RAND_MAX);
*/
float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2); float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2);
/* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */ /* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */
return V1; return V1;
@@ -334,8 +346,13 @@ __host__ float rnorm(){


__device__ float rnorm(curandState *state,int id){ __device__ float rnorm(curandState *state,int id){


float U1 = curand(&state[id]) / float(RAND_MAX); float U1 = curand_uniform(&state[id]);
float U2 = curand(&state[id]) / float(RAND_MAX); /* printf("U1 result:%f \n",U1);
printf("RANDMAX result:%f \n",RAND_MAX);
*/
float U2 = curand_uniform(&state[id]);
//printf("U2 result:%f \n",U2);

float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2); float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2);
/* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */ /* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */
return V1; return V1;

0 comments on commit 69daa27

Please sign in to comment.
You can’t perform that action at this time.