Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
some change
  • Loading branch information
yuz12012 committed Apr 22, 2017
1 parent fd678a1 commit 0c917d7
Showing 1 changed file with 37 additions and 22 deletions.
59 changes: 37 additions & 22 deletions parallel/para_gibbs.cu
Expand Up @@ -67,10 +67,10 @@ specifically http://docs.nvidia.com/cuda/curand/index.html#topic_1_2_1


__host__ void load_data(int argc, char **argv, int *K, int **y, float **n); __host__ void load_data(int argc, char **argv, int *K, int **y, float **n);


__device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs); __device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs);
__device__ float sample_b(curandState *state, float a, int K, float flat_sum); __device__ float sample_b(curandState *state,int id, float a, int K, float flat_sum);
__host__ float rnorm(); __host__ float rnorm();
__device__ float rnorm(curandState *state ); __device__ float rnorm(curandState *state,int id );
__host__ float rgamma(float a, float b); __host__ float rgamma(float a, float b);


__device__ float rgamma(curandState *state, int id, float a, float b); __device__ float rgamma(curandState *state, int id, float a, float b);
Expand All @@ -86,6 +86,8 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
__device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n, __device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n,
float a, float b, int K, curandState *state); float a, float b, int K, curandState *state);


__global__ void mergePosterior();

int main(int argc, char **argv){ int main(int argc, char **argv){


curandState *devStates; curandState *devStates;
Expand Down Expand Up @@ -124,8 +126,10 @@ int main(int argc, char **argv){
//nBlocks = (K + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; //nBlocks = (K + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
nBlocks = 500; nBlocks = 500;
setup_kernel<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, 0, K); setup_kernel<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, 0, K);
printf("alpha, beta\n");
seqMetroProcess<<<nBlocks,1>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials); seqMetroProcess<<<nBlocks,1>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials);


//mergePosterior<<<nBlocks,1>>>();
/*------ Free Memory -------------------------------------------*/ /*------ Free Memory -------------------------------------------*/


free(y); free(y);
Expand All @@ -140,6 +144,11 @@ int main(int argc, char **argv){
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }


/*
 * Kernel stub for the posterior-merge step.
 *
 * Takes no arguments and touches no memory; every thread that executes it
 * emits one completion message through device-side printf. The only launch
 * visible in this view of main is commented out.
 */
__global__ void mergePosterior(void)
{
    printf("\n all blocks finished\n");
}




/* /*
* Sample each theta from the appropriate gamma distribution * Sample each theta from the appropriate gamma distribution
Expand All @@ -163,25 +172,30 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
float a, float b, int trials){ float a, float b, int trials){
/*------ MCMC ----------------------------------------------------*/ /*------ MCMC ----------------------------------------------------*/
int i; int i;
printf("K: %d \n ",&K);


int start = blockIdx.x * K/nBlocks; int start = blockIdx.x * K/nBlocks;
int lengthPerBlock = K/nBlocks; int lengthPerBlock = K/nBlocks;
//partition the data //partition the data
int *yy; int *yy;
yy = &y[start]; yy = y+start;
float *nn; float *nn;
nn = &n[start]; nn = n+start;


float *sTheta; float *sTheta;
sTheta = &theta[start]; sTheta = theta+start;
float *sLogTheta; float *sLogTheta;
sLogTheta = &log_theta[start]; sLogTheta = log_theta+start;


printf("block id:%d\n",blockIdx.x); printf("length per block:%d\n",&lengthPerBlock);
printf("start is:%d\n",&start);
printf("partial data under block id: %d \n ",blockIdx.x);
for(int j = 0; j < lengthPerBlock ; j++) { for(int j = 0; j < lengthPerBlock ; j++) {
printf("%d\n ", yy[j]); printf(" %d \n ", yy);
} yy++;
printf("alpha, beta\n"); }








Expand All @@ -205,11 +219,12 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
sum_logs = sum_logs + *sLogTheta; sum_logs = sum_logs + *sLogTheta;
sLogTheta++; sLogTheta++;
} }

printf("sum of theta is %f\n",flat_sum);
printf("sum of logtheta is %f\n",sum_logs);


/* Sample hyperparameters. */ /* Sample hyperparameters. */
a = sample_a(state, a, b, lengthPerBlock, sum_logs); a = sample_a(state,threadIdx.x, a, b, lengthPerBlock, sum_logs);
b = sample_b(state, a, lengthPerBlock, flat_sum); b = sample_b(state,threadIdx.x, a, lengthPerBlock, flat_sum);


/* print hyperparameters. */ /* print hyperparameters. */
printf("%f, %f\n", a, b); printf("%f, %f\n", a, b);
Expand Down Expand Up @@ -260,10 +275,10 @@ __host__ void load_data(int argc, char **argv, int *K, int **y, float **n){
* is adjusted at each step. * is adjusted at each step.
*/ */


__device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs){ __device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs){


static float sigma = 2; static float sigma = 2;
float U, log_acceptance_ratio, proposal = rnorm(state) * sigma + a; float U, log_acceptance_ratio, proposal = rnorm(state,id) * sigma + a;


if(proposal <= 0) if(proposal <= 0)
return a; return a;
Expand All @@ -272,7 +287,7 @@ __device__ float sample_a(curandState *state, float a, float b, int K, float sum
K * (proposal - a) * log(b) - K * (proposal - a) * log(b) -
K * (lgamma(proposal) - lgamma(a)); K * (lgamma(proposal) - lgamma(a));


U = curand(state) / float(RAND_MAX); U = curand(&state[id]) / float(RAND_MAX);


if(log(U) < log_acceptance_ratio){ if(log(U) < log_acceptance_ratio){
sigma *= 1.1; sigma *= 1.1;
Expand All @@ -288,11 +303,11 @@ __device__ float sample_a(curandState *state, float a, float b, int K, float sum
* Sample b from a gamma distribution. * Sample b from a gamma distribution.
*/ */


__device__ float sample_b(curandState *state, float a, int K, float flat_sum){ __device__ float sample_b(curandState *state,int id, float a, int K, float flat_sum){


/*
 * Returns the value produced by the device rgamma, called with
 * hyperA = K * a + 1 and hyperB = flat_sum as its two gamma arguments.
 *
 * Side-by-side diff: the left (old) revision passes the literal 0 as
 * rgamma's second argument; the right (new) revision adds an `id`
 * parameter and forwards it instead.
 * NOTE(review): id presumably selects the curandState entry used for the
 * draw -- confirm against rgamma's body, which is not visible here.
 */
float hyperA = K * a + 1; float hyperA = K * a + 1;
float hyperB = flat_sum; float hyperB = flat_sum;
return rgamma(state,0,hyperA, hyperB); return rgamma(state,id,hyperA, hyperB);
} }




Expand All @@ -317,10 +332,10 @@ __host__ float rnorm(){
} }




__device__ float rnorm(curandState *state){ __device__ float rnorm(curandState *state,int id){


float U1 = curand(state) / float(RAND_MAX); float U1 = curand(&state[id]) / float(RAND_MAX);
float U2 = curand(state) / float(RAND_MAX); float U2 = curand(&state[id]) / float(RAND_MAX);
float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2); float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2);
/* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */ /* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */
return V1; return V1;
Expand Down

0 comments on commit 0c917d7

Please sign in to comment.