From 0c917d78ec97523320c1e47c2a9ddf29baae7f54 Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Sat, 22 Apr 2017 15:09:00 -0400 Subject: [PATCH] some change --- parallel/para_gibbs.cu | 59 ++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/parallel/para_gibbs.cu b/parallel/para_gibbs.cu index 1b04b7f..52f29c9 100644 --- a/parallel/para_gibbs.cu +++ b/parallel/para_gibbs.cu @@ -67,10 +67,10 @@ specifically http://docs.nvidia.com/cuda/curand/index.html#topic_1_2_1 __host__ void load_data(int argc, char **argv, int *K, int **y, float **n); -__device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs); -__device__ float sample_b(curandState *state, float a, int K, float flat_sum); +__device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs); +__device__ float sample_b(curandState *state,int id, float a, int K, float flat_sum); __host__ float rnorm(); -__device__ float rnorm(curandState *state ); +__device__ float rnorm(curandState *state,int id ); __host__ float rgamma(float a, float b); __device__ float rgamma(curandState *state, int id, float a, float b); @@ -86,6 +86,8 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat __device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n, float a, float b, int K, curandState *state); +__global__ void mergePosterior(); + int main(int argc, char **argv){ curandState *devStates; @@ -124,8 +126,10 @@ int main(int argc, char **argv){ //nBlocks = (K + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; nBlocks = 500; setup_kernel<<>>(devStates, 0, K); + printf("alpha, beta\n"); seqMetroProcess<<>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials); + //mergePosterior<<>>(); /*------ Free Memory -------------------------------------------*/ free(y); @@ -140,6 +144,11 @@ int main(int argc, char **argv){ return EXIT_SUCCESS; } +__global__ void mergePosterior(){ + printf("\n all blocks finished\n"); +} + + /* * Sample each theta from the appropriate gamma distribution @@ -163,25 +172,30 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat float a, float b, int trials){ /*------ MCMC ----------------------------------------------------*/ int i; + printf("K: %d \n ",&K); int start = blockIdx.x * K/nBlocks; int lengthPerBlock = K/nBlocks; //partition the data int *yy; - yy = &y[start]; + yy = y+start; float *nn; - nn = &n[start]; + nn = n+start; float *sTheta; - sTheta = &theta[start]; + sTheta = theta+start; float *sLogTheta; - sLogTheta = &log_theta[start]; + sLogTheta = log_theta+start; - printf("block id:%d\n",blockIdx.x); + printf("length per block:%d\n",&lengthPerBlock); + printf("start is:%d\n",&start); + printf("partial data under block id: %d \n ",blockIdx.x); for(int j = 0; j < lengthPerBlock ; j++) { - printf("%d\n ", yy[j]); - } - printf("alpha, beta\n"); + printf(" %d \n ", yy); + yy++; + } + + @@ -205,11 +219,12 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat sum_logs = sum_logs + *sLogTheta; sLogTheta++; } - +printf("sum of theta is %f\n",flat_sum); +printf("sum of logtheta is %f\n",sum_logs); /* Sample hyperparameters. */ - a = sample_a(state, a, b, lengthPerBlock, sum_logs); - b = sample_b(state, a, lengthPerBlock, flat_sum); + a = sample_a(state,threadIdx.x, a, b, lengthPerBlock, sum_logs); + b = sample_b(state,threadIdx.x, a, lengthPerBlock, flat_sum); /* print hyperparameters. */ printf("%f, %f\n", a, b); @@ -260,10 +275,10 @@ __host__ void load_data(int argc, char **argv, int *K, int **y, float **n){ * is adjusted at each step. */ -__device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs){ +__device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs){ static float sigma = 2; - float U, log_acceptance_ratio, proposal = rnorm(state) * sigma + a; + float U, log_acceptance_ratio, proposal = rnorm(state,id) * sigma + a; if(proposal <= 0) return a; @@ -272,7 +287,7 @@ __device__ float sample_a(curandState *state, float a, float b, int K, float sum K * (proposal - a) * log(b) - K * (lgamma(proposal) - lgamma(a)); - U = curand(state) / float(RAND_MAX); + U = curand(&state[id]) / float(RAND_MAX); if(log(U) < log_acceptance_ratio){ sigma *= 1.1; @@ -288,11 +303,11 @@ __device__ float sample_a(curandState *state, float a, float b, int K, float sum * Sample b from a gamma distribution. */ -__device__ float sample_b(curandState *state, float a, int K, float flat_sum){ +__device__ float sample_b(curandState *state,int id, float a, int K, float flat_sum){ float hyperA = K * a + 1; float hyperB = flat_sum; - return rgamma(state,0,hyperA, hyperB); + return rgamma(state,id,hyperA, hyperB); } @@ -317,10 +332,10 @@ __host__ float rnorm(){ } -__device__ float rnorm(curandState *state){ +__device__ float rnorm(curandState *state,int id){ - float U1 = curand(state) / float(RAND_MAX); - float U2 = curand(state) / float(RAND_MAX); + float U1 = curand(&state[id]) / float(RAND_MAX); + float U2 = curand(&state[id]) / float(RAND_MAX); float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2); /* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */ return V1;