From 69daa2766a5ae25cc4ed68ddf3847aaf3fd03b2e Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Sat, 22 Apr 2017 17:42:47 -0400 Subject: [PATCH] partial data done --- parallel/para_gibbs.cu | 55 +++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/parallel/para_gibbs.cu b/parallel/para_gibbs.cu index 52f29c9..1e2053a 100644 --- a/parallel/para_gibbs.cu +++ b/parallel/para_gibbs.cu @@ -124,10 +124,13 @@ int main(int argc, char **argv){ /*------ Setup random number generators (one per thread) ---------*/ //nBlocks = (K + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - nBlocks = 500; + nBlocks = 1; setup_kernel<<>>(devStates, 0, K); + rnorm(); + printf("alpha, beta\n"); - seqMetroProcess<<>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials); + //seqMetroProcess<<>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials); + seqMetroProcess<<>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials); //mergePosterior<<>>(); /*------ Free Memory -------------------------------------------*/ @@ -172,10 +175,13 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat float a, float b, int trials){ /*------ MCMC ----------------------------------------------------*/ int i; - printf("K: %d \n ",&K); - - int start = blockIdx.x * K/nBlocks; - int lengthPerBlock = K/nBlocks; + //printf("K: %d \n ",K); + int id = threadIdx.x + blockIdx.x * blockDim.x; + + int nTasks = nBlocks*blockDim.x; + //int start = blockIdx.x * K/nBlocks; + int start = id * K/nTasks; + int lengthPerBlock = K/nTasks; //partition the data int *yy; yy = y+start; @@ -186,15 +192,15 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat sTheta = theta+start; float *sLogTheta; sLogTheta = log_theta+start; - - printf("length per block:%d\n",&lengthPerBlock); - printf("start is:%d\n",&start); + /* + printf("length per block:%d\n",lengthPerBlock); + printf("start is:%d\n",start); printf("partial data under block id: %d \n ",blockIdx.x); for(int j = 0; j < lengthPerBlock ; j++) { - printf(" %d \n ", yy); + printf(" %d \n ", *yy); yy++; } - +*/ @@ -219,8 +225,8 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat sum_logs = sum_logs + *sLogTheta; sLogTheta++; } -printf("sum of theta is %f\n",flat_sum); -printf("sum of logtheta is %f\n",sum_logs); +//printf("sum of theta is %f\n",flat_sum); +//printf("sum of logtheta is %f\n",sum_logs); /* Sample hyperparameters. */ a = sample_a(state,threadIdx.x, a, b, lengthPerBlock, sum_logs); @@ -277,9 +283,10 @@ __host__ void load_data(int argc, char **argv, int *K, int **y, float **n){ __device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs){ - static float sigma = 2; - float U, log_acceptance_ratio, proposal = rnorm(state,id) * sigma + a; - + float sigma = 2.0; + float norm = curand_normal(&state[id]); + float U, log_acceptance_ratio, proposal = norm * sigma + a; + //printf("rnorm result:%f \n",norm*sigma); if(proposal <= 0) return a; @@ -287,7 +294,9 @@ __device__ float sample_a(curandState *state,int id, float a, float b, int K, fl K * (proposal - a) * log(b) - K * (lgamma(proposal) - lgamma(a)); - U = curand(&state[id]) / float(RAND_MAX); + U = curand_normal(&state[id]); + //printf("log_acceptance result:%f \n",log_acceptance_ratio); + //printf("log U result:%f \n",log(U)); if(log(U) < log_acceptance_ratio){ sigma *= 1.1; @@ -326,6 +335,9 @@ __host__ float rnorm(){ float U1 = rand() / float(RAND_MAX); float U2 = rand() / float(RAND_MAX); +/* printf("host U1 result:%f \n",U1); printf("host U2 result:%f \n",U2); +printf("host RANDMAX result:%f \n",RAND_MAX); +*/ float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2); /* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */ return V1; @@ -334,8 +346,13 @@ __host__ float rnorm(){ __device__ float rnorm(curandState *state,int id){ - float U1 = curand(&state[id]) / float(RAND_MAX); - float U2 = curand(&state[id]) / float(RAND_MAX); + float U1 = curand_uniform(&state[id]); + /* printf("U1 result:%f \n",U1); +printf("RANDMAX result:%f \n",RAND_MAX); +*/ + float U2 = curand_uniform(&state[id]); + //printf("U2 result:%f \n",U2); + float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2); /* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */ return V1;