Skip to content
Permalink
Browse files

output from step 1 added

  • Loading branch information
yuz12012 committed Apr 24, 2017
1 parent 12017fa commit d19d590d2afc67a5f4896340c43486dce6e80df6
Showing with 40 additions and 16 deletions.
  1. +40 −16 parallel/para_gibbs.cu
@@ -82,16 +82,17 @@ __global__ void setup_kernel(curandState *state, unsigned int seed, int);

__global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandState *state,
float *theta, float *log_theta,
float a, float b, int trials);
float a, float b,float *dev_a_out,float *dev_b_out,
int trials);
__device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n,
float a, float b, int K, curandState *state);

__global__ void mergePosterior();
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out);

int main(int argc, char **argv){

curandState *devStates;
float a, b, *n, *dev_n, *dev_theta, *dev_log_theta;
float a, b, *n, *dev_n, *dev_theta, *dev_log_theta, *dev_a_out, *dev_b_out;
int K, *y, *dev_y, nBlocks,nThreads, trials = 1000;

if(argc>4){
@@ -126,20 +127,23 @@ int main(int argc, char **argv){

/* Allocate space for random states on device */
CUDA_CALL(cudaMalloc((void **)&devStates, K * sizeof(curandState)));

/////////////////////////allocate space on dev memory to save a and b
CUDA_CALL(cudaMalloc((void **)&dev_a_out,nBlocks*nThreads*trials*sizeof(float)));
CUDA_CALL(cudaMalloc((void **)&dev_b_out,nBlocks*nThreads*trials*sizeof(float)));

/*------ Setup random number generators (one per thread) ---------*/

//nBlocks = (K + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
//nBlocks = 1;
setup_kernel<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, 0, K);
rnorm();


//printf("%f",&dev_a_out);
printf("alpha, beta\n");
//seqMetroProcess<<<nBlocks,1>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials);
seqMetroProcess<<<nBlocks,nThreads>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials);
seqMetroProcess<<<nBlocks,nThreads>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,dev_a_out,dev_b_out,trials);


//mergePosterior<<<nBlocks,1>>>();
mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out);
/*------ Free Memory -------------------------------------------*/

free(y);
@@ -154,8 +158,16 @@ int main(int argc, char **argv){
return EXIT_SUCCESS;
}

__global__ void mergePosterior(){
printf("\n all blocks finished\n");
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out){
/* printf("\n all blocks finished\n");
for(int j = 0; j < trials ; j++) {
printf(" %f ", *dev_a_out);
printf(" %f \n",*dev_b_out);
dev_a_out++;
dev_b_out++;
}
*/

}


@@ -179,7 +191,7 @@ __device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *

__global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandState *state,
float *theta, float *log_theta,
float a, float b, int trials){
float a, float b,float *dev_a_out,float *dev_b_out, int trials){
/*------ MCMC ----------------------------------------------------*/
int i;
//printf("K: %d \n ",K);
@@ -213,9 +225,15 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat


/* Steps of MCMC */

int rstart = id*trials/nTasks;
int rlenPerTask = trials;
float *subA;
subA = dev_a_out+rstart;
float *subB;
subB = dev_b_out+rstart;

for(i = 0; i < trials; i++){

for(i = 0; i < rlenPerTask; i++){
//sample_theta<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, dev_theta, dev_log_theta, dev_y, dev_n, a, b, K);
sample_theta_seq(sTheta, sLogTheta, yy, nn, a, b, lengthPerBlock, state);

@@ -236,11 +254,17 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
//printf("sum of logtheta is %f\n",sum_logs);

/* Sample hyperparameters. */
a = sample_a(state,threadIdx.x, a, b, lengthPerBlock, sum_logs);
b = sample_b(state,threadIdx.x, a, lengthPerBlock, flat_sum);
a = sample_a(state,id, a, b, lengthPerBlock, sum_logs);
b = sample_b(state,id, a, lengthPerBlock, flat_sum);

/* print hyperparameters. */
printf("%f, %f\n", a, b);
//printf("%f, %f\n", a, b);
////////////save new output to the global array
*subA = a;
subA++;
*subB = b;
subB++;

}
}

0 comments on commit d19d590

Please sign in to comment.
You can’t perform that action at this time.