|
@@ -87,7 +87,9 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat |
|
__device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n,
|
|
__device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n,
|
|
float a, float b, int K, curandState *state);
|
|
float a, float b, int K, curandState *state);
|
|
|
|
|
|
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot);
|
|
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot,curandState *state, int nBlocks);
|
|
|
|
|
|
|
|
__global__ void sampleTdot(int trials, int *tDot,curandState *state);
|
|
|
|
|
|
int main(int argc, char **argv){
|
|
int main(int argc, char **argv){
|
|
|
|
|
|
@@ -100,10 +102,11 @@ int main(int argc, char **argv){ |
|
nBlocks = atoi(argv[3]);
|
|
nBlocks = atoi(argv[3]);
|
|
nThreads = atoi(argv[4]);
|
|
nThreads = atoi(argv[4]);
|
|
}
|
|
}
|
|
else if(argc > 2)
|
|
else if(argc > 2){
|
|
trials = atoi(argv[2]);
|
|
trials = atoi(argv[2]);
|
|
nBlocks = 64;
|
|
nBlocks = 64;
|
|
nThreads = 1;
|
|
nThreads = 1;
|
|
|
|
}
|
|
|
|
|
|
load_data(argc, argv, &K, &y, &n);
|
|
load_data(argc, argv, &K, &y, &n);
|
|
|
|
|
|
@@ -143,7 +146,15 @@ int main(int argc, char **argv){ |
|
seqMetroProcess<<<nBlocks,nThreads>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,dev_a_out,dev_b_out,trials);
|
|
seqMetroProcess<<<nBlocks,nThreads>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,dev_a_out,dev_b_out,trials);
|
|
|
|
|
|
int *tDot;
|
|
int *tDot;
|
|
mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot);
|
|
CUDA_CALL(cudaMalloc((void **)&tDot,trials*sizeof(int)));
|
|
|
|
|
|
|
|
float *h;
|
|
|
|
CUDA_CALL(cudaMalloc((void **)&h,trials*sizeof(float)));
|
|
|
|
|
|
|
|
sampleTdot<<<nBlocks,nThreads>>>(trials, tDot,devStates);
|
|
|
|
|
|
|
|
|
|
|
|
mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot,devStates,nBlocks);
|
|
/*------ Free Memory -------------------------------------------*/
|
|
/*------ Free Memory -------------------------------------------*/
|
|
|
|
|
|
free(y);
|
|
free(y);
|
|
@@ -158,7 +169,16 @@ int main(int argc, char **argv){ |
|
return EXIT_SUCCESS;
|
|
return EXIT_SUCCESS;
|
|
}
|
|
}
|
|
|
|
|
|
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot){
|
|
/*
 * sampleTdot
 *
 * Each thread draws one pseudo-random integer u in [1, trials-1] from its own
 * cuRAND state and stores it in tDot[id], where id is the flat 1D global
 * thread index.
 *
 * Parameters:
 *   trials - upper bound used for the draw; the caller in main also allocates
 *            tDot with `trials` ints, so it doubles as the element count.
 *   tDot   - device output buffer; thread id writes tDot[id].
 *   state  - per-thread cuRAND states, indexed by id (advanced in place).
 *            NOTE(review): assumed to hold at least one initialised state per
 *            launched thread - confirm against the setup code.
 *
 * Threads with id >= trials do nothing, so the launch may contain more
 * threads than tDot has elements. When trials < 2 the range [1, trials-1] is
 * empty; the kernel then returns without touching tDot or state.
 */
__global__ void sampleTdot(int trials, int *tDot,curandState *state){
    int id = threadIdx.x + blockIdx.x * blockDim.x;

    /* Guard the tail of the grid: the launch size is nBlocks*nThreads, which
       is unrelated to the `trials` ints available in tDot. */
    if (id >= trials)
        return;

    /* trials-1 is the modulus below; refuse a zero (or negative) modulus
       instead of performing an integer division by zero on the device. */
    if (trials < 2)
        return;

    /* curand() yields a 32-bit unsigned value; reduce it to 1..trials-1. */
    int u = (curand(&state[id]) % (trials-1)) + 1;
    //printf("thread %d sample: %d",id,u);
    tDot[id] = u;
}
|
|
|
|
|
|
|
|
|
|
|
|
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot, curandState *state,int nBlocks){
|
|
|
|
|
|
/* printf("\n all blocks finished\n");
|
|
/* printf("\n all blocks finished\n");
|
|
for(int j = 0; j < trials ; j++) {
|
|
for(int j = 0; j < trials ; j++) {
|
|
printf(" %f ", *dev_a_out);
|
|
printf(" %f ", *dev_a_out);
|
|
@@ -168,6 +188,15 @@ __global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int |
|
}
|
|
}
|
|
*/
|
|
*/
|
|
|
|
|
|
|
|
printf("\n all blocks finished\n");
|
|
|
|
for(int j = 0; j < nBlocks*2 ; j++) {
|
|
|
|
// printf(" %f ", *dev_a_out);
|
|
|
|
printf(" %d \n",*tDot);
|
|
|
|
tDot++;
|
|
|
|
//dev_b_out++;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|