|
@@ -87,7 +87,9 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat |
|
|
__device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n, |
|
|
float a, float b, int K, curandState *state); |
|
|
|
|
|
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot); |
|
|
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot,curandState *state, int nBlocks); |
|
|
|
|
|
__global__ void sampleTdot(int trials, int *tDot,curandState *state); |
|
|
|
|
|
int main(int argc, char **argv){ |
|
|
|
|
@@ -100,10 +102,11 @@ int main(int argc, char **argv){ |
|
|
nBlocks = atoi(argv[3]); |
|
|
nThreads = atoi(argv[4]); |
|
|
} |
|
|
else if(argc > 2) |
|
|
else if(argc > 2){ |
|
|
trials = atoi(argv[2]); |
|
|
nBlocks = 64; |
|
|
nThreads = 1; |
|
|
} |
|
|
|
|
|
load_data(argc, argv, &K, &y, &n); |
|
|
|
|
@@ -143,7 +146,15 @@ int main(int argc, char **argv){ |
|
|
seqMetroProcess<<<nBlocks,nThreads>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,dev_a_out,dev_b_out,trials); |
|
|
|
|
|
int *tDot; |
|
|
mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot); |
|
|
CUDA_CALL(cudaMalloc((void **)&tDot,trials*sizeof(int))); |
|
|
|
|
|
float *h; |
|
|
CUDA_CALL(cudaMalloc((void **)&h,trials*sizeof(float))); |
|
|
|
|
|
sampleTdot<<<nBlocks,nThreads>>>(trials, tDot,devStates); |
|
|
|
|
|
|
|
|
mergePosterior<<<1,1>>>(trials,dev_a_out,dev_b_out,tDot,devStates,nBlocks); |
|
|
/*------ Free Memory -------------------------------------------*/ |
|
|
|
|
|
free(y); |
|
@@ -158,7 +169,16 @@ int main(int argc, char **argv){ |
|
|
return EXIT_SUCCESS; |
|
|
} |
|
|
|
|
|
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot){ |
|
|
/*
 * sampleTdot: fill tDot[0 .. trials-1] with pseudo-random integers drawn
 * from [1, trials-1].
 *
 *   trials : number of elements in tDot (main allocates trials * sizeof(int))
 *   tDot   : device output buffer, one int per trial
 *   state  : per-thread curand states; thread `id` only ever touches
 *            state[id], so the array must hold at least one state per
 *            launched thread (gridDim.x * blockDim.x)
 *
 * Launch layout: 1D grid of 1D blocks, any size. main launches this with
 * nBlocks * nThreads threads, which need not equal `trials`, so each thread
 * walks the buffer in steps of the total thread count: no thread writes
 * past the end of tDot, and no element is left unwritten when the grid is
 * smaller than the buffer.
 */
__global__ void sampleTdot(int trials, int *tDot, curandState *state){
    const int id     = threadIdx.x + blockIdx.x * blockDim.x;
    const int stride = blockDim.x * gridDim.x;

    // With fewer than two trials the range [1, trials-1] is empty and
    // "% (trials - 1)" would be a modulo by zero; store 0 in that case
    // instead of sampling.
    const unsigned int span = (trials > 1) ? (unsigned int)(trials - 1) : 0u;

    for (int i = id; i < trials; i += stride) {
        // curand() advances state[id] in place on every draw.
        tDot[i] = (span != 0u) ? (int)(curand(&state[id]) % span) + 1 : 0;
    }
}
|
|
|
|
|
|
|
|
/*
 * mergePosterior: debug kernel that currently only prints the leading
 * entries of tDot with device-side printf.
 *
 *   trials    : number of ints available in tDot (main allocates
 *               trials * sizeof(int) for it)
 *   dev_a_out : not read by the current body
 *   dev_b_out : not read by the current body
 *   tDot      : device buffer of sampled indices to print
 *   state     : not read by the current body
 *   nBlocks   : the kernel prints up to nBlocks * 2 entries
 *
 * main launches this as <<<1,1>>>; every launched thread would repeat the
 * same printout, so keep it at a single thread.
 */
__global__ void mergePosterior(int trials, float *dev_a_out,float *dev_b_out,int *tDot, curandState *state,int nBlocks){

    printf("\n all blocks finished\n");

    // Never read past the end of tDot: it holds only `trials` entries,
    // which can be fewer than nBlocks * 2.
    int count = nBlocks * 2;
    if (count > trials) {
        count = trials;
    }

    for (int j = 0; j < count; j++) {
        printf(" %d \n", tDot[j]);
    }
}
|
|
|
|
|
|
|
|