|
@@ -67,10 +67,10 @@ specifically http://docs.nvidia.com/cuda/curand/index.html#topic_1_2_1 |
|
|
|
|
|
__host__ void load_data(int argc, char **argv, int *K, int **y, float **n); |
|
|
|
|
|
__device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs); |
|
|
__device__ float sample_b(curandState *state, float a, int K, float flat_sum); |
|
|
__device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs); |
|
|
__device__ float sample_b(curandState *state,int id, float a, int K, float flat_sum); |
|
|
__host__ float rnorm(); |
|
|
__device__ float rnorm(curandState *state ); |
|
|
__device__ float rnorm(curandState *state,int id ); |
|
|
__host__ float rgamma(float a, float b); |
|
|
|
|
|
__device__ float rgamma(curandState *state, int id, float a, float b); |
|
@@ -86,6 +86,8 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat |
|
|
__device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *n, |
|
|
float a, float b, int K, curandState *state); |
|
|
|
|
|
__global__ void mergePosterior(); |
|
|
|
|
|
int main(int argc, char **argv){ |
|
|
|
|
|
curandState *devStates; |
|
@@ -124,8 +126,10 @@ int main(int argc, char **argv){ |
|
|
//nBlocks = (K + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; |
|
|
nBlocks = 500; |
|
|
setup_kernel<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, 0, K); |
|
|
printf("alpha, beta\n"); |
|
|
seqMetroProcess<<<nBlocks,1>>>(K,nBlocks,dev_y,dev_n,devStates,dev_theta,dev_log_theta,a,b,trials); |
|
|
|
|
|
//mergePosterior<<<nBlocks,1>>>(); |
|
|
/*------ Free Memory -------------------------------------------*/ |
|
|
|
|
|
free(y); |
|
@@ -140,6 +144,11 @@ int main(int argc, char **argv){ |
|
|
return EXIT_SUCCESS; |
|
|
} |
|
|
|
|
|
__global__ void mergePosterior(){ |
|
|
printf("\n all blocks finished\n"); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
* Sample each theta from the appropriate gamma distribution |
|
@@ -163,25 +172,30 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat |
|
|
float a, float b, int trials){ |
|
|
/*------ MCMC ----------------------------------------------------*/ |
|
|
int i; |
|
|
printf("K: %d \n ",&K); |
|
|
|
|
|
int start = blockIdx.x * K/nBlocks; |
|
|
int lengthPerBlock = K/nBlocks; |
|
|
//partition the data |
|
|
int *yy; |
|
|
yy = &y[start]; |
|
|
yy = y+start; |
|
|
float *nn; |
|
|
nn = &n[start]; |
|
|
nn = n+start; |
|
|
|
|
|
float *sTheta; |
|
|
sTheta = &theta[start]; |
|
|
sTheta = theta+start; |
|
|
float *sLogTheta; |
|
|
sLogTheta = &log_theta[start]; |
|
|
sLogTheta = log_theta+start; |
|
|
|
|
|
printf("block id:%d\n",blockIdx.x); |
|
|
printf("length per block:%d\n",&lengthPerBlock); |
|
|
printf("start is:%d\n",&start); |
|
|
printf("partial data under block id: %d \n ",blockIdx.x); |
|
|
for(int j = 0; j < lengthPerBlock ; j++) { |
|
|
printf("%d\n ", yy[j]); |
|
|
} |
|
|
printf("alpha, beta\n"); |
|
|
printf(" %d \n ", yy); |
|
|
yy++; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -205,11 +219,12 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat |
|
|
sum_logs = sum_logs + *sLogTheta; |
|
|
sLogTheta++; |
|
|
} |
|
|
|
|
|
printf("sum of theta is %f\n",flat_sum); |
|
|
printf("sum of logtheta is %f\n",sum_logs); |
|
|
|
|
|
/* Sample hyperparameters. */ |
|
|
a = sample_a(state, a, b, lengthPerBlock, sum_logs); |
|
|
b = sample_b(state, a, lengthPerBlock, flat_sum); |
|
|
a = sample_a(state,threadIdx.x, a, b, lengthPerBlock, sum_logs); |
|
|
b = sample_b(state,threadIdx.x, a, lengthPerBlock, flat_sum); |
|
|
|
|
|
/* print hyperparameters. */ |
|
|
printf("%f, %f\n", a, b); |
|
@@ -260,10 +275,10 @@ __host__ void load_data(int argc, char **argv, int *K, int **y, float **n){ |
|
|
* is adjusted at each step. |
|
|
*/ |
|
|
|
|
|
__device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs){ |
|
|
__device__ float sample_a(curandState *state,int id, float a, float b, int K, float sum_logs){ |
|
|
|
|
|
static float sigma = 2; |
|
|
float U, log_acceptance_ratio, proposal = rnorm(state) * sigma + a; |
|
|
float U, log_acceptance_ratio, proposal = rnorm(state,id) * sigma + a; |
|
|
|
|
|
if(proposal <= 0) |
|
|
return a; |
|
@@ -272,7 +287,7 @@ __device__ float sample_a(curandState *state, float a, float b, int K, float sum |
|
|
K * (proposal - a) * log(b) - |
|
|
K * (lgamma(proposal) - lgamma(a)); |
|
|
|
|
|
U = curand(state) / float(RAND_MAX); |
|
|
U = curand(&state[id]) / float(RAND_MAX); |
|
|
|
|
|
if(log(U) < log_acceptance_ratio){ |
|
|
sigma *= 1.1; |
|
@@ -288,11 +303,11 @@ __device__ float sample_a(curandState *state, float a, float b, int K, float sum |
|
|
* Sample b from a gamma distribution. |
|
|
*/ |
|
|
|
|
|
__device__ float sample_b(curandState *state, float a, int K, float flat_sum){ |
|
|
__device__ float sample_b(curandState *state,int id, float a, int K, float flat_sum){ |
|
|
|
|
|
float hyperA = K * a + 1; |
|
|
float hyperB = flat_sum; |
|
|
return rgamma(state,0,hyperA, hyperB); |
|
|
return rgamma(state,id,hyperA, hyperB); |
|
|
} |
|
|
|
|
|
|
|
@@ -317,10 +332,10 @@ __host__ float rnorm(){ |
|
|
} |
|
|
|
|
|
|
|
|
__device__ float rnorm(curandState *state){ |
|
|
__device__ float rnorm(curandState *state,int id){ |
|
|
|
|
|
float U1 = curand(state) / float(RAND_MAX); |
|
|
float U2 = curand(state) / float(RAND_MAX); |
|
|
float U1 = curand(&state[id]) / float(RAND_MAX); |
|
|
float U2 = curand(&state[id]) / float(RAND_MAX); |
|
|
float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2); |
|
|
/* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */ |
|
|
return V1; |
|
|