Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
A version that could run! lol
  • Loading branch information
yuz12012 committed Apr 20, 2017
1 parent 81d0a83 commit fd678a1
Showing 1 changed file with 49 additions and 26 deletions.
75 changes: 49 additions & 26 deletions parallel/para_gibbs.cu
Expand Up @@ -67,10 +67,10 @@ specifically http://docs.nvidia.com/cuda/curand/index.html#topic_1_2_1


__host__ void load_data(int argc, char **argv, int *K, int **y, float **n); __host__ void load_data(int argc, char **argv, int *K, int **y, float **n);


__host__ float sample_a(float a, float b, int K, float sum_logs); __device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs);
__host__ float sample_b(float a, int K, float flat_sum); __device__ float sample_b(curandState *state, float a, int K, float flat_sum);

__host__ float rnorm(); __host__ float rnorm();
__device__ float rnorm(curandState *state );
__host__ float rgamma(float a, float b); __host__ float rgamma(float a, float b);


__device__ float rgamma(curandState *state, int id, float a, float b); __device__ float rgamma(curandState *state, int id, float a, float b);
Expand All @@ -89,8 +89,8 @@ __device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float *
int main(int argc, char **argv){ int main(int argc, char **argv){


curandState *devStates; curandState *devStates;
float a, b, flat_sum, sum_logs, *n, *dev_n, *dev_theta, *dev_log_theta; float a, b, *n, *dev_n, *dev_theta, *dev_log_theta;
int i, K, *y, *dev_y, nBlocks, trials = 1000; int K, *y, *dev_y, nBlocks, trials = 1000;


if(argc > 2) if(argc > 2)
trials = atoi(argv[2]); trials = atoi(argv[2]);
Expand Down Expand Up @@ -167,14 +167,19 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat
int start = blockIdx.x * K/nBlocks; int start = blockIdx.x * K/nBlocks;
int lengthPerBlock = K/nBlocks; int lengthPerBlock = K/nBlocks;
//partition the data //partition the data
int *yy = &y[start]; int *yy;
float *nn = &n[start]; yy = &y[start];
float *sTheta = &theta[start]; float *nn;
float *sLogTheta = &log_theta[start]; nn = &n[start];

float *sTheta;
sTheta = &theta[start];
float *sLogTheta;
sLogTheta = &log_theta[start];


printf("block id:%d\n",blockIdx.x); printf("block id:%d\n",blockIdx.x);
for(int j = 0; j < lengthPerBlock ; j++) { for(int j = 0; j < lengthPerBlock ; j++) {
printf("%d ", yy[j]); printf("%d\n ", yy[j]);
} }
printf("alpha, beta\n"); printf("alpha, beta\n");


Expand All @@ -185,18 +190,26 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat


for(i = 0; i < trials; i++){ for(i = 0; i < trials; i++){
//sample_theta<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, dev_theta, dev_log_theta, dev_y, dev_n, a, b, K); //sample_theta<<<nBlocks, THREADS_PER_BLOCK>>>(devStates, dev_theta, dev_log_theta, dev_y, dev_n, a, b, K);
sample_theta_seq(sTheta, sLogTheta, yy, nn, a, b, K, state); sample_theta_seq(sTheta, sLogTheta, yy, nn, a, b, lengthPerBlock, state);
/* Make iterators for thetas and log thetas. */
// thrust::device_ptr<float> theta(dev_theta);
// thrust::device_ptr<float> log_theta(dev_log_theta);


/* Compute pairwise sums of thetas and log_thetas. */ float flat_sum=0;
// flat_sum = thrust::reduce(theta, theta + K); for(int ii=0;ii<lengthPerBlock;ii++){
// sum_logs = thrust::reduce(log_theta, log_theta + K); //*ptr refers to the value at address

flat_sum = flat_sum + *sTheta;
sTheta++;
}

float sum_logs=0;
for(int ii=0;ii<lengthPerBlock;ii++){
//*ptr refers to the value at address
sum_logs = sum_logs + *sLogTheta;
sLogTheta++;
}


/* Sample hyperparameters. */ /* Sample hyperparameters. */
// a = sample_a(a, b, K, sum_logs); a = sample_a(state, a, b, lengthPerBlock, sum_logs);
// b = sample_b(a, K, flat_sum); b = sample_b(state, a, lengthPerBlock, flat_sum);


/* print hyperparameters. */ /* print hyperparameters. */
printf("%f, %f\n", a, b); printf("%f, %f\n", a, b);
Expand Down Expand Up @@ -247,10 +260,10 @@ __host__ void load_data(int argc, char **argv, int *K, int **y, float **n){
* is adjusted at each step. * is adjusted at each step.
*/ */


__host__ float sample_a(float a, float b, int K, float sum_logs){ __device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs){


static float sigma = 2; static float sigma = 2;
float U, log_acceptance_ratio, proposal = rnorm() * sigma + a; float U, log_acceptance_ratio, proposal = rnorm(state) * sigma + a;


if(proposal <= 0) if(proposal <= 0)
return a; return a;
Expand All @@ -259,7 +272,7 @@ __host__ float sample_a(float a, float b, int K, float sum_logs){
K * (proposal - a) * log(b) - K * (proposal - a) * log(b) -
K * (lgamma(proposal) - lgamma(a)); K * (lgamma(proposal) - lgamma(a));


U = rand() / float(RAND_MAX); U = curand(state) / float(RAND_MAX);


if(log(U) < log_acceptance_ratio){ if(log(U) < log_acceptance_ratio){
sigma *= 1.1; sigma *= 1.1;
Expand All @@ -275,11 +288,11 @@ __host__ float sample_a(float a, float b, int K, float sum_logs){
* Sample b from a gamma distribution. * Sample b from a gamma distribution.
*/ */


__host__ float sample_b(float a, int K, float flat_sum){ __device__ float sample_b(curandState *state, float a, int K, float flat_sum){


float hyperA = K * a + 1; float hyperA = K * a + 1;
float hyperB = flat_sum; float hyperB = flat_sum;
return rgamma(hyperA, hyperB); return rgamma(state,0,hyperA, hyperB);
} }




Expand All @@ -293,7 +306,7 @@ __host__ float sample_b(float a, int K, float flat_sum){
* This is actually the algorithm chosen by NVIDIA to calculate * This is actually the algorithm chosen by NVIDIA to calculate
* normal random variables on the GPU. * normal random variables on the GPU.
*/ */

__host__ float rnorm(){ __host__ float rnorm(){


float U1 = rand() / float(RAND_MAX); float U1 = rand() / float(RAND_MAX);
Expand All @@ -303,6 +316,16 @@ __host__ float rnorm(){
return V1; return V1;
} }



__device__ float rnorm(curandState *state){

float U1 = curand(state) / float(RAND_MAX);
float U2 = curand(state) / float(RAND_MAX);
float V1 = sqrt(-2 * log(U1)) * cos(2 * PI * U2);
/* float V2 = sqrt(-2 * log(U2)) * cos(2 * PI * U1); */
return V1;
}



/* /*
* See device rgamma function. This is probably not the * See device rgamma function. This is probably not the
Expand Down

0 comments on commit fd678a1

Please sign in to comment.