From fd678a1ce31fa07e6dbcee8d7b34758ac7f87342 Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Wed, 19 Apr 2017 20:12:57 -0400 Subject: [PATCH] A version that could run! lol --- parallel/para_gibbs.cu | 75 +++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/parallel/para_gibbs.cu b/parallel/para_gibbs.cu index a50fe13..1b04b7f 100644 --- a/parallel/para_gibbs.cu +++ b/parallel/para_gibbs.cu @@ -67,10 +67,10 @@ specifically http://docs.nvidia.com/cuda/curand/index.html#topic_1_2_1 __host__ void load_data(int argc, char **argv, int *K, int **y, float **n); -__host__ float sample_a(float a, float b, int K, float sum_logs); -__host__ float sample_b(float a, int K, float flat_sum); - +__device__ float sample_a(curandState *state, float a, float b, int K, float sum_logs); +__device__ float sample_b(curandState *state, float a, int K, float flat_sum); __host__ float rnorm(); +__device__ float rnorm(curandState *state ); __host__ float rgamma(float a, float b); __device__ float rgamma(curandState *state, int id, float a, float b); @@ -89,8 +89,8 @@ __device__ void sample_theta_seq(float *theta, float *log_theta, int *y, float * int main(int argc, char **argv){ curandState *devStates; - float a, b, flat_sum, sum_logs, *n, *dev_n, *dev_theta, *dev_log_theta; - int i, K, *y, *dev_y, nBlocks, trials = 1000; + float a, b, *n, *dev_n, *dev_theta, *dev_log_theta; + int K, *y, *dev_y, nBlocks, trials = 1000; if(argc > 2) trials = atoi(argv[2]); @@ -167,14 +167,19 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat int start = blockIdx.x * K/nBlocks; int lengthPerBlock = K/nBlocks; //partition the data - int *yy = &y[start]; - float *nn = &n[start]; - float *sTheta = &theta[start]; - float *sLogTheta = &log_theta[start]; + int *yy; + yy = &y[start]; + float *nn; + nn = &n[start]; + + float *sTheta; + sTheta = &theta[start]; + float *sLogTheta; + sLogTheta = &log_theta[start]; printf("block id:%d\n",blockIdx.x); for(int j = 0; j < lengthPerBlock ; j++) { - printf("%d ", yy[j]); + printf("%d\n ", yy[j]); } printf("alpha, beta\n"); @@ -185,18 +190,26 @@ __global__ void seqMetroProcess(int K, int nBlocks, int *y, float *n, curandStat for(i = 0; i < trials; i++){ //sample_theta<<>>(devStates, dev_theta, dev_log_theta, dev_y, dev_n, a, b, K); - sample_theta_seq(sTheta, sLogTheta, yy, nn, a, b, K, state); - /* Make iterators for thetas and log thetas. */ - // thrust::device_ptr theta(dev_theta); - // thrust::device_ptr log_theta(dev_log_theta); + sample_theta_seq(sTheta, sLogTheta, yy, nn, a, b, lengthPerBlock, state); - /* Compute pairwise sums of thetas and log_thetas. */ - // flat_sum = thrust::reduce(theta, theta + K); - // sum_logs = thrust::reduce(log_theta, log_theta + K); - + float flat_sum=0; + for(int ii=0;ii