Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
parallel_mcmc/sequential/gibbs_metropolis.cpp
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
284 lines (205 sloc)
5.99 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
  Created by Zebulun Arendsee.
  March 26, 2013

  Modified by Will Landau.
  June 30, 2013
  will-landau.com
  landau@iastate.edu

  This program implements an MCMC algorithm for the following hierarchical
  model:

  y_k     ~ Poisson(n_k * theta_k)     k = 1, ..., K
  theta_k ~ Gamma(a, b)
  a       ~ Unif(0, a0)
  b       ~ Unif(0, b0)

  We let a0 and b0 be arbitrarily large.

  Arguments:
    1) input filename
       With two space-delimited columns holding integer values for
       y and float values for n.
    2) number of trials (1000 by default)

  Output: comma-delimited values of a and b, one line per MCMC
  iteration, written to stdout.

  Example dataset:

  $ head -3 data.txt
  4 0.91643
  23 3.23709
  7 0.40103

  Example of compilation and execution (this is the sequential C++
  version, so it compiles with an ordinary C++ compiler):

  $ g++ gibbs_metropolis.cpp -o gibbs
  $ ./gibbs mydata.txt 2500 > output.csv

  This code borrows from the nVidia developer zone documentation,
  specifically http://docs.nvidia.com/cuda/curand/index.html#topic_1_2_1
*/
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <random> | |
#define PI 3.14159265359f | |
/* Read (y, n) pairs from the file named in argv[1]; allocates *y and *n. */
void load_data(int argc, char **argv, int *K, int **y, float **n);

/* One Metropolis update of hyperparameter a. */
float sample_a(float a, float b, int K, float sum_logs);

/* Gibbs draw of hyperparameter b from its full conditional. */
float sample_b(float a, int K, float flat_sum);

/* One standard normal variate (Box-Muller). */
float rnorm();

/* One Gamma(shape a, rate b) variate. */
float rgamma(float a, float b);

/* Gibbs draws for all K thetas (and their logs). */
void sample_theta(float *theta, float *log_theta,
    int *y, float *n, float a, float b, int K);

/* Sum of an int array of length K. */
int sum(int * array, int K);

/* Sum of a float array of length K.
   NOTE(review): returns int, so the fractional part of the total is
   discarded -- confirm this truncation is intended by callers. */
int sum(float * array, int K);
int main(int argc, char **argv){ | |
float a, b, flat_sum, sum_logs, *n, *thetas, *log_thetas; | |
int i, K, *y, trials = 1000; | |
if(argc > 2) | |
trials = atoi(argv[2]); | |
load_data(argc, argv, &K, &y, &n); | |
/*------ Allocate memory -----------------------------------------*/ | |
/* Allocate space for theta and log_theta on device and host */ | |
thetas = (float *) malloc(K * sizeof(float)); | |
log_thetas = (float *) malloc(K * sizeof(float)); | |
/*------ MCMC ----------------------------------------------------*/ | |
printf("alpha, beta\n"); | |
/* starting values of hyperparameters */ | |
a = 20; | |
b = 1; | |
/* Steps of MCMC */ | |
for(i = 0; i < trials; i++){ | |
sample_theta(thetas, log_thetas, y, n, a, b, K); | |
/* Compute pairwise sums of thetas and log_thetas. */ | |
flat_sum = sum(thetas, K); | |
sum_logs = sum(log_thetas, K); | |
/* Sample hyperparameters. */ | |
a = sample_a(a, b, K, sum_logs); | |
b = sample_b(a, K, flat_sum); | |
/* print hyperparameters. */ | |
printf("%f, %f\n", a, b); | |
} | |
/*------ Free Memory -------------------------------------------*/ | |
free(y); | |
free(n); | |
free(thetas); | |
free(log_thetas); | |
return EXIT_SUCCESS; | |
} | |
/* | |
* Take sum of array given the length of the array. | |
*/ | |
/*
 * Sum the elements of an integer array of the given length.
 */
int sum(int * array, int array_length) {
    int total = 0;
    for (int idx = 0; idx < array_length; ++idx) {
        total += array[idx];
    }
    return total;
}
/*
 * Sum the elements of a float array of the given length.
 *
 * BUG FIX: the original accumulated into an int, truncating the running
 * total toward zero after EVERY addition (e.g. {0.5, 0.5, 0.5} summed
 * to 0).  We now accumulate in double and truncate only once, at the
 * return.
 *
 * NOTE(review): the return type remains int -- it is pinned by the
 * forward declaration at the top of the file and by overload resolution
 * against sum(int*, int) -- so the fractional part of the total is
 * still discarded.  Callers needing an exact sum should accumulate in
 * floating point themselves.
 */
int sum(float * array, int array_length) {
    double tot = 0.0;
    for (int i = 0; i < array_length; i++)
        tot += array[i];
    return (int) tot;
}
/* | |
* Read in data. | |
*/ | |
/*
 * Read the space-delimited (y, n) data file named in argv[1].
 *
 * Sets *K to the number of lines and allocates *y (ints) and *n
 * (floats) of that length; the caller owns and frees both arrays.
 * Exits the process on any error (missing argument, unreadable file,
 * allocation failure, malformed line).  Diagnostics go to stderr so
 * they do not pollute the CSV written to stdout.
 */
void load_data(int argc, char **argv, int *K, int **y, float **n){
    int k;
    char line[128];
    FILE *fp;

    if(argc > 1){
        fp = fopen(argv[1], "r");
    } else {
        fprintf(stderr, "Please provide input filename\n");
        exit(EXIT_FAILURE);
    }

    if(fp == NULL){
        fprintf(stderr, "Cannot read file \n");
        exit(EXIT_FAILURE);
    }

    /* First pass: count lines to size the arrays. */
    *K = 0;
    while( fgets (line, sizeof line, fp) != NULL )
        (*K)++;
    rewind(fp);

    *y = (int*) malloc((*K) * sizeof(int));
    *n = (float*) malloc((*K) * sizeof(float));
    if(*y == NULL || *n == NULL){
        fprintf(stderr, "Out of memory\n");
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    /* Second pass: parse one (y, n) pair per line.  Bail out on
       malformed input instead of leaving garbage in the arrays. */
    for(k = 0; k < *K; k++){
        if(fscanf(fp, "%d %f", *y + k, *n + k) != 2){
            fprintf(stderr, "Malformed input on line %d\n", k + 1);
            fclose(fp);
            exit(EXIT_FAILURE);
        }
    }

    fclose(fp);
}
/* | |
* Metropolis algorithm for producing random a values. | |
* The proposal distribution in normal with a variance that | |
* is adjusted at each step. | |
*/ | |
/*
 * One Metropolis update for the hyperparameter a.
 *
 * With sum_logs = sum_k log(theta_k), the full conditional of a is
 * proportional to [b^a / Gamma(a)]^K * (prod_k theta_k)^(a-1) on
 * (0, a0).  The proposal is N(a, sigma^2); non-positive proposals are
 * rejected outright since a must be positive.  Returns the new (or
 * retained) value of a.
 *
 * NOTE(review): sigma adapts on every accept/reject, which strictly
 * speaking makes the chain non-Markovian -- presumably a deliberate
 * tuning heuristic; confirm before relying on exact stationarity.
 */
float sample_a(float a, float b, int K, float sum_logs){

    /* Proposal standard deviation; persists (and adapts) across calls. */
    static float sigma = 2;

    float U, log_acceptance_ratio, proposal = rnorm() * sigma + a;

    if(proposal <= 0)
        return a;

    /* log of target(proposal) / target(a); the uniform prior and the
       (a-1) vs (proposal-1) exponents reduce to the terms below. */
    log_acceptance_ratio = (proposal - a) * sum_logs +
        K * (proposal - a) * log(b) -
        K * (lgamma(proposal) - lgamma(a));

    U = rand() / float(RAND_MAX);

    if(log(U) < log_acceptance_ratio){
        sigma *= 1.1;   /* accepted: widen the proposal */
        return proposal;
    } else {
        sigma /= 1.1;   /* rejected: narrow the proposal */
        return a;
    }
}
/* | |
* Sample b from a gamma distribution. | |
*/ | |
float sample_b(float a, int K, float flat_sum){ | |
float hyperA = K * a + 1; | |
float hyperB = flat_sum; | |
return rgamma(hyperA, hyperB); | |
} | |
/* | |
* Box-Muller Transformation: Generate one standard normal variable. | |
* | |
* This algorithm can be more efficiently used by producing two | |
* random normal variables. However, for the CPU, much faster | |
* algorithms are possible (e.g. the Ziggurat Algorithm); | |
* | |
* This is actually the algorithm chosen by NVIDIA to calculate | |
* normal random variables on the GPU. | |
*/ | |
/*
 * Box-Muller transformation: generate one standard normal variable.
 *
 * This algorithm can be used more efficiently by producing two random
 * normal variables per pair of uniforms; for the CPU, much faster
 * algorithms also exist (e.g. the Ziggurat algorithm).
 *
 * BUG FIX: rand() can return 0, in which case the original computed
 * log(0) = -inf and returned a non-finite value.  Mapping rand() into
 * (0, 1] avoids that without changing the distribution.
 */
float rnorm(){
    const float TWO_PI = 6.28318530718f;
    float U1 = (rand() + 1.0f) / ((float) RAND_MAX + 1.0f);  /* in (0, 1] */
    float U2 = rand() / (float) RAND_MAX;
    return sqrt(-2 * log(U1)) * cos(TWO_PI * U2);
}
/* | |
* See device rgamma function. This is probably not the | |
* fastest way to generate random gamma variables on a CPU. | |
*/ | |
/*
 * Draw one variate from Gamma(shape a, rate b) by rejection sampling:
 * d*v is a Gamma(a, 1) draw, then dividing by the rate b rescales it.
 * The acceptance test log(U) < 0.5*Y^2 + d*(1 - v + log v) matches the
 * Marsaglia-Tsang squeeze with d = a - 1/3, v = (1 + Y/sqrt(9d))^3.
 *
 * NOTE(review): this construction assumes a >= 1 (here the only
 * caller passes K*a + 1 > 1); behavior for a < 1 is not handled --
 * confirm before reusing elsewhere.
 */
float rgamma(float a, float b){

    float d = a - 1.0 / 3;  /* shape offset used throughout the method */
    float Y, U, v;

    while(1){
        /* Candidate built from one standard normal draw. */
        Y = rnorm();
        v = pow((1 + Y / sqrt(9 * d)), 3);

        /* Necessary to avoid taking the log of a negative number later. */
        if(v <= 0)
            continue;

        U = rand() / float(RAND_MAX);

        /* Accept the sample under the following condition.
           Otherwise repeat loop. */
        if(log(U) < 0.5 * pow(Y,2) + d * (1 - v + log(v)))
            return d * v / b;  /* d*v ~ Gamma(a, 1); scale by rate b */
    }
}
/* | |
* Sample each theta from the appropriate gamma distribution | |
*/ | |
/*
 * Gibbs draws for all K thetas: theta_k | y, a, b ~ Gamma with
 * posterior shape a + y[k] and posterior rate b + n[k].  Fills
 * theta[0..K-1] and log_theta[0..K-1].
 *
 * BUG FIXES:
 *  1) The RNG engine was constructed (default-seeded) on every call,
 *     so every MCMC iteration reused the identical random stream.  It
 *     is now static, so the stream advances across calls.
 *  2) std::gamma_distribution's second parameter is a SCALE, but
 *     b + n[i] is the posterior RATE; pass its reciprocal.  This also
 *     matches the rate convention of rgamma() in this file.
 */
void sample_theta(float *theta, float *log_theta, int *y, float *n,
                  float a, float b, int K){
    static std::default_random_engine generator;
    for ( int i = 0; i < K; i++ ){
        float hyperA = a + y[i];     /* posterior shape */
        float hyperB = b + n[i];     /* posterior rate  */
        std::gamma_distribution<double> distribution(hyperA, 1.0 / hyperB);
        theta[i] = distribution(generator);
        log_theta[i] = log(theta[i]);
    }
}