Permalink

jet08013
More work on the ocurse
63d2bb8
Apr 13, 2018

# coding: utf-8 | |

# #### Problem 3.10.8 | |

# | |

# Analysis of proportions: a survey was done of bicycle and | |

# other vehicular traffic in the neighborhood of the campus of the | |

# University of California, Berkeley, in the spring of 1993. | |

# Sixty city blocks were selected at random; each block was observed | |

# for one hour, and the numbers of bicycles and other vehicles traveling | |

# along that block were recorded. The sampling was stratified into six | |

# types of city blocks: busy, fairly busy, and residential streets, with | |

# and without bike routes, with ten blocks measured in each stratum. | |

# Table 3.3 displays the number of bicycles and other vehicles | |

# recorded in the study. For this problem, restrict your attention | |

# to the first four rows of the table: the data on residential streets. | |

# | |

# (a) Let $y_1$ , . . . , $y_{10}$ and $z_1$ , . . . , $z_8$ be the | |

# observed proportion of traffic that was on bicycles in the residential | |

# streets with bike lanes and with no bike lanes, respectively | |

# (so $y_1 = 16/(16 + 58)$ and $z_1 = 12/(12 + 113)$, for example). | |

# Set up a model so that the $y_i$ ’s are independent and identically | |

# distributed given parameters $\theta_y$ and the $z_i$ ’s are | |

# independent and identically distributed given parameters $\theta_z$ . | |

# | |

# (b) Set up a prior distribution that is independent in | |

# $\theta_y$ and $\theta_z$ . | |

# | |

# (c) Determine the posterior distribution for the parameters | |

# in your model and draw 1000 simulations from the posterior distribution. | |

# (Hint: $\theta_y$ and $\theta_z$ are independent in the posterior | |

# distribution, so they can be simulated independently.) | |

# | |

# (d) Let $\mu_y = E(y_i |\theta_y )$ be the mean of the distribution | |

# of the $y_i$ ’s; $\mu_y$ will be a function of $\theta_y$. | |

# Similarly, define $\mu_z$ . Using your posterior simulations from (c), | |

# plot a histogram of the posterior simulations of $\mu_y-\mu_z$, the | |

# expected difference in proportions in bicycle traffic on residential | |

# streets with and without bike lanes. We return to this example in | |

# Exercise 5.13. | |

# | |

# Gelman, Andrew; Carlin, John B.; Stern, Hal S.; Dunson, David B.; | |

# Vehtari, Aki; Rubin, Donald B.. Bayesian Data Analysis, | |

# Third Edition (Chapman & Hall/CRC Texts in Statistical Science) (Page 81). | |

# CRC Press. Kindle Edition. | |

# | |

# #### Data | |

# |Type |Bike lane? |Counts of Bikes/others| | |

# |--- |----------|----| | |

# |Residential |yes |16/58, 9/90, 10/48, 13/57, 19/103, 20/57, 18/86, 17/112, 35/273, 55/64 | | |

# |Residential |no |12/113, 1/18, 2/14, 4/44, 9/208, 7/67, 9/29, 8/154| | |

# | |

# Gelman, Andrew; Carlin, John B.; Stern, Hal S.; | |

# Dunson, David B.; Vehtari, Aki; Rubin, Donald B.. | |

# Bayesian Data Analysis, Third Edition ( | |

# Chapman & Hall/CRC Texts in Statistical Science) | |

# (Page 81). CRC Press. Kindle Edition. | |

# #### Probably best to first do 3.10.6 | |

# For that problem see the reference | |

# [Raftery, 1988](https://www.stat.washington.edu/raftery/Research/PDF/bka1988.pdf) | |

import pystan | |

import numpy as np | |

import matplotlib.pyplot as plt | |

stan_code=""" | |

data { | |

int<lower=1> N; | |

int bikes[N]; | |

int others[N]; | |

} | |

parameters { | |

real<lower=0> theta_b; | |

real<lower=0> theta_v; | |

} | |

model { | |

theta_b~uniform(0,100); | |

theta_v~uniform(0,100); | |

bikes~poisson(theta_b); | |

others~poisson(theta_v); | |

} | |

generated quantities { | |

real b_ppc; | |

real o_ppc; | |

real p ; | |

o_ppc=poisson_rng(theta_v); | |

b_ppc=poisson_rng(theta_b); | |

p=o_ppc/(o_ppc+b_ppc); | |

} | |

""" | |

sm=pystan.StanModel(model_code=stan_code) | |

fit=sm.sampling(data=dict({'N':10,'bikes':[16,9,10,13,19,20,18,17,35,55],'others':[58, 90, 48, 57, 103, 57, 86, 112, 273, 64] })) | |

print(fit.extract()) | |

print(len(fit.extract()['b_ppc'])) | |

fig,ax=plt.subplots(1,1) | |

ax.hist(fit.extract()['b_ppc'],density=True) | |

#ax[1].hist(bikes,density=True) | |

plt.show() | |