From 63d2bb8a6119c0c5f85846331402bf022ac215b3 Mon Sep 17 00:00:00 2001 From: Jeremy Teitelbaum Date: Fri, 13 Apr 2018 16:38:27 -0400 Subject: [PATCH] More work on the ocurse --- .vscode/launch.json | 116 + BDA 3.10.8.html | 11825 ++++++++++++++++++++++++++++++++++++++++++ BDA 3.10.8.py | 107 + 3 files changed, 12048 insertions(+) create mode 100644 .vscode/launch.json create mode 100644 BDA 3.10.8.html create mode 100644 BDA 3.10.8.py diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..608b4f0 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,116 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}" + }, + { + "name": "Python: Attach", + "type": "python", + "request": "attach", + "localRoot": "${workspaceFolder}", + "remoteRoot": "${workspaceFolder}", + "port": 3000, + "secret": "my_secret", + "host": "localhost" + }, + { + "name": "Python: Terminal (integrated)", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + }, + { + "name": "Python: Terminal (external)", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "externalTerminal" + }, + { + "name": "Python: Django", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/manage.py", + "args": [ + "runserver", + "--noreload", + "--nothreading" + ], + "debugOptions": [ + "RedirectOutput", + "Django" + ] + }, + { + "name": "Python: Flask (0.11.x or later)", + "type": "python", + "request": "launch", + "module": "flask", + "env": { + "FLASK_APP": "${workspaceFolder}/app.py" + }, + "args": [ + "run", + "--no-debugger", + "--no-reload" + ] + }, + { + "name": "Python: Module", + "type": 
"python", + "request": "launch", + "module": "module.name" + }, + { + "name": "Python: Pyramid", + "type": "python", + "request": "launch", + "args": [ + "${workspaceFolder}/development.ini" + ], + "debugOptions": [ + "RedirectOutput", + "Pyramid" + ] + }, + { + "name": "Python: Watson", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/console.py", + "args": [ + "dev", + "runserver", + "--noreload=True" + ] + }, + { + "name": "Python: All debug Options", + "type": "python", + "request": "launch", + "pythonPath": "${config:python.pythonPath}", + "program": "${file}", + "module": "module.name", + "env": { + "VAR1": "1", + "VAR2": "2" + }, + "envFile": "${workspaceFolder}/.env", + "args": [ + "arg1", + "arg2" + ], + "debugOptions": [ + "RedirectOutput" + ] + } + ] +} \ No newline at end of file diff --git a/BDA 3.10.8.html b/BDA 3.10.8.html new file mode 100644 index 0000000..e194613 --- /dev/null +++ b/BDA 3.10.8.html @@ -0,0 +1,11825 @@ + + + +BDA 3.10.8 + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+

Problem 3.10.8

Analysis of proportions: a survey was done of bicycle and other vehicular traffic in the neighborhood of the campus of the University of California, Berkeley, in the spring of 1993. Sixty city blocks were selected at random; each block was observed for one hour, and the numbers of bicycles and other vehicles traveling along that block were recorded. The sampling was stratified into six types of city blocks: busy, fairly busy, and residential streets, with and without bike routes, with ten blocks measured in each stratum. Table 3.3 displays the number of bicycles and other vehicles recorded in the study. For this problem, restrict your attention to the first four rows of the table: the data on residential streets.

+

(a) Let $y_1, \ldots, y_{10}$ and $z_1, \ldots, z_8$ be the observed proportion of traffic that was on bicycles in the residential streets with bike lanes and with no bike lanes, respectively (so $y_1 = 16/(16 + 58)$ and $z_1 = 12/(12 + 113)$, for example). Set up a model so that the $y_i$’s are independent and identically distributed given parameters $\theta_y$ and the $z_i$’s are independent and identically distributed given parameters $\theta_z$.

+

(b) Set up a prior distribution that is independent in $\theta_y$ and $\theta_z$.

+

(c) Determine the posterior distribution for the parameters in your model and draw 1000 simulations from the posterior distribution. (Hint: $\theta_y$ and $\theta_z$ are independent in the posterior distribution, so they can be simulated independently.)

+

(d) Let $\mu_y = E(y_i \mid \theta_y)$ be the mean of the distribution of the $y_i$’s; $\mu_y$ will be a function of $\theta_y$. Similarly, define $\mu_z$. Using your posterior simulations from (c), plot a histogram of the posterior simulations of $\mu_y - \mu_z$, the expected difference in proportions in bicycle traffic on residential streets with and without bike lanes. We return to this example in Exercise 5.13.

+

Gelman, Andrew; Carlin, John B.; Stern, Hal S.; Dunson, David B.; Vehtari, Aki; Rubin, Donald B. Bayesian Data Analysis, Third Edition (Chapman & Hall/CRC Texts in Statistical Science) (Page 81). CRC Press. Kindle Edition.

+

Data

+ + + + + + + + + + + + + + + + + + +
| Type | Bike lane? | Counts of Bikes/others |
|---|---|---|
| Residential | yes | 16/58, 9/90, 10/48, 13/57, 19/103, 20/57, 18/86, 17/112, 35/273, 55/64 |
| Residential | no | 12/113, 1/18, 2/14, 4/44, 9/208, 7/67, 9/29, 8/154 |
+

Gelman, Andrew; Carlin, John B.; Stern, Hal S.; Dunson, David B.; Vehtari, Aki; Rubin, Donald B. Bayesian Data Analysis, Third Edition (Chapman & Hall/CRC Texts in Statistical Science) (Page 81). CRC Press. Kindle Edition.

+ +
+
+
+
+
+
+
+

Probably best to first do 3.10.6

For that problem see the reference [Raftery, 1988](https://www.stat.washington.edu/raftery/Research/PDF/bka1988.pdf)

+ +
+
+
+
+
+ + + + + + diff --git a/BDA 3.10.8.py b/BDA 3.10.8.py new file mode 100644 index 0000000..e0d645b --- /dev/null +++ b/BDA 3.10.8.py @@ -0,0 +1,107 @@ + +# coding: utf-8 + +# #### Problem 3.10.8 +# +# Analysis of proportions: a survey was done of bicycle and +# other vehicular traffic in the neighborhood of the campus of the +# University of California, Berkeley, in the spring of 1993. +# Sixty city blocks were selected at random; each block was observed +# for one hour, and the numbers of bicycles and other vehicles traveling +# along that block were recorded. The sampling was stratified into six +# types of city blocks: busy, fairly busy, and residential streets, with +# and without bike routes, with ten blocks measured in each stratum. +# Table 3.3 displays the number of bicycles and other vehicles +# recorded in the study. For this problem, restrict your attention +# to the first four rows of the table: the data on residential streets. +# +# (a) Let $y_1$ , . . . , $y_{10}$ and $z_1$ , . . . , $z_8$ be the +# observed proportion of traffic that was on bicycles in the residential +# streets with bike lanes and with no bike lanes, respectively +# (so $y_1 = 16/(16 + 58)$ and $z_1 = 12/(12 + 113)$, for example). +# Set up a model so that the $y_i$ ’s are independent and identically +# distributed given parameters $\theta_y$ and the $z_i$ ’s are +# independent and identically distributed given parameters $\theta_z$ . +# +# (b) Set up a prior distribution that is independent in +# $\theta_y$ and $\theta_z$ . +# +# (c) Determine the posterior distribution for the parameters +# in your model and draw 1000 simulations from the posterior distribution. +# (Hint: $\theta_y$ and $\theta_z$ are independent in the posterior +# distribution, so they can be simulated independently.) +# +# (d) Let $\mu_y = E(y_i |\theta_y )$ be the mean of the distribution +# of the $y_i$ ’s; $\mu_y$ will be a function of $\theta_y$. +# Similarly, define $\mu_z$ . 
Using your posterior simulations from (c), +# plot a histogram of the posterior simulations of $\mu_y-\mu_z$, the +# expected difference in proportions in bicycle traffic on residential +# streets with and without bike lanes. We return to this example in +# Exercise 5.13. +# +# Gelman, Andrew; Carlin, John B.; Stern, Hal S.; Dunson, David B.; +# Vehtari, Aki; Rubin, Donald B.. Bayesian Data Analysis, +# Third Edition (Chapman & Hall/CRC Texts in Statistical Science) (Page 81). +# CRC Press. Kindle Edition. +# +# #### Data +# |Type |Bike lane? |Counts of Bikes/others| +# |--- |----------|----| +# |Residential |yes |16/58, 9/90, 10/48, 13/57, 19/103, 20/57, 18/86, 17/112, 35/273, 55/64 | +# |Residential |no |12/113, 1/18, 2/14, 4/44, 9/208, 7/67, 9/29, 8/154| +# +# Gelman, Andrew; Carlin, John B.; Stern, Hal S.; +# Dunson, David B.; Vehtari, Aki; Rubin, Donald B.. +# Bayesian Data Analysis, Third Edition ( +# Chapman & Hall/CRC Texts in Statistical Science) +# (Page 81). CRC Press. Kindle Edition. 
# #### Probably best to first do 3.10.6
# For that problem see the reference
# [Raftery, 1988](https://www.stat.washington.edu/raftery/Research/PDF/bka1988.pdf)
import pystan
import numpy as np
import matplotlib.pyplot as plt

# Hourly counts for the residential blocks that have a bike lane
# (the "Residential / yes" row of the data table in the problem statement).
bikes = [16, 9, 10, 13, 19, 20, 18, 17, 35, 55]
others = [58, 90, 48, 57, 103, 57, 86, 112, 273, 64]

# Bicycle and other-vehicle counts are modelled as two independent Poisson
# samples with rates theta_b and theta_v, each given a uniform(0, 100) prior.
stan_code = """
data {
    int<lower=0> N;

    int<lower=0> bikes[N];
    int<lower=0> others[N];
}

parameters {
    // The declared bounds must match the support of the uniform priors
    // below; an unconstrained real lets the sampler propose negative
    // Poisson rates, which are rejected.
    real<lower=0, upper=100> theta_b;
    real<lower=0, upper=100> theta_v;
}

model {
    theta_b ~ uniform(0, 100);
    theta_v ~ uniform(0, 100);

    bikes ~ poisson(theta_b);
    others ~ poisson(theta_v);
}

generated quantities {
    real b_ppc;
    real o_ppc;
    real p;

    // Posterior predictive draws of one block's counts.
    o_ppc = poisson_rng(theta_v);
    b_ppc = poisson_rng(theta_b);

    // Proportion of traffic that is on bicycles, matching the exercise's
    // definition y_i = bikes / (bikes + others).
    p = b_ppc / (o_ppc + b_ppc);
}
"""

sm = pystan.StanModel(model_code=stan_code)
# N is derived from the data so the two can never disagree.
fit = sm.sampling(data={'N': len(bikes), 'bikes': bikes, 'others': others})

# Pull the posterior draws out once and reuse them below.
samples = fit.extract()
print(samples)
print(len(samples['b_ppc']))

# Histogram of the posterior predictive bicycle counts.
fig, ax = plt.subplots(1, 1)
ax.hist(samples['b_ppc'], density=True)
plt.show()