From 770e7f432c0567f029e56c820b6302eab0c60928 Mon Sep 17 00:00:00 2001 From: Jeremy Teitelbaum Date: Fri, 27 Apr 2018 09:15:32 -0400 Subject: [PATCH 1/8] added 4.7.5 --- BDA 4.7.5.ipynb | 166 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 BDA 4.7.5.ipynb diff --git a/BDA 4.7.5.ipynb b/BDA 4.7.5.ipynb new file mode 100644 index 0000000..1320e4b --- /dev/null +++ b/BDA 4.7.5.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Problem 4.7.5.\n", + "\n", + "Approximate mean and variance.\n", + "1. Suppose x and y are independent normally distributed random variables, where x~N(4,1) and y~N(3,2). What are the mean and standard deviations of y/x? Compute this using simulation.\n", + "\n", + "2. Do the same computation without simulation.\n", + "\n", + "3. What assumptions do you need for part (2)?" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from scipy.stats import norm" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mean: 0.8079667283774233 sd: 0.6574088802979349\n" + ] + } + ], + "source": [ + "x=norm(4.0,1.0)\n", + "y=norm(3.0,2.0)\n", + "\n", + "samples_x=x.rvs(10000)\n", + "samples_y=y.rvs(10000)\n", + "z=samples_y/samples_x\n", + "print('mean:',np.mean(z),'sd:',np.sqrt(np.var(z)))" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "h=norm(.807,.657)\n", + "fig,ax=plt.subplots(2,2)\n", + "ax[0,0].set_xlim(-5,5)\n", + "j=ax[0,0].hist(z,bins=100,density=True)\n", + "j=ax[0,1].hist(samples_x,bins=50,density=True)\n", + "j=ax[1,1].hist(samples_y,bins=50,density=True)\n", + "ax[0,0].plot(np.linspace(-5,5,100),h.pdf(np.linspace(-5,5,100)))\n", + "\n", + "b=plt.hist(z,bins=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 1.000e+00, 1.000e+00, 6.000e+00, 7.000e+00, 1.800e+01,\n", + " 5.800e+01, 1.330e+02, 3.230e+02, 6.530e+02, 1.059e+03, 1.401e+03,\n", + " 1.615e+03, 1.410e+03, 1.124e+03, 7.740e+02, 5.150e+02, 3.100e+02,\n", + " 2.150e+02, 1.320e+02, 7.000e+01, 5.800e+01, 2.700e+01, 2.600e+01,\n", + " 2.200e+01, 7.000e+00, 5.000e+00, 7.000e+00, 4.000e+00, 2.000e+00,\n", + " 3.000e+00, 2.000e+00, 1.000e+00, 0.000e+00, 1.000e+00, 1.000e+00,\n", + " 0.000e+00, 0.000e+00, 2.000e+00, 0.000e+00, 0.000e+00, 1.000e+00,\n", + " 0.000e+00, 1.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 1.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00]),\n", + " array([-9.34383882, -9.13726736, -8.9306959 , -8.72412443, -8.51755297,\n", + " -8.31098151, -8.10441005, -7.89783858, -7.69126712, -7.48469566,\n", + " -7.2781242 , -7.07155274, 
-6.86498127, -6.65840981, -6.45183835,\n", + " -6.24526689, -6.03869543, -5.83212396, -5.6255525 , -5.41898104,\n", + " -5.21240958, -5.00583812, -4.79926665, -4.59269519, -4.38612373,\n", + " -4.17955227, -3.97298081, -3.76640934, -3.55983788, -3.35326642,\n", + " -3.14669496, -2.9401235 , -2.73355203, -2.52698057, -2.32040911,\n", + " -2.11383765, -1.90726619, -1.70069472, -1.49412326, -1.2875518 ,\n", + " -1.08098034, -0.87440888, -0.66783741, -0.46126595, -0.25469449,\n", + " -0.04812303, 0.15844844, 0.3650199 , 0.57159136, 0.77816282,\n", + " 0.98473428, 1.19130575, 1.39787721, 1.60444867, 1.81102013,\n", + " 2.01759159, 2.22416306, 2.43073452, 2.63730598, 2.84387744,\n", + " 3.0504489 , 3.25702037, 3.46359183, 3.67016329, 3.87673475,\n", + " 4.08330621, 4.28987768, 4.49644914, 4.7030206 , 4.90959206,\n", + " 5.11616352, 5.32273499, 5.52930645, 5.73587791, 5.94244937,\n", + " 6.14902083, 6.3555923 , 6.56216376, 6.76873522, 6.97530668,\n", + " 7.18187815, 7.38844961, 7.59502107, 7.80159253, 8.00816399,\n", + " 8.21473546, 8.42130692, 8.62787838, 8.83444984, 9.0410213 ,\n", + " 9.24759277, 9.45416423, 9.66073569, 9.86730715, 10.07387861,\n", + " 10.28045008, 10.48702154, 10.693593 , 10.90016446, 11.10673592,\n", + " 11.31330739]),\n", + " )" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From aa114e22fee44b01a869476c89774f97e12ca604 Mon Sep 17 00:00:00 2001 From: 
Jeremy Teitelbaum Date: Fri, 27 Apr 2018 09:33:26 -0400 Subject: [PATCH 2/8] Added a useful formula page --- Useful Formulae.ipynb | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 Useful Formulae.ipynb diff --git a/Useful Formulae.ipynb b/Useful Formulae.ipynb new file mode 100644 index 0000000..fd0c133 --- /dev/null +++ b/Useful Formulae.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conjugate Normal Distributions (known variance)\n", + "\n", + "We are trying to learn about the unknown mean of a normal distribution with known variance. \n", + "We choose a prior distribution that is normal with mean $\\mu_{0}$ and variance $\\tau_{0}^2$. \n", + "We draw $n$ values $y_1,\\ldots, y_n$ from the distribution with known variance $\\sigma^2$. The posterior distribution\n", + "$p(\\mu|y_1,\\ldots,y_n)\\propto p(y_1,\\ldots,y_n|\\mu)p(\\mu)$ is again normal. Let \n", + "$$\n", + "\\overline{y}=\\frac{1}{n}\\sum_{i=1}^{n} y_i\n", + "$$\n", + "be the sample mean. 
\n", + "\n", + "The posterior variance $\\tau_1^2$\n", + "satisfies\n", + "$$\\frac{1}{\\tau_1^2}=\\frac{1}{\\tau_0^2}+\\frac{n}{\\sigma^2}$$\n", + "and the posterior mean is\n", + "$$\n", + "\\mu_1=\\frac{\\frac{\\mu_0}{\\tau_0^2}+\\frac{n\\overline{y}}{\\sigma^2}}{\\frac{1}{\\tau_{1}^2}}\n", + "$$\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def posterior(prior_mean,prior_variance,sample_mean,pop_variance,n):\n", + " post_var=1/((1/prior_variance) + n/pop_variance)\n", + " post_mean=(prior_mean/prior_variance+sample_mean*n/pop_variance)/(1/post_var)\n", + " return post_mean, post_var" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 1715a6d08308697b3c286fc2368abd85d934f85d Mon Sep 17 00:00:00 2001 From: Jeremy Teitelbaum Date: Fri, 27 Apr 2018 12:01:51 -0400 Subject: [PATCH 3/8] updated useful formulae --- Useful Formulae.ipynb | 89 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/Useful Formulae.ipynb b/Useful Formulae.ipynb index fd0c133..3fc65f4 100644 --- a/Useful Formulae.ipynb +++ b/Useful Formulae.ipynb @@ -21,20 +21,103 @@ "and the posterior mean is\n", "$$\n", "\\mu_1=\\frac{\\frac{\\mu_0}{\\tau_0^2}+\\frac{n\\overline{y}}{\\sigma^2}}{\\frac{1}{\\tau_{1}^2}}\n", - "$$\n" + "$$\n", + "\n", + "The posterior predictive distribution of a new observation $z$,\n", + "$$\n", + "p( z |y)=\\int_{\\theta} p(z|\\theta)p(\\theta|y) d\\theta\n", + "$$\n", + "is a normal distribution with mean equal to the posterior mean $\\mu_1$ and variance equal to $\\sigma^2+\\tau_1^2$\n", + "where $\\tau_1^2$ is the posterior 
variance.\n", + "\n", + "See Pages 39-42 of BDA (Section 2.5) for more information." ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ + "from scipy.stats import norm\n", + "import numpy as np\n", "def posterior(prior_mean,prior_variance,sample_mean,pop_variance,n):\n", " post_var=1/((1/prior_variance) + n/pop_variance)\n", " post_mean=(prior_mean/prior_variance+sample_mean*n/pop_variance)/(1/post_var)\n", - " return post_mean, post_var" + " return post_mean, post_var\n", + "\n", + "def post_sample(y,prior_mean,prior_variance,sample_mean,pop_variance,n):\n", + " post_mean,post_var=posterior(prior_mean,prior_variance,sample_mean,pop_variance,n)\n", + " return norm.pdf(y,post_mean,np.sqrt(pop_variance+post_var))\n", + " " ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7403867575800461" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "post_sample(-.25,1,.25,-.25,1,10)+post_sample(-.25,-1,.25,-.25,1,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.05095226579074726" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".1*post_sample(-.25,-1,.25,-.25,1,10)/(post_sample(-.25,1,.25,-.25,1,10)+post_sample(-.25,-1,.25,-.25,1,10))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.4414296078832747" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".9*post_sample(-.25,1,.25,-.25,1,10)/(post_sample(-.25,1,.25,-.25,1,10)+post_sample(-.25,-1,.25,-.25,1,10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], 
"metadata": { From 8e76a6b3b1770c250e246d97bcfbce6de7f51bae Mon Sep 17 00:00:00 2001 From: Jeremy Teitelbaum Date: Fri, 27 Apr 2018 14:42:03 -0400 Subject: [PATCH 4/8] 5.9.8 added (in progress) --- BDA 5.9.8.ipynb | 72 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 BDA 5.9.8.ipynb diff --git a/BDA 5.9.8.ipynb b/BDA 5.9.8.ipynb new file mode 100644 index 0000000..9a60fc0 --- /dev/null +++ b/BDA 5.9.8.ipynb @@ -0,0 +1,72 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Discrete Mixture Models\n", + "\n", + "Discrete mixture models: if $p_m(\\theta)$, for $m=1,\\ldots,M$ are conjugate prior densities for the sampling model $y|\\theta$, show that the class of finite mixture prior densities given by \n", + "$$\n", + "p(\\theta)=\\sum_{1}^{M} \\lambda_m p_m(\\theta)\n", + "$$\n", + "is also a conjugate class, where the $\\lambda_m$’s are nonnegative weights that sum to 1. This can provide a useful extension of the natural conjugate prior family to more flexible distributional forms. As an example, use the mixture form to create a bimodal prior density for a normal mean, that is thought to be near $1$, with a standard deviation of $0.5$, but has a small probability of being near $−1$, with the same standard deviation. If the variance of each observation $y_1,\\ldots,y_{10}$ is known to be $1$, and their observed mean is $y =−0.25$, derive your posterior distribution for the mean, making a sketch of both prior and posterior densities. 
Be careful: the prior and posterior mixture proportions are different.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's skip the theory part and look at the example.\n", + "\n", + "We have\n", + "$$\n", + "p(\\theta|y_1,\\ldots,y_{10})\\propto p(y_1,\\ldots,y_{10}|\\theta)p(\\theta)$$\n", + "so\n", + "$$\n", + "p(\\theta|\\{y_{i}\\})\\propto \\sum \\lambda_{m}p(\\{y_{i}\\}|\\theta)p_{m}(\\theta)\n", + "$$\n", + "\n", + "Each of the terms $p_{m}(\\theta)p(\\{y_{i}\\}|\\theta)$\n", + "is equal to $p_{m}(\\theta|\\{y_{i}\\})p_{m}(\\{y_{i}\\})$.\n", + "\n", + "Therefore the total posterior density is a weighted sum\n", + "of the individual posteriors:\n", + "\n", + "$$p(\\theta|\\{y_{i}\\})=\\sum c_{m}p_{m}(\\theta|\\{y_{i}\\})$$\n", + "where \n", + "$$\n", + "c_{m}=\\frac{\\lambda_m p_{m}(\\{y_{i}\\})}{\\sum_{m} \\lambda_m p_{m}(\\{y_{i}\\})}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 681a084f8f5be0932cfaface8f589aec74dd7f9c Mon Sep 17 00:00:00 2001 From: Jeremy Teitelbaum Date: Sun, 29 Apr 2018 09:15:40 -0400 Subject: [PATCH 5/8] still trying to figure out BDA 5.9.8 --- BDA 5.9.8.ipynb | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/BDA 5.9.8.ipynb b/BDA 5.9.8.ipynb index 9a60fc0..d3ea631 100644 --- a/BDA 5.9.8.ipynb +++ b/BDA 5.9.8.ipynb @@ -37,7 +37,79 @@ "where \n", "$$\n", "c_{m}=\\frac{\\lambda_m p_{m}(\\{y_{i}\\})}{\\sum_{m} \\lambda_m p_{m}(\\{y_{i}\\})}\n", - 
"$$" + "$$\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the special case under consideration, $p_1$ is normal with mean $-1$ and $\\sigma=.5$, $p_2$ is normal with mean $1$ and $\\sigma=.5$ and we can set $\\lambda_1=.1$ and $\\lambda_2=.9$. The $p_m(\\{y_{i}\\})$ can be calculated from the $t$ distribution. Drawing a sample of size $10$ from $p_1$ and getting a sample mean of $-.25$ and a sample variance of $1$ gives a $t$-statistics of $\\sqrt{10}(-.25+1)$ in the first case and $\\sqrt{10}(-.25-1)$ in the second. " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.041664931082753924\n", + "0.0035119750957915393\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from scipy.stats import norm, t\n", + "t_1=np.sqrt(9)*.75\n", + "t_2=np.sqrt(9)*1.25\n", + "print(t.pdf(t_1,df=9))\n", + "print(t.pdf(t_2,df=9))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5655172413793104" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".1*.041/(.1*.041+.9*.0035)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.43448275862068964" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".9*.0035/(.1*.041+.9*.0035)" ] }, { From 9c03e4d65c419d81cbac1dec49539b2dfdf21ff6 Mon Sep 17 00:00:00 2001 From: Jeremy Teitelbaum Date: Sun, 29 Apr 2018 13:00:00 -0400 Subject: [PATCH 6/8] reconciling home and web --- BDA 5.9.3.ipynb | 96 ++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/BDA 5.9.3.ipynb b/BDA 5.9.3.ipynb index abfbe93..850878d 100644 --- a/BDA 5.9.3.ipynb +++ b/BDA 
5.9.3.ipynb @@ -128,16 +128,16 @@ }, { "cell_type": "code", - "execution_count": 350, + "execution_count": 355, "metadata": {}, "outputs": [], "source": [ - "answers=sm.sampling(data=dict({'means':p55['effect'],'se':p55['se']}),iter=00)" + "answers=sm.sampling(data=dict({'means':p55['effect'],'se':p55['se']}),iter=5000)" ] }, { "cell_type": "code", - "execution_count": 351, + "execution_count": 356, "metadata": {}, "outputs": [ { @@ -145,31 +145,31 @@ "output_type": "stream", "text": [ "Inference for Stan model: anon_model_1c0a010b4129370aa04f0b4b9f729b4d.\n", - "4 chains, each with iter=500; warmup=250; thin=1; \n", - "post-warmup draws per chain=250, total post-warmup draws=1000.\n", + "4 chains, each with iter=5000; warmup=2500; thin=1; \n", + "post-warmup draws per chain=2500, total post-warmup draws=10000.\n", "\n", " mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat\n", - "theta[0] 12.07 0.66 9.38 -3.89 5.93 11.21 17.23 34.12 200 1.01\n", - "theta[1] 7.54 0.39 6.89 -6.07 3.02 7.48 12.15 21.17 312 1.01\n", - "theta[2] 5.45 0.46 8.58 -12.34 0.2 6.07 10.92 20.96 342 1.0\n", - "theta[3] 7.16 0.37 7.21 -7.68 2.72 7.31 11.83 20.44 383 1.01\n", - "theta[4] 4.51 0.43 6.58 -10.0 0.23 5.07 9.18 15.99 235 1.02\n", - "theta[5] 5.53 0.41 7.27 -10.59 1.01 6.16 10.74 18.24 313 1.01\n", - "theta[6] 11.03 0.53 7.5 -2.83 5.64 11.03 15.77 27.15 199 1.01\n", - "theta[7] 8.3 0.39 7.93 -6.88 3.08 8.28 13.26 24.5 414 1.0\n", - "mu 7.66 0.41 5.62 -3.36 4.06 7.92 11.47 18.73 192 1.02\n", - "tau 7.74 0.71 6.11 0.4 3.91 6.29 9.9 22.49 75 1.05\n", - "results[0] 11.81 0.57 12.82 -9.18 3.71 10.54 17.23 43.38 513 1.01\n", - "results[1] 7.26 0.52 12.33 -16.66 0.94 7.04 13.57 33.52 563 1.01\n", - "results[2] 5.43 0.42 12.64 -22.88 -1.07 5.99 12.55 29.09 902 1.0\n", - "results[3] 7.01 0.41 11.92 -16.05 1.03 7.18 13.39 29.57 861 1.0\n", - "results[4] 4.62 0.46 11.55 -23.93 -1.48 5.45 11.65 26.17 638 1.0\n", - "results[5] 5.83 0.44 11.71 -20.83 0.05 6.61 11.82 28.91 724 1.0\n", - "results[6] 
10.76 0.56 11.87 -11.69 3.67 10.23 17.43 36.29 443 1.0\n", - "results[7] 8.01 0.54 12.75 -16.27 1.62 7.82 14.62 31.63 561 1.0\n", - "lp__ -18.65 1.21 5.8 -27.82 -22.04 -19.13 -16.31 0.06 23 1.18\n", + "theta[0] 11.66 0.17 8.25 -2.16 6.27 10.51 16.07 30.71 2223 1.0\n", + "theta[1] 8.02 0.1 6.27 -4.44 4.18 7.82 11.76 21.02 3674 1.0\n", + "theta[2] 6.36 0.12 7.68 -10.59 2.17 6.78 11.15 20.84 3806 1.0\n", + "theta[3] 7.79 0.11 6.43 -5.29 4.02 7.62 11.79 20.79 3689 1.0\n", + "theta[4] 5.19 0.13 6.38 -8.66 1.46 5.72 9.44 16.84 2357 1.0\n", + "theta[5] 6.3 0.12 6.62 -7.68 2.38 6.54 10.57 18.98 3277 1.0\n", + "theta[6] 10.99 0.15 6.78 -0.97 6.41 10.4 14.99 25.86 2181 1.0\n", + "theta[7] 8.61 0.13 7.97 -7.29 4.13 8.21 12.91 25.66 3946 1.0\n", + "mu 8.2 0.11 5.3 -1.85 5.01 7.96 11.32 18.88 2354 1.0\n", + "tau 6.84 0.2 5.58 0.93 2.99 5.46 9.11 20.69 751 1.01\n", + "results[0] 11.6 0.18 12.19 -8.79 4.97 9.96 16.83 40.26 4692 1.0\n", + "results[1] 7.97 0.13 10.69 -14.26 2.73 7.74 13.36 29.84 6547 1.0\n", + "results[2] 6.4 0.15 11.84 -18.96 1.18 6.95 12.45 28.63 5970 1.0\n", + "results[3] 7.79 0.14 11.1 -14.73 2.66 7.66 13.29 30.0 6438 1.0\n", + "results[4] 5.24 0.16 11.09 -19.44 0.35 6.13 11.03 24.67 4830 1.0\n", + "results[5] 6.36 0.15 10.98 -17.1 1.36 6.66 11.93 28.06 5695 1.0\n", + "results[6] 10.93 0.16 11.19 -8.59 4.94 9.77 16.05 35.59 4800 1.0\n", + "results[7] 8.64 0.14 11.63 -13.75 2.97 8.25 14.03 33.19 6700 1.0\n", + "lp__ -17.68 0.41 5.52 -27.62 -21.48 -18.16 -14.18 -5.46 179 1.04\n", "\n", - "Samples were drawn using NUTS at Sat Apr 21 16:45:21 2018.\n", + "Samples were drawn using NUTS at Sat Apr 21 16:45:42 2018.\n", "For each parameter, n_eff is a crude measure of effective sample size,\n", "and Rhat is the potential scale reduction factor on split chains (at \n", "convergence, Rhat=1).\n" @@ -182,12 +182,12 @@ }, { "cell_type": "code", - "execution_count": 352, + "execution_count": 357, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": 
"\n", "text/plain": [ "
" ] @@ -204,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": 353, + "execution_count": 358, "metadata": {}, "outputs": [], "source": [ @@ -217,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 354, + "execution_count": 359, "metadata": {}, "outputs": [ { @@ -227,14 +227,14 @@ "Chance is empirical probability that given school is best\n", " effect se Chance\n", "school \n", - "A 28 15 0.191\n", - "B 8 10 0.109\n", - "C -3 16 0.105\n", - "D 7 11 0.124\n", - "E -1 9 0.077\n", - "F 1 11 0.088\n", - "G 18 10 0.186\n", - "H 12 18 0.120\n", + "A 28 15 0.2047\n", + "B 8 10 0.1160\n", + "C -3 16 0.1010\n", + "D 7 11 0.1075\n", + "E -1 9 0.0766\n", + "F 1 11 0.0854\n", + "G 18 10 0.1761\n", + "H 12 18 0.1327\n", "Only thing that worries me is that Gelman has A best with prob=10%\n" ] } @@ -248,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 332, + "execution_count": 360, "metadata": {}, "outputs": [ { @@ -259,14 +259,14 @@ " as good as or better than corresponding column\n", " \n", " A B C D E F G H\n", - "A 1.00 0.61 0.64 0.60 0.68 0.66 0.53 0.58 \n", - "B 0.39 1.00 0.53 0.50 0.59 0.56 0.41 0.47 \n", - "C 0.36 0.47 1.00 0.47 0.55 0.52 0.37 0.44 \n", - "D 0.40 0.50 0.53 1.00 0.58 0.56 0.42 0.47 \n", - "E 0.32 0.41 0.45 0.42 1.00 0.47 0.33 0.38 \n", - "F 0.34 0.44 0.48 0.44 0.53 1.00 0.35 0.42 \n", - "G 0.47 0.59 0.63 0.58 0.67 0.65 1.00 0.57 \n", - "H 0.42 0.53 0.56 0.53 0.62 0.58 0.43 1.00 \n" + "A 1.00 0.59 0.62 0.60 0.66 0.64 0.52 0.58 \n", + "B 0.41 1.00 0.54 0.51 0.57 0.56 0.42 0.49 \n", + "C 0.38 0.46 1.00 0.47 0.54 0.51 0.39 0.45 \n", + "D 0.40 0.49 0.53 1.00 0.56 0.54 0.41 0.48 \n", + "E 0.34 0.43 0.46 0.44 1.00 0.48 0.35 0.41 \n", + "F 0.36 0.44 0.49 0.46 0.52 1.00 0.38 0.43 \n", + "G 0.48 0.58 0.61 0.59 0.65 0.62 1.00 0.57 \n", + "H 0.42 0.51 0.55 0.52 0.59 0.57 0.43 1.00 \n" ] } ], @@ -336,7 +336,7 @@ " }\n", "}\n", "'''\n", - "sm=pystan.StanModel(model_code=stan_code_2)" + "sm2=pystan.StanModel(model_code=stan_code_2)" ] }, { @@ 
-345,7 +345,7 @@ "metadata": {}, "outputs": [], "source": [ - "answers=sm.sampling(data=dict({'means':p55['effect'],'se':p55['se']}),iter=5000)\n" + "answers=sm2.sampling(data=dict({'means':p55['effect'],'se':p55['se']}),iter=5000)\n" ] }, { From 92c245e86749a901a24377bb323d9961935f82df Mon Sep 17 00:00:00 2001 From: Jeremy Teitelbaum Date: Sun, 29 Apr 2018 13:22:49 -0400 Subject: [PATCH 7/8] More on 5.9.8 --- BDA 5.9.8.ipynb | 97 ++++++++++++++++++++++++++++++------------- Useful Formulae.ipynb | 2 +- 2 files changed, 68 insertions(+), 31 deletions(-) diff --git a/BDA 5.9.8.ipynb b/BDA 5.9.8.ipynb index d3ea631..5228ce3 100644 --- a/BDA 5.9.8.ipynb +++ b/BDA 5.9.8.ipynb @@ -46,70 +46,107 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the special case under consideration, $p_1$ is normal with mean $-1$ and $\\sigma=.5$, $p_2$ is normal with mean $1$ and $\\sigma=.5$ and we can set $\\lambda_1=.1$ and $\\lambda_2=.9$. The $p_m(\\{y_{i}\\})$ can be calculated from the $t$ distribution. Drawing a sample of size $10$ from $p_1$ and getting a sample mean of $-.25$ and a sample variance of $1$ gives a $t$-statistics of $\\sqrt{10}(-.25+1)$ in the first case and $\\sqrt{10}(-.25-1)$ in the second. " + "In the special case under consideration, $p_1$ is normal with mean $-1$ and $\\sigma=.5$, $p_2$ is normal with mean $1$ and $\\sigma=.5$ and we can set $\\lambda_1=.1$ and $\\lambda_2=.9$. The $p_m(\\{y_{i}\\})$ can be calculated from the $t$ distribution. Drawing a sample of size $10$ from $p_1$ and getting a sample mean of $-.25$ and a sample variance of $1$ gives a $t$-statistics of \n", + "$$\\frac{(\\overline{y}-\\mu)}{s/\\sqrt{N}}=\\frac{(-.25+1)}{1/\\sqrt{10}}=\\sqrt{10}(-.25+1)$$ in the first case and $\\sqrt{10}(-.25-1)$ in the second. 
" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from scipy.stats import norm, t\n", + "import matplotlib.pyplot as plt\n", + "t_1=np.sqrt(10)*.75\n", + "t_2=np.sqrt(10)*(-1.25)\n", + "p1y=t.pdf(t_1,df=9)\n", + "p2y=t.pdf(t_2,df=9)\n", + "lambda1,lambda2=(.1,.9)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "wt1=lambda1*p1y/(lambda1*p1y+lambda2*p2y)\n", + "wt2=lambda2*p2y/(lambda1*p1y+lambda2*p2y)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.041664931082753924\n", - "0.0035119750957915393\n" + "0.600590082806086 0.3994099171939141\n" ] } ], "source": [ - "import numpy as np\n", - "from scipy.stats import norm, t\n", - "t_1=np.sqrt(9)*.75\n", - "t_2=np.sqrt(9)*1.25\n", - "print(t.pdf(t_1,df=9))\n", - "print(t.pdf(t_2,df=9))" + "print(wt1, wt2)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 26, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.5655172413793104" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "source": [ + "def posterior(prior_mean,prior_variance,sample_mean,pop_variance,n):\n", + " post_var=1/((1/prior_variance) + n/pop_variance)\n", + " post_mean=(prior_mean/prior_variance+sample_mean*n/pop_variance)/(1/post_var)\n", + " return post_mean, post_var" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], "source": [ - ".1*.041/(.1*.041+.9*.0035)" + "post_mean1,post_var1=posterior(-1,.25,-.25,1,10)\n", + "post_mean2,post_var2=posterior(1,.25,-.25,1,10)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 28, "metadata": {}, "outputs": [ { "data": { + "image/png": "\n", "text/plain": [ - 
"0.43448275862068964" + "
" ] }, - "execution_count": 17, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - ".9*.0035/(.1*.041+.9*.0035)" + "x=np.linspace(-3,3,1000)\n", + "y1=lambda1*norm.pdf(x,-1,.5)+lambda2*norm.pdf(x,1,.5)\n", + "y2=wt1*norm.pdf(x,post_mean1,np.sqrt(post_var1))+wt2*norm.pdf(x,post_mean2,np.sqrt(post_var2))\n", + "fig,ax=plt.subplots(1)\n", + "ax.plot(x,y1,color='red',label='prior')\n", + "ax.plot(x,y2,color='blue',label='posterior')\n", + "ax.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This isn't consistent with the solutions, which I don't understand. The key issue is the meaning of the statement that \"the variance of each observation is known to be 1\". How does one compute $p_{m}(\\{y_{i}\\})$?" ] }, { @@ -136,7 +173,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.6.5" } }, "nbformat": 4, diff --git a/Useful Formulae.ipynb b/Useful Formulae.ipynb index 3fc65f4..5a3d891 100644 --- a/Useful Formulae.ipynb +++ b/Useful Formulae.ipynb @@ -136,7 +136,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.6.5" } }, "nbformat": 4, From 025db3a873daa6111df00dc165ef2861e80e8fe7 Mon Sep 17 00:00:00 2001 From: Jeremy Teitelbaum Date: Sun, 29 Apr 2018 13:26:52 -0400 Subject: [PATCH 8/8] more on 5.9.8 --- BDA 5.9.8.ipynb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/BDA 5.9.8.ipynb b/BDA 5.9.8.ipynb index 5228ce3..8ad5732 100644 --- a/BDA 5.9.8.ipynb +++ b/BDA 5.9.8.ipynb @@ -6,6 +6,10 @@ "source": [ "# Discrete Mixture Models\n", "\n", + " This solution differs from the one published here:\n", + "http://www.stat.columbia.edu/~gelman/book/solutions3.pdf\n", + "\n", + "\n", "Discrete mixture models: if $p_m(\\theta)$, for $m=1,\\ldots,M$ are conjugate prior densities for the sampling model $y|\\theta$, 
show that the class of finite mixture prior densities given by \n", "$$\n", "p(\\theta)=\\sum_{1}^{M} \\lambda_m p_m(\\theta)\n", @@ -52,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -68,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [