diff --git a/BDA 5.9.3.ipynb b/BDA 5.9.3.ipynb new file mode 100644 index 0000000..abfbe93 --- /dev/null +++ b/BDA 5.9.3.ipynb @@ -0,0 +1,500 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Problem 5.9.3\n", + "\n", + "Hierarchical models and multiple comparisons:\n", + "\n", + "1. Reproduce the computations in Section 5.5 for the educational testing example. Use the posterior simulations to estimate:\n", + " * for each school $j$, the probability that its coaching program is the best of the eight;\n", + " * for each pair of schools $(j,k)$ the probability that the $j$th school is better than the $k$th \n", + "2. Reproduce (1) but for the simpler model where the population variance $\\tau$ is $\\infty$ so the eight schools are independent.\n", + "3. Discuss the differences between 1 and 2.\n", + "4. What happens when $\\tau=0$?" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " effect se\n", + "school \n", + "A 28 15\n", + "B 8 10\n", + "C -3 16\n", + "D 7 11\n", + "E -1 9\n", + "F 1 11\n", + "G 18 10\n", + "H 12 18\n" ] } ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import pystan\n", + "import matplotlib.pyplot as plt\n", + "\n", + "schools=['A','B','C','D','E','F','G','H']\n", + "effects=[28,8,-3,7,-1,1,18,12]\n", + "se=[15,10,16,11,9,11,10,18]\n", + "p55=pd.DataFrame(index=schools)\n", + "p55.index.name='school'\n", + "p55['effect']=np.array(effects)\n", + "p55['se']=np.array(se)\n", + "print(p55)" ] }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pooled mean= 7.685616724956035\n", + "pooled variance= 16.580525632563663\n" ] } ], + "source": [ + "print('pooled mean=',sum(p55['effect']*1/p55['se']**2)/(sum(1/p55['se']**2)))\n", + "print('pooled 
variance=',(1/sum(1/p55['se']**2)))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## First part" ] }, + { + "cell_type": "code", + "execution_count": 349, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1c0a010b4129370aa04f0b4b9f729b4d NOW.\n" ] } ], + "source": [ + "stan_code='''\n", + "data {\n", + " real means[8];\n", + " real se[8];\n", + "\n", + "}\n", + "\n", + "parameters {\n", + " real theta[8] ; \n", + " real mu ; \n", + " real<lower=0> tau ; // SD must be constrained non-negative\n", + "}\n", + "\n", + "model {\n", + " \n", + " theta~normal(mu,tau) ; \n", + " means~normal(theta,se) ; \n", + " \n", + "}\n", + "\n", + "generated quantities {\n", + " real results[8] ; \n", + " \n", + " \n", + " for(i in 1:8) {\n", + " results[i]=normal_rng(theta[i],tau);\n", + " }\n", + "}\n", + "'''\n", + "sm=pystan.StanModel(model_code=stan_code)" ] }, + { + "cell_type": "code", + "execution_count": 350, + "metadata": {}, + "outputs": [], + "source": [ + "answers=sm.sampling(data=dict({'means':p55['effect'],'se':p55['se']}),iter=500)" ] }, + { + "cell_type": "code", + "execution_count": 351, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Inference for Stan model: anon_model_1c0a010b4129370aa04f0b4b9f729b4d.\n", + "4 chains, each with iter=500; warmup=250; thin=1; \n", + "post-warmup draws per chain=250, total post-warmup draws=1000.\n", + "\n", + " mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat\n", + "theta[0] 12.07 0.66 9.38 -3.89 5.93 11.21 17.23 34.12 200 1.01\n", + "theta[1] 7.54 0.39 6.89 -6.07 3.02 7.48 12.15 21.17 312 1.01\n", + "theta[2] 5.45 0.46 8.58 -12.34 0.2 6.07 10.92 20.96 342 1.0\n", + "theta[3] 7.16 0.37 7.21 -7.68 2.72 7.31 11.83 20.44 383 1.01\n", + "theta[4] 4.51 0.43 6.58 -10.0 0.23 5.07 9.18 15.99 235 1.02\n", + "theta[5] 5.53 0.41 7.27 -10.59 1.01 6.16 10.74 18.24 313 1.01\n", + "theta[6] 
11.03 0.53 7.5 -2.83 5.64 11.03 15.77 27.15 199 1.01\n", + "theta[7] 8.3 0.39 7.93 -6.88 3.08 8.28 13.26 24.5 414 1.0\n", + "mu 7.66 0.41 5.62 -3.36 4.06 7.92 11.47 18.73 192 1.02\n", + "tau 7.74 0.71 6.11 0.4 3.91 6.29 9.9 22.49 75 1.05\n", + "results[0] 11.81 0.57 12.82 -9.18 3.71 10.54 17.23 43.38 513 1.01\n", + "results[1] 7.26 0.52 12.33 -16.66 0.94 7.04 13.57 33.52 563 1.01\n", + "results[2] 5.43 0.42 12.64 -22.88 -1.07 5.99 12.55 29.09 902 1.0\n", + "results[3] 7.01 0.41 11.92 -16.05 1.03 7.18 13.39 29.57 861 1.0\n", + "results[4] 4.62 0.46 11.55 -23.93 -1.48 5.45 11.65 26.17 638 1.0\n", + "results[5] 5.83 0.44 11.71 -20.83 0.05 6.61 11.82 28.91 724 1.0\n", + "results[6] 10.76 0.56 11.87 -11.69 3.67 10.23 17.43 36.29 443 1.0\n", + "results[7] 8.01 0.54 12.75 -16.27 1.62 7.82 14.62 31.63 561 1.0\n", + "lp__ -18.65 1.21 5.8 -27.82 -22.04 -19.13 -16.31 0.06 23 1.18\n", + "\n", + "Samples were drawn using NUTS at Sat Apr 21 16:45:21 2018.\n", + "For each parameter, n_eff is a crude measure of effective sample size,\n", + "and Rhat is the potential scale reduction factor on split chains (at \n", + "convergence, Rhat=1).\n" + ] + } + ], + "source": [ + "print(answers)" + ] + }, + { + "cell_type": "code", + "execution_count": 352, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAEICAYAAACzliQjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGC1JREFUeJzt3XuUHGWdxvHvswkEBQyQhBWSyAQTkbAiSgyoiEoUgyhBN0gianRZcc/Cqkc9mngBxBt4dkFW2UuOoIiXgPGWhWhkQV1FjRkuAmOIDDGQIQoTEsCIXBJ++0e9o0WnZ6Z6pjPTM+/zOWfOdL31VvWvq2uefuftnhpFBGZmloe/Ge4CzMxs6Dj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tC3ppK0TdLBQ3h/L5O0ron7+76kRen22yX9rIn7Pk3SD5u1vwbu96WS7kzPzclDff/WWhz6I5SkDZL+nH6Q75P0JUl7DWJ/bZJC0tjB1BURe0XE+sHso1TTuZKekPTH9PVbSV+QdEDp/n4aEYdU3NdX++sXESdExOVNqH2n4xkRX4uI4we77wE4D/hCem6+W7uy2edSbyTNk3SLpIclbZZ0naS2tK7f59qaw6E/sr0+IvYCXgi8CPjocBUy2BeLPra/MiL2BvYD3gA8E7ix2WGgwmj9eTgI6Oinzy49lyRNB74CvB8YD0wD/gN4stRtSJ7r3I3WkzwrEXEv8H3g7wAkHShphaQtkjolvbOnr6TZktrTaOs+SRemVf+Xvj+YRnwvTv3/QdJaSVslrZJ0UGlfIelMSXcCd5bapqfb4yV9RVK3pLslfbQnWNPUyQ2SLpK0BTi3n8f4RER0AKcC3RThgaRXSOoq1fQhSfem0eI6SXMkzQU+DJyaHtuvU98fS/qUpBuAR4CDU9s/lu5akj4v6SFJd0iaU1qxQdKrSsvl3yZ2Op6100WSXiJpTdr3GkkvKa37saRPpGP0R0k/lDSxt+Mj6Z3pud6SnvsDU/tdwMHA/6Q6xvVznBs5l86VtFzSlanGmyQ9v5ddHwH8LiKui8IfI+JbEXFPnRrqPtfWHA79UUDSVOC1wM2p6RtAF3AgMB/4dCmsLgYujohnAM8Grkrtx6bv+6RpgF+omP/9MPBGYBLw07TvspOBo4CZdUr7PMWo7mDg5cDbgHeU1h8FrAf2Bz5V5bFGxA7ge8DLatdJOgQ4C3hRGjG+BtgQET8APk0xktwrIsrB9FbgDGBv4O46d9lT40TgHODbkvarUOpOx7Om1v2Aa4B/ByYAFwLXSJpQ6vZmiuO1P7A78IF6dyTpOOAzwJuAA9LjWAYQEc8G7iGN5CPisb6KbvBcApgHfJNidP514LuSdquz65uA56YX+VeqwvRRX8+1DZxDf2T7rqQHgZ8BP6H4gZwKHAN8KCIejYhbgC9ShBvAE8B0SRMjYltE/LKP/b8L+ExErI2I7RTBeUR5tJ/Wb4mIP5c3lDSGYqS2JI3qNgD/VqoDYFNEfD4ittdu349NFCFTawcwDpgpabeI2BARd/Wzry9HREeq4Yk66+8HPpdGn1cC64ATG6i1NycCd0bEFem+vwHcAby+1OdLEfHbdGyuohgt13MacFlE3JRCfQnwYqX58ooGci4B3BgRy9OxuxDYAzi6dufpfZ5XAJPTY9ks6csVwr+359oGyKE/sp0cEftExEER8c8pHA4EtkTEH0v97qb4YQM4HXgOcEeaUnhdH/s/CLhY0oMpELYAKu0LYGMv206kGJ2WR8/lOvratj+TUy1PERGdwHspporul7SsZ5qjD/3VcG889aqEd1Mc48E6kJ1/s6g9Pn8o3X4E6C0gn7KviNgGPFCzr/4M5FyC0vGLiCf5628FO4mIX0bEmyJiEsXo/VjgI/3UVfe5toFz6I8+m4D9JO1dansWcC9ARNwZEQsppgwuAJZL2hOod7nVjcC7Uhj0fD0tIn5e6tPbZVo3U/x
WUf6t4C919LNtr9J7Aq+nmGraSUR8PSKOSfcbFI+xr/vqr4bJklRafhbFMQb4E/D00rpnNrDfTTz12PTs+946ffvzlH2l53PCAPdVu99ez6Vkaul+/waYwl+PT68iYg3wbdJ7B/X091zbwDj0R5mI2Aj8HPiMpD0kHU4xuv8agKS3SJqURmUPps12ULxh9iTF/HuP/wKWSDosbTte0ikV69hB8Wv8pyTtnaaE3gf0+7HJeiTtJulQijnmZ1JMJdT2OUTScenNykeBP6fHBnAf0KbGP6GzP/DudP+nAIcCK9O6W4AFad0sijnvHvWOZ9lK4DmS3ixprKRTKd4XubrB+qCYS3+HpCPSY/80sDpNqQ1Yf+dScqSkN6r49NV7gceAnaYMJR2T3mzePy0/Fzipl779Ptc2cA790Wkh0EYx4voOcE5EXJvWzQU6JG2jeFN3QZqvfYTizdQb0nTO0RHxHYqR8jJJDwO3Ayc0UMe/UIyG11PMFX8duKzBx3JqqvVBYAXFtMWREVFvNDkOOJ/it4w/UAT2h9O6b6bvD0i6qYH7Xw3MSPv8FDA/Ih5I6z5G8Wb4VuDjFI8PgHrHs7zTtI/XUXwy5QHgg8DrImJzA7X17Ou6VMu3gN+nmhY0up9e9HUuQfFG66kUx+CtwBt7eW/kQYqQvy09nz9I+/tsqU8jz7UNkPxPVMxsICSdC0yPiLcMdy1WnUf6ZmYZceibmWXE0ztmZhnxSN/MLCODukjWrjBx4sRoa2sb7jLMzEaUG2+8cXP6w7c+tVzot7W10d7ePtxlmJmNKJLqXTtqJ57eMTPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLSMv9Re5Qa1t8Td32Dec3439fm5m1Fo/0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8tIpdCXNFfSOkmdkhbXWX+spJskbZc0v2bdIkl3pq9FzSrczMwa12/oSxoDXAKcAMwEFkqaWdPtHuDtwNdrtt0POAc4CpgNnCNp38GXbWZmA1FlpD8b6IyI9RHxOLAMmFfuEBEbIuJW4MmabV8DXBsRWyJiK3AtMLcJdZuZ2QBUCf3JwMbScldqq6LStpLOkNQuqb27u7virs3MrFFV/l2i6rRFxf1X2jYilgJLAWbNmlV137tUb/9GsTf+94pmNhJUGel3AVNLy1OATRX3P5htzcysyaqE/hpghqRpknYHFgArKu5/FXC8pH3TG7jHpzYzMxsG/YZ+RGwHzqII67XAVRHRIek8SScBSHqRpC7gFOC/JXWkbbcAn6B44VgDnJfazMxsGFSZ0yciVgIra9rOLt1eQzF1U2/by4DLBlGjmZk1if8i18wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy0il0Jc0V9I6SZ2SFtdZP07SlWn9akltqX03SZdLuk3SWklLmlu+mZk1ot/QlzQGuAQ4AZgJLJQ0s6bb6cDWiJgOXARckNpPAcZFxPOAI4F39bwgmJnZ0Ksy0p8NdEbE+oh4HFgGzKvpMw+4PN1eDsyRJCCAPSWNBZ4GPA483JTKzcysYVVCfzKwsbTcldrq9omI7cBDwASKF4A/Ab8H7gH+NSK2DLJmMzMboCqhrzptUbHPbGAHcCAwDXi/pIN3ugPpDEntktq7u7srlGRmZgNRJfS7gKml5SnApt76pKmc8cAW4M3ADyLiiYi4H7gBmFV7BxGxNCJmRcSsSZMmNf4ozMyskiqhvwaYIWmapN2BBcCKmj4rgEX
p9nzg+ogIiimd41TYEzgauKM5pZuZWaP6Df00R38WsApYC1wVER2SzpN0Uup2KTBBUifwPqDnY52XAHsBt1O8eHwpIm5t8mMwM7OKxlbpFBErgZU1bWeXbj9K8fHM2u221Ws3M7Ph4b/INTPLiEPfzCwjDn0zs4w49M3MMlLpjdzRoG3xNcNdgpnZsPNI38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMZPPvEne13v4d44bzTxziSszMeueRvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRiqFvqS5ktZJ6pS0uM76cZKuTOtXS2orrTtc0i8kdUi6TdIezSvfzMwa0W/oSxoDXAKcAMwEFkqaWdPtdGBrREwHLgIuSNuOBb4K/FNEHAa8AniiadWbmVlDqoz0ZwOdEbE+Ih4HlgHzavrMAy5Pt5cDcyQJOB64NSJ+DRARD0TEjuaUbmZmjaoS+pOBjaXlrtRWt09EbAceAiYAzwFC0ipJN0n6YL07kHSGpHZJ7d3d3Y0+BjMzq6hK6KtOW1TsMxY4BjgtfX+DpDk7dYxYGhGzImLWpEmTKpRkZmYDUSX0u4CppeUpwKbe+qR5/PHAltT+k4jYHBGPACuBFw62aDMzG5gqob8GmCFpmqTdgQXAipo+K4BF6fZ84PqICGAVcLikp6cXg5cDv2lO6WZm1qh+L7gWEdslnUUR4GOAyyKiQ9J5QHtErAAuBa6Q1Ekxwl+Qtt0q6UKKF44AVkZE/SuTmZnZLlfpKpsRsZJiaqbcdnbp9qPAKb1s+1WKj22amdkw81/kmpllxKFvZpYRh76ZWUYc+mZmGfG/S9zF/G8UzayVeKRvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUbGDncBuWpbfE3d9g3nnzjElZhZTjzSNzPLSKXQlzRX0jpJnZIW11k/TtKVaf1qSW01658laZukDzSnbDMzG4h+Q1/SGOAS4ARgJrBQ0syabqcDWyNiOnARcEHN+ouA7w++XDMzG4wqI/3ZQGdErI+Ix4FlwLyaPvOAy9Pt5cAcSQKQdDKwHuhoTslmZjZQVUJ/MrCxtNyV2ur2iYjtwEPABEl7Ah8CPt7XHUg6Q1K7pPbu7u6qtZuZWYOqhL7qtEXFPh8HLoqIbX3dQUQsjYhZETFr0qRJFUoyM7OBqPKRzS5gaml5CrCplz5dksYC44EtwFHAfEmfBfYBnpT0aER8YdCVm5lZw6qE/hpghqRpwL3AAuDNNX1WAIuAXwDzgesjIoCX9XSQdC6wzYFvZjZ8+g39iNgu6SxgFTAGuCwiOiSdB7RHxArgUuAKSZ0UI/wFu7JoMzMbmEp/kRsRK4GVNW1nl24/CpzSzz7OHUB9ZmbWRP6LXDOzjIy6a+/0dk0bMzPzSN/MLCsOfTOzjDj0zcwyMurm9Ec6X2ffzHYlj/TNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsI5VCX9JcSeskdUpaXGf9OElXpvWrJbWl9ldLulHSben7cc0t38zMGtFv6EsaA1wCnADMBBZKmlnT7XRga0RMBy4CLkjtm4HXR8TzgEXAFc0q3MzMGldlpD8b6IyI9RHxOLAMmFfTZx5webq9HJgjSRFxc0RsSu0dwB6SxjWjcDMza1yV0J8MbCwtd6W2un0iYjvwEDC
hps/fAzdHxGO1dyDpDEntktq7u7ur1m5mZg0aW6GP6rRFI30kHUYx5XN8vTuIiKXAUoBZs2bV7tuAtsXX1G3fcP6JQ1yJmY1kVUb6XcDU0vIUYFNvfSSNBcYDW9LyFOA7wNsi4q7BFmxmZgNXJfTXADMkTZO0O7AAWFHTZwXFG7UA84HrIyIk7QNcAyyJiBuaVbSZmQ1Mv6Gf5ujPAlYBa4GrIqJD0nmSTkrdLgUmSOoE3gf0fKzzLGA68DFJt6Sv/Zv+KMzMrJIqc/pExEpgZU3b2aXbjwKn1Nnuk8AnB1mjmZk1if8i18wsIw59M7OMVJresdbV20c5wR/nNLOdeaRvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUb8kU37C1/J02z0c+hnqK/P9pvZ6ObQH8Uc7mZWy3P6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEH9m0AfMfc5mNPB7pm5llxKFvZpYRT+/YkPF0kNnw80jfzCwjDn0zs4x4esf65Qu3mY0eHumbmWXEoW9mlhGHvplZRjynb03X6HsA/iin2dCpFPqS5gIXA2OAL0bE+TXrxwFfAY4EHgBOjYgNad0S4HRgB/DuiFjVtOrNKujrRWikv7D4BdMa1W/oSxoDXAK8GugC1khaERG/KXU7HdgaEdMlLQAuAE6VNBNYABwGHAj8r6TnRMSOZj8Qy0czP03UrNB0+A5cjsduOB9zlTn92UBnRKyPiMeBZcC8mj7zgMvT7eXAHElK7csi4rGI+B3QmfZnZmbDoMr0zmRgY2m5Cziqtz4RsV3SQ8CE1P7Lmm0n196BpDOAM9LiNknrKlW/s4nA5gFuO9RGUq0wDPXqggFvOuhaB3HfA9lP049ts+rvxZCcC016DCPq50wXDKreg6p0qhL6qtMWFftU2ZaIWAosrVBLnyS1R8Sswe5nKIykWmFk1TuSagXXuyuNpFphaOqtMr3TBUwtLU8BNvXWR9JYYDywpeK2ZmY2RKqE/hpghqRpknaneGN2RU2fFcCidHs+cH1ERGpfIGmcpGnADOBXzSndzMwa1e/0TpqjPwtYRfGRzcsiokPSeUB7RKwALgWukNRJMcJfkLbtkHQV8BtgO3DmLv7kzqCniIbQSKoVRla9I6lWcL270kiqFYagXhUDcjMzy4Evw2BmlhGHvplZRkZF6EuaK2mdpE5Ji4e7nlqSLpN0v6TbS237SbpW0p3p+77DWWMPSVMl/UjSWkkdkt6T2lu13j0k/UrSr1O9H0/t0yStTvVemT6E0BIkjZF0s6Sr03Ir17pB0m2SbpHUntpa8lwAkLSPpOWS7kjn8ItbsV5Jh6Rj2vP1sKT3DkWtIz70S5eJOAGYCSxMl39oJV8G5ta0LQaui4gZwHVpuRVsB94fEYcCRwNnpuPZqvU+BhwXEc8HjgDmSjqa4lIgF6V6t1JcKqRVvAdYW1pu5VoBXhkRR5Q+P96q5wIU1wj7QUQ8F3g+xXFuuXojYl06pkdQXLPsEeA7DEWtETGiv4AXA6tKy0uAJcNdV50624DbS8vrgAPS7QOAdcNdYy91f4/iukstXy/wdOAmir8Y3wyMrXeODHONU9IP83HA1RR/wNiStaZ6NgATa9pa8lwAngH8jvQBlVavt1Tf8cANQ1XriB/pU/8yETtd6qEF/W1E/B4gfd9/mOvZiaQ24AXAalq43jRdcgtwP3AtcBfwYERsT11a6Zz4HPBB4Mm0PIHWrRWKv6D/oaQb0+VSoHXPhYOBbuBLafrsi5L2pHXr7bEA+Ea6vctrHQ2hX+lSD9YYSXsB3wLeGxEPD3c9fYmIHVH8mjyF4oJ+h9brNrRV7UzS64D7I+LGcnOdrsNea8lLI+KFFNOnZ0o6drgL6sNY4IXAf0bEC4A/0QJTOX1J79+cBHxzqO5zNIT+SL3Uw32SDgBI3+8f5nr+QtJuFIH/tYj4dmpu2Xp7RMSDwI8p3ovYJ10SBFrnnHgpcJK
kDRRXqz2OYuTfirUCEBGb0vf7KeacZ9O650IX0BURq9PycooXgVatF4oX05si4r60vMtrHQ2hX+UyEa2ofOmKRRRz58MuXRL7UmBtRFxYWtWq9U6StE+6/TTgVRRv3v2I4pIg0CL1RsSSiJgSEW0U5+n1EXEaLVgrgKQ9Je3dc5ti7vl2WvRciIg/ABslHZKa5lBcDaAl600W8tepHRiKWof7TYwmvRHyWuC3FHO5HxnueurU9w3g98ATFKOR0ynmcq8D7kzf9xvuOlOtx1BML9wK3JK+XtvC9R4O3JzqvR04O7UfTHGdp06KX53HDXetNXW/Ari6lWtNdf06fXX0/Gy16rmQajsCaE/nw3eBfVu1XooPHjwAjC+17fJafRkGM7OMjIbpHTMzq8ihb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlG/h9vN194yrGc8gAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig,ax=plt.subplots(1)\n", + "j=ax.hist(answers['tau'],bins=50,density=True)\n", + "j=ax.set_title('Posterior Distribution of Pop SD')" + ] + }, + { + "cell_type": "code", + "execution_count": 353, + "metadata": {}, + "outputs": [], + "source": [ + "predictions=answers.extract()['results']\n", + "def best_school(x,i):\n", + " return x[i]>=max(x)\n", + "def better_school(x,i,j):\n", + " return x[i]>=x[j]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 354, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chance is empirical probability that given school is best\n", + " effect se Chance\n", + "school \n", + "A 28 15 0.191\n", + "B 8 10 0.109\n", + "C -3 16 0.105\n", + "D 7 11 0.124\n", + "E -1 9 0.077\n", + "F 1 11 0.088\n", + "G 18 10 0.186\n", + "H 12 18 0.120\n", + "Only thing that worries me is that Gelman has A best with prob=10%\n" + ] + } + ], + "source": [ + "print('Chance is empirical probability that given school is best')\n", + "p55['Chance']=[sum([best_school(x,i) for x in predictions])/len(predictions) for i in range(8)]\n", + "print(p55)\n", + "print(\"Only thing that worries me is that Gelman has A best with prob=10%\")" + ] + }, + { + "cell_type": "code", + "execution_count": 332, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empirical Probability that school in given row is\n", + " as good as or better than corresponding column\n", + " \n", + " A B C D E F G H\n", + "A 1.00 0.61 0.64 0.60 0.68 0.66 0.53 0.58 \n", + "B 0.39 1.00 0.53 0.50 0.59 0.56 0.41 0.47 \n", + "C 0.36 0.47 1.00 0.47 0.55 0.52 0.37 0.44 \n", + "D 0.40 0.50 0.53 1.00 0.58 0.56 0.42 0.47 \n", + "E 0.32 0.41 0.45 0.42 1.00 0.47 0.33 0.38 \n", + "F 0.34 0.44 0.48 0.44 0.53 1.00 0.35 0.42 \n", + "G 0.47 0.59 0.63 0.58 0.67 0.65 1.00 0.57 \n", + "H 0.42 0.53 0.56 0.53 0.62 0.58 0.43 1.00 
\n" + ] + } + ], + "source": [ + "compare=[[sum([better_school(x,i,j) for x in predictions])/len(predictions) for i in range(8)] for j in range(8)]\n", + "l=['A','B','C','D','E','F','G','H']\n", + "print('Empirical Probability that school in given row is\\n as good as or better than corresponding column')\n", + "print(' ',end='')\n", + "print('{0:10}'.format(''))\n", + "for i in range(8):\n", + " print('{0:>10}'.format(l[i]),end='')\n", + "print()\n", + "for j in range(8):\n", + " print('{0:<6}'.format(l[j]),end='')\n", + " for i in range(8):\n", + " print('{0:4.2f} '.format(round(compare[i][j],2)),end=\"\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Second part" + ] + }, + { + "cell_type": "code", + "execution_count": 340, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_3314be03c6a1d2db4b5293ee7101b10c NOW.\n" + ] + } + ], + "source": [ + "stan_code_2='''\n", + "data {\n", + " real means[8];\n", + " real se[8];\n", + "\n", + "}\n", + "\n", + "parameters {\n", + " real theta[8] ; \n", + "// real mu ; \n", + "// real tau ; \n", + "}\n", + "\n", + "model {\n", + " \n", + " // theta~normal(mu,tau) ; \n", + " means~normal(theta,se) ; \n", + " \n", + "}\n", + "\n", + "generated quantities {\n", + " real results[8] ; \n", + " \n", + " \n", + " for(i in 1:8) {\n", + " results[i]=normal_rng(theta[i],se[i]);\n", + " }\n", + "}\n", + "'''\n", + "sm=pystan.StanModel(model_code=stan_code_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 341, + "metadata": {}, + "outputs": [], + "source": [ + "answers=sm.sampling(data=dict({'means':p55['effect'],'se':p55['se']}),iter=5000)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 342, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Inference for Stan model: anon_model_3314be03c6a1d2db4b5293ee7101b10c.\n", + "4 chains, each with 
iter=5000; warmup=2500; thin=1; \n", + "post-warmup draws per chain=2500, total post-warmup draws=10000.\n", + "\n", + " mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat\n", + "theta[0] 27.97 0.15 14.96 -1.53 17.74 27.93 38.15 57.42 10000 1.0\n", + "theta[1] 8.13 0.1 9.94 -11.16 1.34 8.09 14.73 27.7 10000 1.0\n", + "theta[2] -3.24 0.16 16.22 -34.28 -14.16 -3.11 7.61 28.45 10000 1.0\n", + "theta[3] 7.01 0.11 10.91 -14.33 -0.39 7.0 14.49 28.16 10000 1.0\n", + "theta[4] -0.86 0.09 8.9 -18.45 -6.75 -0.86 4.95 16.99 10000 1.0\n", + "theta[5] 1.02 0.11 11.09 -20.34 -6.62 1.13 8.44 22.73 10000 1.0\n", + "theta[6] 17.89 0.1 10.11 -2.11 11.17 17.83 24.75 37.53 10000 1.0\n", + "theta[7] 11.87 0.18 17.87 -22.34 -0.36 11.86 23.8 46.91 10000 1.0\n", + "results[0] 28.25 0.21 21.3 -13.95 13.99 28.29 42.64 69.78 10000 1.0\n", + "results[1] 8.09 0.14 14.22 -19.52 -1.66 8.04 17.7 36.06 10000 1.0\n", + "results[2] -3.5 0.23 22.91 -47.94 -19.39 -3.27 12.05 40.73 10000 1.0\n", + "results[3] 7.01 0.16 15.6 -23.52 -3.69 7.07 17.64 37.45 10000 1.0\n", + "results[4] -0.83 0.13 12.62 -25.85 -9.24 -0.93 7.68 23.68 10000 1.0\n", + "results[5] 0.86 0.15 15.4 -28.99 -9.4 0.83 11.01 32.07 10000 1.0\n", + "results[6] 17.77 0.14 14.11 -10.2 8.31 17.82 27.31 45.08 10000 1.0\n", + "results[7] 11.77 0.25 25.4 -38.49 -5.21 11.54 28.6 61.73 10000 1.0\n", + "lp__ -4.0 0.03 1.99 -8.74 -5.13 -3.68 -2.53 -1.07 4933 1.0\n", + "\n", + "Samples were drawn using NUTS at Sat Apr 21 16:37:25 2018.\n", + "For each parameter, n_eff is a crude measure of effective sample size,\n", + "and Rhat is the potential scale reduction factor on split chains (at \n", + "convergence, Rhat=1)." 
+ ] + }, + "execution_count": 342, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "answers" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chance is empirical probability that given school is best\n", + " effect se Chance\n", + "school \n", + "A 28 15 0.4455\n", + "B 8 10 0.0575\n", + "C -3 16 0.0529\n", + "D 7 11 0.0588\n", + "E -1 9 0.0119\n", + "F 1 11 0.0276\n", + "G 18 10 0.1605\n", + "H 12 18 0.1853\n" + ] + } + ], + "source": [ + "predictions=answers.extract()['results']\n", + "def best_school(x,i):\n", + " return x[i]>=max(x)\n", + "def better_school(x,i,j):\n", + " return x[i]>=x[j]\n", + "print('Chance is empirical probability that given school is best')\n", + "p55['Chance']=[sum([best_school(x,i) for x in predictions])/len(predictions) for i in range(8)]\n", + "print(p55)" + ] + }, + { + "cell_type": "code", + "execution_count": 344, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empirical Probability that school in given row is\n", + " as good as or better than corresponding column\n", + " \n", + " A B C D E F G H\n", + "A 1.00 0.78 0.84 0.79 0.88 0.85 0.66 0.70 \n", + "B 0.22 1.00 0.67 0.52 0.68 0.64 0.31 0.45 \n", + "C 0.16 0.33 1.00 0.36 0.46 0.45 0.21 0.33 \n", + "D 0.21 0.48 0.64 1.00 0.65 0.61 0.30 0.44 \n", + "E 0.12 0.32 0.54 0.35 1.00 0.47 0.16 0.33 \n", + "F 0.15 0.36 0.55 0.39 0.53 1.00 0.21 0.36 \n", + "G 0.34 0.69 0.79 0.70 0.84 0.79 1.00 0.58 \n", + "H 0.30 0.55 0.67 0.56 0.67 0.64 0.42 1.00 \n" + ] + } + ], + "source": [ + "compare=[[sum([better_school(x,i,j) for x in predictions])/len(predictions) for i in range(8)] for j in range(8)]\n", + "l=['A','B','C','D','E','F','G','H']\n", + "print('Empirical Probability that school in given row is\\n as good as or better than corresponding column')\n", + "print(' ',end='')\n", + 
"print('{0:10}'.format(''))\n", + "for i in range(8):\n", + " print('{0:>10}'.format(l[i]),end='')\n", + "print()\n", + "for j in range(8):\n", + " print('{0:<6}'.format(l[j]),end='')\n", + " for i in range(8):\n", + " print('{0:4.2f} '.format(round(compare[i][j],2)),end=\"\")\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/README.md b/README.md index f2310b7..85c4867 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # BDA Solutions to some problems from Bayesian Data Analysis, 3rd edition, by Gelman *et. al.* -This repository has selected solutions to Bayesian Data Analysis, 3rd edition, by Gelman *et. al.* - They are offered with no warranty and may be wrong. Comments welcome.