\n", - " | Unnamed: 0 | \n", - "abv | \n", - "ibu | \n", - "id | \n", - "name | \n", - "style | \n", - "brewery_id | \n", - "ounces | \n", - "
---|---|---|---|---|---|---|---|---|
0 | \n", - "0 | \n", - "0.050 | \n", - "NaN | \n", - "1436 | \n", - "Pub Beer | \n", - "American Pale Lager | \n", - "408 | \n", - "12.0 | \n", - "
1 | \n", - "1 | \n", - "0.066 | \n", - "NaN | \n", - "2265 | \n", - "Devil's Cup | \n", - "American Pale Ale (APA) | \n", - "177 | \n", - "12.0 | \n", - "
2 | \n", - "2 | \n", - "0.071 | \n", - "NaN | \n", - "2264 | \n", - "Rise of the Phoenix | \n", - "American IPA | \n", - "177 | \n", - "12.0 | \n", - "
3 | \n", - "3 | \n", - "0.090 | \n", - "NaN | \n", - "2263 | \n", - "Sinister | \n", - "American Double / Imperial IPA | \n", - "177 | \n", - "12.0 | \n", - "
4 | \n", - "4 | \n", - "0.075 | \n", - "NaN | \n", - "2262 | \n", - "Sex and Candy | \n", - "American IPA | \n", - "177 | \n", - "12.0 | \n", - "
5 | \n", - "5 | \n", - "0.077 | \n", - "NaN | \n", - "2261 | \n", - "Black Exodus | \n", - "Oatmeal Stout | \n", - "177 | \n", - "12.0 | \n", - "
6 | \n", - "6 | \n", - "0.045 | \n", - "NaN | \n", - "2260 | \n", - "Lake Street Express | \n", - "American Pale Ale (APA) | \n", - "177 | \n", - "12.0 | \n", - "
7 | \n", - "7 | \n", - "0.065 | \n", - "NaN | \n", - "2259 | \n", - "Foreman | \n", - "American Porter | \n", - "177 | \n", - "12.0 | \n", - "
8 | \n", - "8 | \n", - "0.055 | \n", - "NaN | \n", - "2258 | \n", - "Jade | \n", - "American Pale Ale (APA) | \n", - "177 | \n", - "12.0 | \n", - "
9 | \n", - "9 | \n", - "0.086 | \n", - "NaN | \n", - "2131 | \n", - "Cone Crusher | \n", - "American Double / Imperial IPA | \n", - "177 | \n", - "12.0 | \n", - "
\n", - " | abv | \n", - "ibu | \n", - "style | \n", - "
---|---|---|---|
14 | \n", - "0.061 | \n", - "60.0 | \n", - "American Pale Ale (APA) | \n", - "
21 | \n", - "0.099 | \n", - "92.0 | \n", - "American Barleywine | \n", - "
22 | \n", - "0.079 | \n", - "45.0 | \n", - "Winter Warmer | \n", - "
24 | \n", - "0.044 | \n", - "42.0 | \n", - "American Pale Ale (APA) | \n", - "
25 | \n", - "0.049 | \n", - "17.0 | \n", - "Fruit / Vegetable Beer | \n", - "
26 | \n", - "0.049 | \n", - "17.0 | \n", - "Fruit / Vegetable Beer | \n", - "
27 | \n", - "0.049 | \n", - "17.0 | \n", - "Fruit / Vegetable Beer | \n", - "
28 | \n", - "0.070 | \n", - "70.0 | \n", - "American IPA | \n", - "
29 | \n", - "0.070 | \n", - "70.0 | \n", - "American IPA | \n", - "
30 | \n", - "0.070 | \n", - "70.0 | \n", - "American IPA | \n", - "
\n", - " | abv | \n", - "ibu | \n", - "
---|---|---|
style | \n", - "\n", - " | \n", - " |
Abbey Single Ale | \n", - "0.049000 | \n", - "22.000000 | \n", - "
Altbier | \n", - "0.054625 | \n", - "34.125000 | \n", - "
American Adjunct Lager | \n", - "0.046545 | \n", - "11.000000 | \n", - "
American Amber / Red Ale | \n", - "0.057195 | \n", - "36.298701 | \n", - "
American Amber / Red Lager | \n", - "0.048063 | \n", - "23.250000 | \n", - "
American Barleywine | \n", - "0.099000 | \n", - "96.000000 | \n", - "
American Black Ale | \n", - "0.073150 | \n", - "68.900000 | \n", - "
American Blonde Ale | \n", - "0.050148 | \n", - "20.983607 | \n", - "
American Brown Ale | \n", - "0.057842 | \n", - "29.894737 | \n", - "
American Dark Wheat Ale | \n", - "0.052200 | \n", - "27.600000 | \n", - "
\n", - " | country | \n", - "year | \n", - "pop | \n", - "continent | \n", - "lifeExp | \n", - "gdpPercap | \n", - "
---|---|---|---|---|---|---|
0 | \n", - "Afghanistan | \n", - "1952 | \n", - "8425333.0 | \n", - "Asia | \n", - "28.801 | \n", - "779.445314 | \n", - "
1 | \n", - "Afghanistan | \n", - "1957 | \n", - "9240934.0 | \n", - "Asia | \n", - "30.332 | \n", - "820.853030 | \n", - "
2 | \n", - "Afghanistan | \n", - "1962 | \n", - "10267083.0 | \n", - "Asia | \n", - "31.997 | \n", - "853.100710 | \n", - "
3 | \n", - "Afghanistan | \n", - "1967 | \n", - "11537966.0 | \n", - "Asia | \n", - "34.020 | \n", - "836.197138 | \n", - "
4 | \n", - "Afghanistan | \n", - "1972 | \n", - "13079460.0 | \n", - "Asia | \n", - "36.088 | \n", - "739.981106 | \n", - "
\n", - " | country | \n", - "pop | \n", - "continent | \n", - "lifeExp | \n", - "gdpPercap | \n", - "
---|---|---|---|---|---|
year | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
1952 | \n", - "Afghanistan | \n", - "8425333.0 | \n", - "Asia | \n", - "28.801 | \n", - "779.445314 | \n", - "
1957 | \n", - "Afghanistan | \n", - "9240934.0 | \n", - "Asia | \n", - "30.332 | \n", - "820.853030 | \n", - "
1962 | \n", - "Afghanistan | \n", - "10267083.0 | \n", - "Asia | \n", - "31.997 | \n", - "853.100710 | \n", - "
1967 | \n", - "Afghanistan | \n", - "11537966.0 | \n", - "Asia | \n", - "34.020 | \n", - "836.197138 | \n", - "
1972 | \n", - "Afghanistan | \n", - "13079460.0 | \n", - "Asia | \n", - "36.088 | \n", - "739.981106 | \n", - "
1977 | \n", - "Afghanistan | \n", - "14880372.0 | \n", - "Asia | \n", - "38.438 | \n", - "786.113360 | \n", - "
1982 | \n", - "Afghanistan | \n", - "12881816.0 | \n", - "Asia | \n", - "39.854 | \n", - "978.011439 | \n", - "
1987 | \n", - "Afghanistan | \n", - "13867957.0 | \n", - "Asia | \n", - "40.822 | \n", - "852.395945 | \n", - "
1992 | \n", - "Afghanistan | \n", - "16317921.0 | \n", - "Asia | \n", - "41.674 | \n", - "649.341395 | \n", - "
1997 | \n", - "Afghanistan | \n", - "22227415.0 | \n", - "Asia | \n", - "41.763 | \n", - "635.341351 | \n", - "
2002 | \n", - "Afghanistan | \n", - "25268405.0 | \n", - "Asia | \n", - "42.129 | \n", - "726.734055 | \n", - "
2007 | \n", - "Afghanistan | \n", - "31889923.0 | \n", - "Asia | \n", - "43.828 | \n", - "974.580338 | \n", - "
\n", - " | pop | \n", - "lifeExp | \n", - "gdpPercap | \n", - "
---|---|---|---|
count | \n", - "1.200000e+01 | \n", - "12.000000 | \n", - "12.000000 | \n", - "
mean | \n", - "1.582372e+07 | \n", - "37.478833 | \n", - "802.674598 | \n", - "
std | \n", - "7.114583e+06 | \n", - "5.098646 | \n", - "108.202929 | \n", - "
min | \n", - "8.425333e+06 | \n", - "28.801000 | \n", - "635.341351 | \n", - "
25% | \n", - "1.122025e+07 | \n", - "33.514250 | \n", - "736.669343 | \n", - "
50% | \n", - "1.347371e+07 | \n", - "39.146000 | \n", - "803.483195 | \n", - "
75% | \n", - "1.779529e+07 | \n", - "41.696250 | \n", - "852.572136 | \n", - "
max | \n", - "3.188992e+07 | \n", - "43.828000 | \n", - "978.011439 | \n", - "
\n", - " | year | \n", - "pop | \n", - "continent | \n", - "lifeExp | \n", - "gdpPercap | \n", - "
---|---|---|---|---|---|
country | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
Afghanistan | \n", - "1952 | \n", - "8425333.0 | \n", - "Asia | \n", - "28.801 | \n", - "779.445314 | \n", - "
Albania | \n", - "1952 | \n", - "1282697.0 | \n", - "Europe | \n", - "55.230 | \n", - "1601.056136 | \n", - "
Algeria | \n", - "1952 | \n", - "9279525.0 | \n", - "Africa | \n", - "43.077 | \n", - "2449.008185 | \n", - "
Angola | \n", - "1952 | \n", - "4232095.0 | \n", - "Africa | \n", - "30.015 | \n", - "3520.610273 | \n", - "
Argentina | \n", - "1952 | \n", - "17876956.0 | \n", - "Americas | \n", - "62.485 | \n", - "5911.315053 | \n", - "
\n", - " | continent | \n", - "gdpPercap | \n", - "lifeExp | \n", - "pop | \n", - "year | \n", - "
---|---|---|---|---|---|
216 | \n", - "Asia | \n", - "368.469286 | \n", - "39.417 | \n", - "4693836.0 | \n", - "1952 | \n", - "
217 | \n", - "Asia | \n", - "434.038336 | \n", - "41.366 | \n", - "5322536.0 | \n", - "1957 | \n", - "
218 | \n", - "Asia | \n", - "496.913648 | \n", - "43.415 | \n", - "6083619.0 | \n", - "1962 | \n", - "
219 | \n", - "Asia | \n", - "523.432314 | \n", - "45.415 | \n", - "6960067.0 | \n", - "1967 | \n", - "
220 | \n", - "Asia | \n", - "421.624026 | \n", - "40.317 | \n", - "7450606.0 | \n", - "1972 | \n", - "
221 | \n", - "Asia | \n", - "524.972183 | \n", - "31.220 | \n", - "6978607.0 | \n", - "1977 | \n", - "
222 | \n", - "Asia | \n", - "624.475478 | \n", - "50.957 | \n", - "7272485.0 | \n", - "1982 | \n", - "
223 | \n", - "Asia | \n", - "683.895573 | \n", - "53.914 | \n", - "8371791.0 | \n", - "1987 | \n", - "
224 | \n", - "Asia | \n", - "682.303175 | \n", - "55.803 | \n", - "10150094.0 | \n", - "1992 | \n", - "
225 | \n", - "Asia | \n", - "734.285170 | \n", - "56.534 | \n", - "11782962.0 | \n", - "1997 | \n", - "
226 | \n", - "Asia | \n", - "896.226015 | \n", - "56.752 | \n", - "12926707.0 | \n", - "2002 | \n", - "
227 | \n", - "Asia | \n", - "1713.778686 | \n", - "59.723 | \n", - "14131858.0 | \n", - "2007 | \n", - "
\n", - " | continent | \n", - "gdpPercap | \n", - "lifeExp | \n", - "pop | \n", - "year | \n", - "
---|---|---|---|---|---|
1284 | \n", - "Africa | \n", - "493.323875 | \n", - "40.000 | \n", - "2534927.0 | \n", - "1952 | \n", - "
1285 | \n", - "Africa | \n", - "540.289398 | \n", - "41.500 | \n", - "2822082.0 | \n", - "1957 | \n", - "
1286 | \n", - "Africa | \n", - "597.473073 | \n", - "43.000 | \n", - "3051242.0 | \n", - "1962 | \n", - "
1287 | \n", - "Africa | \n", - "510.963714 | \n", - "44.100 | \n", - "3451079.0 | \n", - "1967 | \n", - "
1288 | \n", - "Africa | \n", - "590.580664 | \n", - "44.600 | \n", - "3992121.0 | \n", - "1972 | \n", - "
1289 | \n", - "Africa | \n", - "670.080601 | \n", - "45.000 | \n", - "4657072.0 | \n", - "1977 | \n", - "
1290 | \n", - "Africa | \n", - "881.570647 | \n", - "46.218 | \n", - "5507565.0 | \n", - "1982 | \n", - "
1291 | \n", - "Africa | \n", - "847.991217 | \n", - "44.020 | \n", - "6349365.0 | \n", - "1987 | \n", - "
1292 | \n", - "Africa | \n", - "737.068595 | \n", - "23.599 | \n", - "7290203.0 | \n", - "1992 | \n", - "
1293 | \n", - "Africa | \n", - "589.944505 | \n", - "36.087 | \n", - "7212583.0 | \n", - "1997 | \n", - "
1294 | \n", - "Africa | \n", - "785.653765 | \n", - "43.413 | \n", - "7852401.0 | \n", - "2002 | \n", - "
1295 | \n", - "Africa | \n", - "863.088464 | \n", - "46.242 | \n", - "8860588.0 | \n", - "2007 | \n", - "
" - ], - "text/plain": [ - "Hi. Didn't receive your email... Gapminder comes from this R library: https://t.co/BU1IFIGSxm. I will add citation asap.
— R+Py Graph Galleries (@R_Graph_Gallery) October 16, 2017
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note one immediate outcome of our reaching out to the author of The Python Graph Gallery: he realized he was not citing the source of his data [3], and promised to add proper credit. _It's always good form to credit your sources!_\n", - "\n", - "We visited the online repository of the data source, and posted an [issue report](https://github.com/jennybc/gapminder/issues/18) there, with our questions about data quality. The author promptly responded, saying that _her_ source was the [Gapminder.org website](http://www.gapminder.org/data/)—**Gapminder** is the non-profit founded by Hans Rosling to host public data and visualizations. She also said: _\" I don't doubt there could be data quality problems! It should definitely NOT be used as an authoritative source for life expectancy\"_\n", - "\n", - "So it turns out that the data we're using comes from a set of tools meant for teaching, and is not up-to-date with the latest vital statistics. The author ended up [adding a warning](https://github.com/jennybc/gapminder/commit/7b3ac7f477c78f21865fa7defea20e72cb9e2b8a) to make this clear to visitors of the repository on GitHub. \n", - "\n", - "#### This is a wonderful example of how people collaborate online via the open-source model.\n", - "\n", - "##### Note:\n", - "\n", - "For the most accurate data, you can visit the website of the [World Bank](https://data.worldbank.org)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using widgets to visualize interactively\n", - "\n", - "One more thing! This whole exploration began with our viewing the 2006 TED Talk by Hans Rosling: [\"The best stats you've ever seen\"](https://www.ted.com/talks/hans_rosling_shows_the_best_stats_you_ve_ever_seen). One of the most effective parts of the presentation is seeing the _animated_ bubble chart, illustrating how countries became healthier and richer over time. 
Do you want to make something like that?\n", - "\n", - "You can! Introducing [Jupyter Widgets](https://ipywidgets.readthedocs.io/en/latest/user_guide.html). The magic of interactive widgets is that they tie together the running Python code in a Jupyter notebook with JavaScript and HTML running in the browser. You can use widgets to build interactive controls on data visualizations, with buttons, sliders, and more.\n", - "\n", - "To use widgets, the first step is to import the `widgets` module." - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from ipywidgets import widgets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After importing `widgets`, you have available several UI (User Interface) elements. One of our favorites is a _Slider_: an interactive sliding button. Here is a default slider that takes integer values, from 0 to 100 (but does nothing):" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4a5474f75f08400090ca199b832ec70b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "A Jupyter Widget" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "widgets.IntSlider()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "What we'd like to do is make an interactive visualization of bubble charts, with the year in a slider, so that we can run forwards and backwards in time by sliding the button, watching our plot update the bubbles in real time. Sound like magic? It almost is.\n", - "\n", - "The magic happens when you program what should happen when the value in the slider changes. A typical scenario is having a function that is executed with the value in the slider, interactively. To create that, we need two things:\n", - "\n", - "1. 
A function that will be called with the slider values, and\n", - "2. A call to an _interaction_ function from the `ipywidgets` package.\n", - "\n", - "Several interaction functions are available, for different actions you expect from the user: a click, a text entered in a box, or sliding the button on a slider.\n", - "You will need to explore the Jupyter Widgets documentation [4] to learn more.\n", - "\n", - "For this example, we'll be using a slider, a plotting function that makes our bubble chart, and the [`.interact()`](http://ipywidgets.readthedocs.io/en/stable/examples/Using%20Interact.html#) function to call our plotting function with each value of the slider.\n", - "\n", - "We do everything in one cell below. The first line creates an integer-value slider with our known years—from a minimum of 1952 to a maximum of 2007, stepping by 5—and assigns it to the variable name `slider`.\n", - "\n", - "Next, we define the function `roslingplot()`, which re-calculates the array of population values, gets the year-group we need from the `by_year` _GroupBy_ object, and makes a scatter plot of life expectancy vs. per-capita income, like we did above. The `populations` array (divided by 60,000) sets the size of the bubble, and the previously defined `colors` array sets the color coding by continent.\n", - "\n", - "We also removed the colorbar (which added little information), and added the option `sharex=False` following the workaround suggested by someone on the open [issue report](https://github.com/pandas-dev/pandas/issues/10611) for the plotting bug we mentioned above.\n", - "\n", - "The last line in the cell below is a call to `.interact()`, passing our plotting function and the slider value assigned to its argument, `year`. Watch the magic happen!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bbc97145cc924ae3a765f37a10594f60", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "A Jupyter Widget" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "slider = widgets.IntSlider(min=1952, max=2007, step=5)\n", - "\n", - "def roslingplot(year):\n", - " populations = by_year.get_group(year)['pop'].values\n", - " \n", - " by_year.get_group(year).plot.scatter(figsize=(12,8), \n", - " x='gdpPercap', y='lifeExp', s=populations/60000, \n", - " c=colors, cmap='Accent',\n", - " title='Life expectancy vs per-capita GDP in the year '+ str(year)+'\\n',\n", - " logx = 'True',\n", - " ylim = (25,85),\n", - " xlim = (1e2, 1e5),\n", - " edgecolors=\"white\",\n", - " alpha=0.6,\n", - " colorbar=False,\n", - " sharex=False)\n", - " pyplot.show();\n", - " \n", - "widgets.interact(roslingplot, year=slider);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## References\n", - "\n", - "1. [The Soviet War in Afghanistan, 1979-1989](https://www.theatlantic.com/photo/2014/08/the-soviet-war-in-afghanistan-1979-1989/100786/), The Atlantic (2014), by Alan Taylor.\n", - "\n", - "2. US National Research Council Roundtable on the Demography of Forced Migration; H.E. Reed, C.B. Keely, editors. Forced Migration & Mortality (2001), National Academies Press, Washington DC; Chapter 5: The Demographic Analysis of Mortality Crises: The Case of Cambodia, 1970-1979, Patrick Heuveline. Available at: https://www.ncbi.nlm.nih.gov/books/NBK223346/\n", - "\n", - "3. gapminder: Data from Gapminder (R data package), by Jennifer (Jenny) Bryan, repository at https://github.com/jennybc/gapminder, v0.3.0 (Version v0.3.0) on Zenodo: https://doi.org/10.5281/zenodo.594018, licensed CC-BY 3.0\n", - "\n", - "4. 
[Jupyter Widgets User Guide](https://ipywidgets.readthedocs.io/en/latest/user_guide.html)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "Hi. Didn't receive your email... Gapminder comes from this R library: https://t.co/BU1IFIGSxm. I will add citation asap.
— R+Py Graph Galleries (@R_Graph_Gallery) October 16, 2017