diff --git a/lecture_17/in-class_regression.pdf b/lecture_17/in-class_regression.pdf new file mode 100644 index 0000000..628b495 Binary files /dev/null and b/lecture_17/in-class_regression.pdf differ diff --git a/lecture_17/lecture_17.ipynb b/lecture_17/lecture_17.ipynb index 3159c42..35e61da 100644 --- a/lecture_17/lecture_17.ipynb +++ b/lecture_17/lecture_17.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 65, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 5, "metadata": { "collapsed": true }, @@ -2218,7 +2218,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 6, "metadata": { "collapsed": false }, @@ -2385,7 +2385,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 7, "metadata": { "collapsed": false }, @@ -2409,7 +2409,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 8, "metadata": { "collapsed": false }, diff --git a/lecture_18/.Newtint.m.swp b/lecture_18/.Newtint.m.swp new file mode 100644 index 0000000..769fe2b Binary files /dev/null and b/lecture_18/.Newtint.m.swp differ diff --git a/lecture_18/.ipynb_checkpoints/lecture_18-checkpoint.ipynb b/lecture_18/.ipynb_checkpoints/lecture_18-checkpoint.ipynb new file mode 100644 index 0000000..2fd6442 --- /dev/null +++ b/lecture_18/.ipynb_checkpoints/lecture_18-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lecture_18/Newtint.m b/lecture_18/Newtint.m new file mode 100644 index 0000000..93e77f6 --- /dev/null +++ b/lecture_18/Newtint.m @@ -0,0 +1,33 @@ +function yint = Newtint(x,y,xx) +% Newtint: Newton interpolating polynomial +% yint = Newtint(x,y,xx): Uses an (n - 1)-order Newton +% interpolating polynomial based on n data points (x, y) +% to determine a value of the dependent variable (yint) +% at a given value of the independent 
variable, xx. +% input: +% x = independent variable +% y = dependent variable +% xx = value of independent variable at which +% interpolation is calculated +% output: +% yint = interpolated value of dependent variable + +% compute the finite divided differences in the form of a +% difference table +n = length(x); +if length(y)~=n, error('x and y must be same length'); end +b = zeros(n,n); +% assign dependent variables to the first column of b. +b(:,1) = y(:); % the (:) ensures that y is a column vector. +for j = 2:n + for i = 1:n-j+1 + b(i,j) = (b(i+1,j-1)-b(i,j-1))/(x(i+j-1)-x(i)); + end +end +% use the finite divided differences to interpolate +xt = 1; +yint = b(1,1); +for j = 1:n-1 + xt = xt*(xx-x(j)); + yint = yint+b(1,j+1)*xt; +end diff --git a/lecture_18/challenger_oring.csv b/lecture_18/challenger_oring.csv new file mode 100644 index 0000000..11d647e --- /dev/null +++ b/lecture_18/challenger_oring.csv @@ -0,0 +1,24 @@ +Flight#,Temp,O-Ring Problem +1,53,1 +2,57,1 +3,58,1 +4,63,1 +5,66,0 +6,66.8,0 +7,67,0 +8,67.2,0 +9,68,0 +10,69,0 +11,69.8,1 +12,69.8,0 +13,70.2,1 +14,70.2,0 +15,72,0 +16,73,0 +17,75,0 +18,75,1 +19,75.8,0 +20,76.2,0 +21,78,0 +22,79,0 +23,81,0 diff --git a/lecture_18/lecture_18.ipynb b/lecture_18/lecture_18.ipynb new file mode 100644 index 0000000..10a9d0a --- /dev/null +++ b/lecture_18/lecture_18.ipynb @@ -0,0 +1,1882 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "setdefaults" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%plot --format svg" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Nonlinear Regression\n", + "\n", + "We can define any function and minimize the sum of squares error even if the constants cannot be separated.\n", + "\n", + "$S_{r}=\\left[y-f(z_{1},z_{2},...)\\right]^{2}$\n", + "\n", + "Consider the function, 
\n", + "\n", + "$f(x) = a_{0}(1-e^{a_{1}x})$\n", + "\n", + "We can define the sum of squares error as a function of $a_{0}$ and $a_{1}$:\n", + "\n", + "$f_{SSE}(a_{0},a_{1})=\\sum_{i=1}^{n}\\left[y- a_{0}(1-e^{a_{1}x})\\right]^{2}$" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "function [SSE,yhat] = sse_nonlin_exp(a,x,y)\n", + " % This is a sum of squares error function based on \n", + " % the two input constants a0 and a1 where a=[a0,a1]\n", + " % and the data is x (independent), y (dependent)\n", + " % and yhat is the model with the given a0 and a1 values\n", + " a0=a(1);\n", + " a1=a(2);\n", + " yhat=a0*(1-exp(a1*x));\n", + " SSE=sum((y-yhat).^2); % reuse yhat so SSE always matches the returned model\n", + "end" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Where the data we are fitting is:\n", + "\n", + "| x | y |\n", + "|---|---|\n", + " | 0.0 | 0.41213|\n", + " | 1.0 | -2.65190|\n", + " | 2.0 | 15.04049|\n", + " | 3.0 | 5.19368|\n", + " | 4.0 | -0.71086|\n", + " | 5.0 | 12.69008|\n", + " | 6.0 | 29.20309|\n", + " | 7.0 | 58.68879|\n", + " | 8.0 | 91.61117|\n", + " | 9.0 | 173.75492|\n", + " | 10.0 | 259.04083|" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "data=[\n", + " 0.00000 0.41213\n", + " 1.00000 -2.65190\n", + " 2.00000 15.04049\n", + " 3.00000 5.19368\n", + " 4.00000 -0.71086\n", + " 5.00000 12.69008\n", + " 6.00000 29.20309\n", + " 7.00000 58.68879\n", + " 8.00000 91.61117\n", + " 9.00000 173.75492\n", + " 10.00000 259.04083\n", + "\n", + "];\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the sum of squares for a0=-2.00 and a1=0.20 is 98118.4\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "Gnuplot\n", + "Produced by GNUPLOT 5.0 
patchlevel 3 \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\t\n", + "\t\t-50\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t50\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t100\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t150\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t200\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t250\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t300\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t4\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t6\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t8\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t10\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\tgnuplot_plot_1a\n", + "\n", + "\n", + "\n", + "\t \n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "\t\n", + "\tgnuplot_plot_2a\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "[SSE,yhat]=sse_nonlin_exp([-2,0.2],data(:,1),data(:,2));\n", + "fprintf('the sum of squares for a0=%1.2f and a1=%1.2f is %1.1f',...\n", + "-2,0.2,SSE)\n", + "plot(data(:,1),data(:,2),'o',data(:,1),yhat)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a 
=\n", + "\n", + " -1.71891 0.50449\n", + "\n", + "fsse = 633.70\n" + ] + } + ], + "source": [ + "[a,fsse]=fminsearch(@(a) sse_nonlin_exp(a,data(:,1),data(:,2)),[-2,0.2])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "Gnuplot\n", + "Produced by GNUPLOT 5.0 patchlevel 3 \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\t\n", + "\t\t-50\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t50\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t100\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t150\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t200\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t250\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t300\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t4\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t6\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t8\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t10\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\tgnuplot_plot_1a\n", + "\n", + "\n", + "\n", + "\t \n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "\t\n", + "\tgnuplot_plot_2a\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + 
"[sse,yhat]=sse_nonlin_exp(a,data(:,1),data(:,2));\n", + "plot(data(:,1),data(:,2),'o',data(:,1),yhat)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Case Study: Logistic Regression\n", + "\n", + "Many times the variable you predict is a binary (or discrete) value, such as pass/fail, broken/not-broken, etc. \n", + "\n", + "One method to fit this type of data is called [**logistic regression**](https://en.wikipedia.org/wiki/Logistic_regression).\n", + "\n", + "[Logistic Regression link 2](http://www.holehouse.org/mlclass/06_Logistic_Regression.html)\n", + "\n", + "We use a function that varies from 0 to 1 called a logistic function:\n", + "\n", + "$\\sigma(t)=\\frac{1}{1+e^{-t}}$\n", + "\n", + "We can use this function to describe the likelihood of failure (1) or success (0). When z=0, the probability of failure is 50%. " + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "Gnuplot\n", + "Produced by GNUPLOT 5.0 patchlevel 3 \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.4\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.6\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.8\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t-10\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t-5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t5\n", + "\t\n", + "\n", + 
"\n", + "\t\t\n", + "\t\t10\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\tgnuplot_plot_1a\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "t=linspace(-10,10);\n", + "sigma=@(t) 1./(1+exp(-t));\n", + "plot(t,sigma(t))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we make the assumption that we can predict the boundary between the pass-fail criteria with a function of our independent variable e.g.\n", + "\n", + "$y=\\left\\{\\begin{array}{cc} \n", + "1 & a_{0}+a_{1}x +\\epsilon >0 \\\\\n", + "0 & else \\end{array} \\right\\}$\n", + "\n", + "so the logistic function is now:\n", + "\n", + "$\\sigma(x)=\\frac{1}{1+e^{-(a_{0}+a_{1}x)}}$\n", + "\n", + "Here, there is not a direct sum of squares error, so we minimize a cost function: \n", + "\n", + "$J(a_{0},a_{1})=\\sum_{i=1}^{n}\\left[-y_{i}\\log(\\sigma(x_{i}))-(1-y_{i})\\log((1-\\sigma(x_{i})))\\right]$\n", + "\n", + "y=0,1 \n", + "\n", + "So the cost function either sums the " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example: Challenger O-ring failures\n", + "\n", + "The O-rings on the Challenger shuttles had problems when temperatures became low. We can look at the conditions when damage was observed to determine the likelihood of failure. 
\n", + "\n", + "[Challenger O-ring data powerpoint](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0ahUKEwjZvL7jkP3SAhUp04MKHXkXDkMQFggcMAA&url=http%3A%2F%2Fwww.stat.ufl.edu%2F~winner%2Fcases%2Fchallenger.ppt&usg=AFQjCNFyjwT7NmRthDkDEgch75Fc5dc66w&sig2=_qeteX6-ZEBwPW8SZN1mIA)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "oring =\n", + "\n", + " 1.00000 53.00000 1.00000\n", + " 2.00000 57.00000 1.00000\n", + " 3.00000 58.00000 1.00000\n", + " 4.00000 63.00000 1.00000\n", + " 5.00000 66.00000 0.00000\n", + " 6.00000 66.80000 0.00000\n", + " 7.00000 67.00000 0.00000\n", + " 8.00000 67.20000 0.00000\n", + " 9.00000 68.00000 0.00000\n", + " 10.00000 69.00000 0.00000\n", + " 11.00000 69.80000 1.00000\n", + " 12.00000 69.80000 0.00000\n", + " 13.00000 70.20000 1.00000\n", + " 14.00000 70.20000 0.00000\n", + " 15.00000 72.00000 0.00000\n", + " 16.00000 73.00000 0.00000\n", + " 17.00000 75.00000 0.00000\n", + " 18.00000 75.00000 1.00000\n", + " 19.00000 75.80000 0.00000\n", + " 20.00000 76.20000 0.00000\n", + " 21.00000 78.00000 0.00000\n", + " 22.00000 79.00000 0.00000\n", + " 23.00000 81.00000 0.00000\n", + "\n" + ] + } + ], + "source": [ + "% read data from csv file \n", + "% col 1 = index\n", + "% col 2 = temperature\n", + "% col 3 = 1 if damaged, 0 if undamaged\n", + "oring=dlmread('challenger_oring.csv',',',1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "Gnuplot\n", + "Produced by GNUPLOT 5.0 patchlevel 3 \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\t\n", + 
"\t \n", + "\t \n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.4\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.6\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.8\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t50\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t55\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t60\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t65\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t70\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t75\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t80\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t85\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\tgnuplot_plot_1a\n", + "\n", + "\n", + "\n", + "\t \n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot(oring(:,2),oring(:,3),'o')" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "function J=sse_logistic(a,x,y)\n", + " % Create function to calculate SSE of logistic function\n", + " % t = a0+a1*x\n", + " % sigma(t) = 1./(1+e^(-t))\n", + " sigma=@(t) 1./(1+exp(-t));\n", + " a0=a(1);\n", + " a1=a(2);\n", + " t=a0+a1*x;\n", + " J = 1/length(x)*sum(-y.*log(sigma(t))-(1-y).*log(1-sigma(t)));\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "J = 0.88822\n", + "a =\n", + "\n", + " 15.03501 -0.23205\n", + "\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "Gnuplot\n", + "Produced by GNUPLOT 5.0 patchlevel 3 \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.4\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.6\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.8\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t50\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t55\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t60\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t65\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t70\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t75\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t80\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t85\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\tgnuplot_plot_1a\n", + "\n", + "\n", + "\n", + "\t \n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "\t\n", + "\tgnuplot_plot_2a\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + 
"J=sse_logistic([10,-0.2],oring(:,2),oring(:,3))\n", + "a=fminsearch(@(a) sse_logistic(a,oring(:,2),oring(:,3)),[0,-3])\n", + "\n", + "T=linspace(50,85);\n", + "plot(oring(:,2),oring(:,3),'o',T,sigma(a(1)+a(2)*T))" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "probability of failure when 70 degrees is 23.00% \n", + "probability of failure when 60 degrees is 75.25%\n" + ] + } + ], + "source": [ + "fprintf('probability of failure when 70 degrees is %1.2f%% ',100*sigma(a(1)+a(2)*70))\n", + "fprintf('probability of failure when 60 degrees is %1.2f%%',100*sigma(a(1)+a(2)*60))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interpolation\n", + "\n", + "Using regression (linear and nonlinear) you are faced with the problem, that you have lots of noisy data and you want to fit a physical model to it. \n", + "\n", + "You can use interpolation to solve the opposite problem, you have a little data with very little noise.\n", + "\n", + "## Linear interpolation\n", + "\n", + "If you are trying to find the value of f(x) for x between $x_{1}$ and $x_{2}$, then you can match the slopes\n", + "\n", + "$\\frac{f(x)-f(x_{1})}{x-x_{1}}=\\frac{f(x_{2})-f(x_{1})}{x_{2}-x_{1}}$\n", + "\n", + "or\n", + "\n", + "$f(x)=f(x_{1})+(x-x_{1})\\frac{f(x_{2})-f(x_{1})}{x_{2}-x_{1}}$\n", + "\n", + "### Example: Logarithms\n", + "\n", + "Engineers used to have to use interpolation in logarithm tables for calculations. Find ln(2) from \n", + "\n", + "a. ln(1) and ln(6)\n", + "\n", + "b. ln(1) and ln(4)\n", + "\n", + "c. 
just calculate it as ln(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ln(2)~0.358352\n", + "ln(2)~0.462098\n", + "ln(2)=0.693147\n" + ] + } + ], + "source": [ + "ln2_16=log(1)+(log(6)-log(1))/(6-1)*(2-1);\n", + "fprintf('ln(2)~%f\\n',ln2_16)\n", + "ln2_14=log(1)+(log(4)-log(1))/(4-1)*(2-1);\n", + "fprintf('ln(2)~%f\\n',ln2_14)\n", + "ln2=log(2);\n", + "fprintf('ln(2)=%f\\n',ln2)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "Gnuplot\n", + "Produced by GNUPLOT 5.0 patchlevel 3 \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1.5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t3\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t4\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t6\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\tgnuplot_plot_1a\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\tgnuplot_plot_2a\n", + "\n", + "\t \n", + "\t\n", + "\n", + "\t\n", + "\tgnuplot_plot_3a\n", + "\n", + "\t\t \n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "\t\n", + 
"\tgnuplot_plot_4a\n", + "\n", + "\t\t \n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x=linspace(1,6);\n", + "plot(x,log(x),2,log(2),'*',...\n", + "[1,2,6],[log(1),ln2_16,log(6)],'o-',...\n", + "[1,2,4],[log(1),ln2_14,log(4)],'s-')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Quadratic interpolation (intro curvature)\n", + "\n", + "Assume function is parabola between 3 points. The function is can be written as:\n", + "\n", + "$f_{2}(x)=b_{1}+b_{2}(x-x_{1})+b_{3}(x-x_{1})(x-x_{2})$\n", + "\n", + "When $x=x_{1}$\n", + "\n", + "$f(x_{1})=b_{1}$\n", + "\n", + "when $x=x_{2}$\n", + "\n", + "$b_{2}=\\frac{f(x_{2})-f(x_{1})}{x_{2}-x_{1}}$\n", + "\n", + "when $x=x_{3}$\n", + "\n", + "$b_{3}=\\frac{\\frac{f(x_{3})-f(x_{2})}{x_{3}-x_{2}}\n", + "-\\frac{f(x_{2})-f(x_{1})}{x_{2}-x_{1}}}{x_{3}-x_{1}}$\n", + "\n", + "#### Reexamining the ln(2) with ln(1), ln(4), and ln(6):" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b1 = 0\n", + "b2 = 0.46210\n", + "b3 = -0.051873\n" + ] + } + ], + "source": [ + "x1=1;\n", + "x2=4;\n", + "x3=6;\n", + "f1=log(x1);\n", + "f2=log(x2);\n", + "f3=log(x3);\n", + "\n", + "b1=f1\n", + "b2=(f2-f1)/(x2-x1)\n", + "b3=(f3-f2)/(x3-x2)-b2;\n", + "b3=b3/(x3-x1)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "Gnuplot\n", + "Produced by GNUPLOT 5.0 patchlevel 3 \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + 
"\t\n", + "\t \n", + "\t \n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1.5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t3\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t4\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t6\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\tgnuplot_plot_1a\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\tgnuplot_plot_2a\n", + "\n", + "\t \n", + "\t\n", + "\n", + "\t\n", + "\tgnuplot_plot_3a\n", + "\n", + "\t \n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "\t\n", + "\tgnuplot_plot_4a\n", + "\n", + "\t\n", + "\t\n", + "\tgnuplot_plot_5a\n", + "\n", + "\t \n", + "\t\n", + "\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x=linspace(1,6);\n", + "f=@(x) b1+b2*(x-x1)+b3*(x-x1).*(x-x2);\n", + "plot(x,log(x),2,log(2),'*',...\n", + "[1,4,6],[log(1),log(4),log(6)],'ro',...\n", + "x,f(x),'r-',2,f(2),'s')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Newton's Interpolating Polynomials\n", + "\n", + "For n-data points, we can fit an (n-1)th-polynomial\n", + "\n", + "$f_{n-1}(x)=b_{1}+b_{2}(x-x_{1})+\\cdots+b_{n}(x-x_{1})(x-x_{2})\\cdots(x-x_{n})$\n", + "\n", + "where \n", + "\n", + "$b_{1}=f(x_{1})$\n", + "\n", + "$b_{2}=\\frac{f(x_{2})-f(x_{1})}{x_{2}-x_{1}}$\n", + "\n", + "$b_{3}=\\frac{\\frac{f(x_{3})-f(x_{2})}{x_{3}-x_{2}}\n", + 
"-b_{2}}{x_{3}-x_{1}}$\n", + "\n", + "$\\vdots$\n", + "\n", + "$b_{n}=f(x_{n},x_{n-1},...,x_{2},x_{1})\n", + "=\\frac{f(x_{n},x_{n-1},...x_{2})-f(x_{n-1},x_{n-2},...,x_{1})}{x_{n}-x_{1}}$\n", + "\n", + "**e.g. for 4 data points:**\n", + "\n", + "![Newton Interpolation Iterations](newton_interpolation.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ln(2)=0.693147\n", + "ln(2)~0.722462\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "Gnuplot\n", + "Produced by GNUPLOT 5.0 patchlevel 3 \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\t\n", + "\t \n", + "\t \n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\t\n", + "\t\t-1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t-0.5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0.5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1.5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t0\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t1\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t2\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t3\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t4\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t5\n", + "\t\n", + "\n", + "\n", + "\t\t\n", + "\t\t6\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\tgnuplot_plot_1a\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\tgnuplot_plot_2a\n", + "\n", + "\t \n", + "\t\n", + "\n", + "\t\n", + "\tgnuplot_plot_3a\n", + "\n", + "\t \n", + 
"\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "\t\n", + "\tgnuplot_plot_4a\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "yy=zeros(size(xx));\n", + "x=[0.5,1,3,4,5,6]; % define independent var's\n", + "y=log(x); % define dependent var's\n", + "xx=linspace(min(x),max(x));\n", + "for i=1:length(xx)\n", + " yy(i)=Newtint(x,y,xx(i));\n", + "end\n", + "plot(xx,log(xx),2,log(2),'*',...\n", + "x,y,'ro',...\n", + "xx,yy,'r-')\n", + "\n", + "fprintf('ln(2)=%f',log(2))\n", + "fprintf('ln(2)~%f',Newtint(x,y,2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Octave", + "language": "octave", + "name": "octave" + }, + "language_info": { + "file_extension": ".m", + "help_links": [ + { + "text": "MetaKernel Magics", + "url": "https://github.com/calysto/metakernel/blob/master/metakernel/magics/README.md" + } + ], + "mimetype": "text/x-octave", + "name": "octave", + "version": "0.19.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lecture_18/newton_interpolation.png b/lecture_18/newton_interpolation.png new file mode 100644 index 0000000..5990cb5 Binary files /dev/null and b/lecture_18/newton_interpolation.png differ