theorem1.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn\n",
    "import math\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setting random seed for numpy and tensorflow\n",
    "tf.set_random_seed(963)\n",
    "np.random.seed(963)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "sess = tf.InteractiveSession()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def weight_variable(shape):\n",
    "    initial = tf.random_normal(shape, stddev=1.0)\n",
    "    return tf.Variable(initial)\n",
    "\n",
    "def bias_vector(size):\n",
    "    initial = np.random.normal(0, 1.0, size)\n",
    "    return initial"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Generate a series of input data points that match the condition in Theorem 1.\n",
    "\n",
    "def genPoints(size):\n",
    "    v = np.arange(1,size + 1,1)\n",
    "    vmag = np.linalg.norm(v)\n",
    "    vnorm = v / vmag\n",
    "    output = np.sqrt(size)*vnorm\n",
    "    return np.expand_dims(output,1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n"
     ]
    }
   ],
   "source": [
    "#Create deep linear neural network struture with following parameters:\n",
    "\n",
    "N = 1000             # size of dataset\n",
    "FN = 1               # number of features in dataset\n",
    "d = 10               # number of nodes per layer\n",
    "numLayers = 1        # number of hidden layers\n",
    "x_dat = genPoints(N) # generate data pts\n",
    "\n",
    "W = []\n",
    "\n",
    "W.append(weight_variable([FN,d]))\n",
    "for i in range(0,numLayers):\n",
    "    W.append(weight_variable([d,d]))\n",
    "W.append(weight_variable([d,1]))\n",
    "\n",
    "x  = tf.placeholder(tf.float32, shape=[N,FN])\n",
    "y_ = tf.placeholder(tf.float32, shape=[N,1])\n",
    "\n",
    "y = tf.identity(tf.matmul(x,W[0]))\n",
    "\n",
    "for i in range(1, numLayers+2):\n",
    "    y = tf.identity(tf.matmul(y, W[i]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "#\n",
    "\n",
    "train_loss = tf.reduce_sum(tf.math.scalar_mul((1/(2*N)),tf.square(y-y_)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate a function that can be represented by the network\n",
    "resolution = np.arange(0,2,0.002)\n",
    "resolution = np.expand_dims(resolution,1)\n",
    "sess.run(tf.global_variables_initializer())\n",
    "y_dat = y.eval(feed_dict={x: x_dat})\n",
    "resolution_y = y.eval(feed_dict={x: resolution})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate R (optimal weights) and find largest singular value in R\n",
    "M = []\n",
    "\n",
    "for i in range(0, numLayers+2):\n",
    "    M.append(W[i].eval(session=sess))\n",
    "\n",
    "R = M[0]\n",
    "for i in range(1, numLayers+2):\n",
    "    R = np.matmul(R,M[i])\n",
    "\n",
    "u, s, vh = np.linalg.svd(R, full_matrices=True)\n",
    "\n",
    "rLarge = s.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.046412347403755924\n"
     ]
    }
   ],
   "source": [
    "# calculate the upperbound on the step size shown in Theorem 1\n",
    "L = numLayers+2\n",
    "\n",
    "deltaUp = 2.0 / (L * (rLarge ** (2*(L-1)/L)))\n",
    "print(deltaUp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate a series of decreasing step sizes from the calculated upperbound.\n",
    "deltas = [deltaUp]\n",
    "sum = deltaUp\n",
    "for i in range(0, 19):\n",
    "    sum = sum - 0.005\n",
    "    if(sum > 0.005):\n",
    "        deltas.append(sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the Gradient Descent Algorithm for 1000 independent random initializations with a maximum threshold\n",
    "# of 1000 iterations of GD. When the difference in gradient is less than the value 'tolerance', then the\n",
    "# run has converged to a solution.\n",
    "\n",
    "tolerance = 0.001\n",
    "numRuns = 1000\n",
    "maxIters = 1000\n",
    "for d in deltas:\n",
    "    numConverge = 0\n",
    "    for r in range(numRuns):\n",
    "        opt = tf.train.GradientDescentOptimizer(d)\n",
    "        train_step = opt.minimize(train_loss)\n",
    "        sess.run(tf.global_variables_initializer())\n",
    "        grads_and_vars = opt.compute_gradients(train_loss)\n",
    "        grad_norms = [tf.nn.l2_loss(g) for g, v in grads_and_vars]\n",
    "        grad_norm = tf.add_n(grad_norms)\n",
    "        loss,oldgr = sess.run([train_loss,grad_norm],feed_dict={x: x_dat, y_: y_dat})\n",
    "        for i in range(maxIters):\n",
    "            if math.isnan(loss) or math.isinf(loss):\n",
    "                break\n",
    "            if math.isnan(oldgr) or math.isinf(oldgr):\n",
    "                break\n",
    "            train_step.run(feed_dict={x: x_dat, y_: y_dat})\n",
    "            loss,newgr = sess.run([train_loss,grad_norm],feed_dict={x: x_dat, y_: y_dat})\n",
    "            if abs(newgr - oldgr) < tolerance:\n",
    "                numConverge = numConverge + 1\n",
    "                break\n",
    "            oldgr = newgr\n",
    "    print(\"delta[\"+ str(d) +\"]: percentage: \" + str(numConverge) + \"/\" + str(numRuns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"import tensorflow as tf\n",
	"import matplotlib.pyplot as plt\n",
	"import sklearn\n",
	"import math\n",
	"%matplotlib inline"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Setting random seed for numpy and tensorflow\n",
	"tf.set_random_seed(963)\n",
	"np.random.seed(963)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"sess = tf.InteractiveSession()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"def weight_variable(shape):\n",
	" initial = tf.random_normal(shape, stddev=1.0)\n",
	" return tf.Variable(initial)\n",
	"\n",
	"def bias_vector(size):\n",
	" initial = np.random.normal(0, 1.0, size)\n",
	" return initial"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"#Generate a series of input data points that match the condition in Theorem 1.\n",
	"\n",
	"def genPoints(size):\n",
	" v = np.arange(1,size + 1,1)\n",
	" vmag = np.linalg.norm(v)\n",
	" vnorm = v / vmag\n",
	" output = np.sqrt(size)*vnorm\n",
	" return np.expand_dims(output,1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
	"Instructions for updating:\n",
	"Colocations handled automatically by placer.\n"
	]
	}
	],
	"source": [
	"#Create deep linear neural network struture with following parameters:\n",
	"\n",
	"N = 1000 # size of dataset\n",
	"FN = 1 # number of features in dataset\n",
	"d = 10 # number of nodes per layer\n",
	"numLayers = 1 # number of hidden layers\n",
	"x_dat = genPoints(N) # generate data pts\n",
	"\n",
	"W = []\n",
	"\n",
	"W.append(weight_variable([FN,d]))\n",
	"for i in range(0,numLayers):\n",
	" W.append(weight_variable([d,d]))\n",
	"W.append(weight_variable([d,1]))\n",
	"\n",
	"x = tf.placeholder(tf.float32, shape=[N,FN])\n",
	"y_ = tf.placeholder(tf.float32, shape=[N,1])\n",
	"\n",
	"y = tf.identity(tf.matmul(x,W[0]))\n",
	"\n",
	"for i in range(1, numLayers+2):\n",
	" y = tf.identity(tf.matmul(y, W[i]))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"#\n",
	"\n",
	"train_loss = tf.reduce_sum(tf.math.scalar_mul((1/(2*N)),tf.square(y-y_)))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"# generate a function that can be represented by the network\n",
	"resolution = np.arange(0,2,0.002)\n",
	"resolution = np.expand_dims(resolution,1)\n",
	"sess.run(tf.global_variables_initializer())\n",
	"y_dat = y.eval(feed_dict={x: x_dat})\n",
	"resolution_y = y.eval(feed_dict={x: resolution})"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"# generate R (optimal weights) and find largest singular value in R\n",
	"M = []\n",
	"\n",
	"for i in range(0, numLayers+2):\n",
	" M.append(W[i].eval(session=sess))\n",
	"\n",
	"R = M[0]\n",
	"for i in range(1, numLayers+2):\n",
	" R = np.matmul(R,M[i])\n",
	"\n",
	"u, s, vh = np.linalg.svd(R, full_matrices=True)\n",
	"\n",
	"rLarge = s.max()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"0.046412347403755924\n"
	]
	}
	],
	"source": [
	"# calculate the upperbound on the step size shown in Theorem 1\n",
	"L = numLayers+2\n",
	"\n",
	"deltaUp = 2.0 / (L * (rLarge ** (2*(L-1)/L)))\n",
	"print(deltaUp)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Generate a series of decreasing step sizes from the calculated upperbound.\n",
	"deltas = [deltaUp]\n",
	"sum = deltaUp\n",
	"for i in range(0, 19):\n",
	" sum = sum - 0.005\n",
	" if(sum > 0.005):\n",
	" deltas.append(sum)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Run the Gradient Descent Algorithm for 1000 independent random initializations with a maximum threshold\n",
	"# of 1000 iterations of GD. When the difference in gradient is less than the value 'tolerance', then the\n",
	"# run has converged to a solution.\n",
	"\n",
	"tolerance = 0.001\n",
	"numRuns = 1000\n",
	"maxIters = 1000\n",
	"for d in deltas:\n",
	" numConverge = 0\n",
	" for r in range(numRuns):\n",
	" opt = tf.train.GradientDescentOptimizer(d)\n",
	" train_step = opt.minimize(train_loss)\n",
	" sess.run(tf.global_variables_initializer())\n",
	" grads_and_vars = opt.compute_gradients(train_loss)\n",
	" grad_norms = [tf.nn.l2_loss(g) for g, v in grads_and_vars]\n",
	" grad_norm = tf.add_n(grad_norms)\n",
	" loss,oldgr = sess.run([train_loss,grad_norm],feed_dict={x: x_dat, y_: y_dat})\n",
	" for i in range(maxIters):\n",
	" if math.isnan(loss) or math.isinf(loss):\n",
	" break\n",
	" if math.isnan(oldgr) or math.isinf(oldgr):\n",
	" break\n",
	" train_step.run(feed_dict={x: x_dat, y_: y_dat})\n",
	" loss,newgr = sess.run([train_loss,grad_norm],feed_dict={x: x_dat, y_: y_dat})\n",
	" if abs(newgr - oldgr) < tolerance:\n",
	" numConverge = numConverge + 1\n",
	" break\n",
	" oldgr = newgr\n",
	" print(\"delta[\"+ str(d) +\"]: percentage: \" + str(numConverge) + \"/\" + str(numRuns))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"anaconda-cloud": {},
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 1
	}