# theorem4.py

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Seed NumPy and TensorFlow with the same value so repeated runs are comparable.
np.random.seed(962)
tf.set_random_seed(962)
# The rest of the script calls Tensor.eval() / Operation.run() without passing
# a session, which relies on this interactive session being the default one.
sess = tf.InteractiveSession()

def weight_variable(shape):
    """Create a TensorFlow variable with a random-normal initial value.

    Args:
        shape: Shape of the variable, forwarded to ``tf.random_normal``.

    Returns:
        A ``tf.Variable`` whose initial value is sampled with ``stddev=1.0``.
    """
    return tf.Variable(tf.random_normal(shape, stddev=1.0))

def bias_vector(size):
    """Draw a bias array from a normal distribution with mean 0 and stddev 1.

    Args:
        size: Output shape, forwarded to ``np.random.normal``.

    Returns:
        A NumPy array of samples (a plain array, not a TensorFlow variable).
    """
    return np.random.normal(loc=0, scale=1.0, size=size)


# Start here: build the training inputs and the network.
# Inputs are evenly spaced points in [0, 1) with step 0.001, as a column vector.
x_dat = np.arange(0, 1, 0.001)[:, np.newaxis]
N = x_dat.shape[0]
d = 20  # number of nodes in the hidden layer

# One-hidden-layer ReLU network: y = relu(x V - b) W.
# W and V are TensorFlow variables; b is a NumPy array sampled once here.
W = weight_variable([d, 1])
V = weight_variable([1, d])
b = bias_vector([1, d])
x = tf.placeholder(tf.float32, shape=[N, 1])
y_ = tf.placeholder(tf.float32, shape=[N, 1])
hidden = tf.nn.relu(tf.matmul(x, V) - b)
y = tf.matmul(hidden, W)
train_loss = tf.reduce_sum(tf.square(y - y_))  # summed squared error

# The training targets are the network's own outputs at this first
# random initialisation of W and V.
sess.run(tf.global_variables_initializer())
y_dat = y.eval(feed_dict={x: x_dat})

# Bound from the paper for the optimal solution: 1 / max_i(|x_i| * |y_i|).
bound = 1 / np.max(np.abs(x_dat) * np.abs(y_dat), axis=0)
bound = bound[0]

# Start here: experiment settings (the step sizes can be set to `bound`).
n = 10            # number of step sizes to sweep
nstart = .117     # first (higher) step size
nend = .115       # last (lower) step size
r = 1000          # number of random weight initialisations
max_steps = 1001  # gradient-descent steps per run before giving up

# Result storage, indexed [step size][random initialisation].
sums = [[0.0 for i in range(r)] for j in range(n)]  # final squared error
conv = [[0 for i in range(r)] for j in range(n)]    # 1 if final error < 1000
conv2 = [[0 for i in range(r)] for j in range(n)]   # 1 if the gradient test fired

# n evenly spaced step sizes from nstart to nend. linspace also handles
# n == 1, where the hand-written i/(n-1) formula would divide by zero.
delta = np.linspace(nstart, nend, n).tolist()

# Build the training and initialisation ops ONCE. Creating them inside the
# loops adds new nodes to the graph on each of the r*n runs, so the graph
# keeps growing for the whole experiment. The step size is fed per run.
step_size = tf.placeholder(tf.float32, shape=[])
train_step = tf.train.GradientDescentOptimizer(step_size).minimize(train_loss)
init_op = tf.global_variables_initializer()

# Iterate over all the random initialisations.
for rand_iter in range(r):
    print(rand_iter)  # progress indicator
    for step_iter in range(n):  # iterate over all the step sizes
        delta_small = delta[step_iter]

        # Re-run the initialisers to draw fresh random weights for this run.
        sess.run(init_op)

        # Network output before any training step.
        z3_dat = sess.run(y, feed_dict={x: x_dat})
        z4_dat = z3_dat
        gradim1 = 0
        gradi = 1000000  # arbitrarily high

        for g in range(max_steps):
            train_step.run(
                feed_dict={x: x_dat, y_: y_dat, step_size: delta_small}
            )
            z4_dat = sess.run(y, feed_dict={x: x_dat})  # output after the step

            # Change of the first output per unit step size, used as a
            # stand-in for the gradient magnitude.
            gradip1 = abs(z3_dat[0] - z4_dat[0]) / delta_small

            # Converged when two successive changes of that quantity agree
            # to within 0.001.
            if abs(abs(gradim1 - gradi) - abs(gradi - gradip1)) < .001:
                conv2[step_iter][rand_iter] = 1
                break

            # Shift the history window.
            gradim1 = gradi
            gradi = gradip1
            z3_dat = z4_dat

        # Squared error of the latest output against the intended solution.
        # z4_dat is used because z3_dat is one step stale after a `break`.
        sums[step_iter][rand_iter] = float(np.sum((y_dat - z4_dat) ** 2))

        # A summed squared error below 1000 counts as (Lyapunov-stable)
        # convergence to the intended solution.
        if sums[step_iter][rand_iter] < 1000:
            conv[step_iter][rand_iter] = 1

# Per step size, the fraction (0..1) of random initialisations whose flag is set.
# perc  comes from conv  -> final error below the accuracy threshold
# perc2 comes from conv2 -> gradient-change convergence test fired
perc = [sum(flags) / r for flags in conv]
perc2 = [sum(flags) / r for flags in conv2]

# One figure per criterion, gradient criterion first.
for fractions, label in (
    (perc2, "Convergence by Gradient Change Percentage"),
    (perc, "Convergence by Accuracy Percentage"),
):
    plt.plot(delta, fractions)
    plt.xlabel("Step Size")
    plt.ylabel(label)
    plt.show()
	import numpy as np
	import matplotlib.pyplot as plt
	import tensorflow as tf

	tf.set_random_seed(962) #random seed for consistency
	np.random.seed(962)
	sess = tf.InteractiveSession()

	def weight_variable(shape):
	initial = tf.random_normal(shape, stddev=1.0)
	return tf.Variable(initial)

	def bias_vector(size):
	initial = np.random.normal(0, 1.0, size)
	return initial


	#Start here
	# intilize the tensorflow training set
	x_dat = np.arange(0,1,0.001)
	x_dat = np.expand_dims(x_dat,1)
	N = len(x_dat)
	d = 20 #number of nodes in the hidden layer
	W = weight_variable([d,1])
	V = weight_variable([1,d])
	b = bias_vector([1,d])
	x = tf.placeholder(tf.float32, shape=[N,1])
	y_ = tf.placeholder(tf.float32, shape=[N,1])
	y = tf.matmul(tf.nn.relu(tf.matmul(x,V)-b),W)
	train_loss = tf.reduce_sum(tf.square(y-y_))
	sess.run(tf.global_variables_initializer())
	y_dat = y.eval(feed_dict={x: x_dat})

	#calculate the bounds from the paper for the optimal solution
	bound = 1/max([(abs(x_dat[i])*abs(y_dat[i])) for i in range(N)])
	bound = bound[0]

	#Start here, can be set to bound
	n = 10 #enter the number of datapoints
	nstart = .117 #the higher of the step size
	nend = .115 #the lower of the step size
	r = 1000 #random iterations

	#initilize the memory of the results
	sums = [[0.0 for i in range(r)] for j in range(n)]
	delta = [nstart+((i/(n-1))*(nend-nstart)) for i in range(n)]
	conv = [[0 for i in range(r)] for j in range(n)]
	conv2 = [[0 for i in range(r)] for j in range(n)]
	#iterate over all the random initlizations
	for rand_iter in range(r):
	print(rand_iter) #Print the intialization to see how far the program has run
	for step_iter in range(n): #iterate over all the step sizes

	#Set the step size
	delta_small = delta[step_iter]
	#random weight initialization
	train_step = tf.train.GradientDescentOptimizer(delta_small).minimize(train_loss)
	sess.run(tf.global_variables_initializer())
	#Run the first time to get gradient
	z3_dat = sess.run(y, feed_dict={x: x_dat})
	gradim1 = 0
	gradi = 1000000 #arbitratraily high
	gradip1 = 10000000 #arbitratraily high
	#iterate up to 1000 times
	for g in range(1001):
	train_step.run(feed_dict={x: x_dat, y_: y_dat})
	z4_dat = sess.run(y, feed_dict={x: x_dat}) #store the results
	gradip1 = abs(z3_dat[0]-z4_dat[0])/delta_small # calculate the gradient
	#Check if the gradient has converged
	if(abs(abs(gradim1-gradi) - abs(gradi-gradip1)))<.001:
	conv2[step_iter][rand_iter]= 1
	break
	#update
	gradim1 = gradi
	gradi = gradip1
	z3_dat = z4_dat
	#record error from intened solution
	sums[step_iter][rand_iter] = sum([(y_dat[j]-z3_dat[j])**2 for j in range(len(z3_dat))])
	#If the function is within 100 of the intended solution, Layapunov stable convergence!
	if (sums[step_iter][rand_iter] <1000):
	conv[step_iter][rand_iter]= 1

	#percent gradient convergence
	perc = [sum(i)/(r) for i in conv]
	#percent solution convergence
	perc2 = [sum(i)/(r) for i in conv2]
	plt.plot(delta,perc2)
	plt.xlabel("Step Size")
	plt.ylabel("Convergence by Gradient Change Percentage")
	plt.show()

	plt.plot(delta,perc)
	plt.xlabel("Step Size")
	plt.ylabel("Convergence by Accuracy Percentage")
	plt.show()