import numpy as np
import time

from matplotlib import pyplot
# save theta to p5_params.npz that can be used by easynn
def save_theta(theta):
    f1_W, f1_b, f2_W, f2_b = theta

    np.savez_compressed("p5_params.npz", **{
        "f1.weight": f1_W,
        "f1.bias": f1_b,
        "f2.weight": f2_W,
        "f2.bias": f2_b
    })
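
# Quick local sanity check (not part of the assignment flow): np.load returns
# a dict-like archive keyed by the names used above, e.g.
#   params = np.load("p5_params.npz")
#   params["f1.weight"].shape  # -> (32, 784)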


# initialize theta using uniform distribution [-bound, bound]
# return theta as (f1_W, f1_b, f2_W, f2_b)
def initialize_theta(bound):
    f1_W = np.random.uniform(-bound, bound, (32, 784))
    f1_b = np.random.uniform(-bound, bound, 32)
    f2_W = np.random.uniform(-bound, bound, (10, 32))
    f2_b = np.random.uniform(-bound, bound, 10)
    return (f1_W, f1_b, f2_W, f2_b)
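
# Note: each first-layer unit sums 784 weighted pixels, so a large bound can
# produce very large pre-activations; this is why the hyperparameter sweep in
# main() also tries bounds much smaller than the default of 1.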


# forward:
#   x = Flatten(images)
#   g = Linear_f1(x)
#   h = ReLU(g)
#   z = Linear_f2(h)
# return (z, h, g, x)
def forward(images, theta):
    # number of samples
    N = images.shape[0]

    # unpack theta into f1 and f2
    f1_W, f1_b, f2_W, f2_b = theta

    # x = Flatten(images)
    x = images.astype(float).transpose(0, 3, 1, 2).reshape((N, -1))

    # g = Linear_f1(x)
    g = np.zeros((N, f1_b.shape[0]))
    for i in range(N):
        g[i, :] = np.matmul(f1_W, x[i]) + f1_b

    # h = ReLU(g)
    h = g*(g > 0)

    # z = Linear_f2(h)
    z = np.zeros((N, f2_b.shape[0]))
    for i in range(N):
        z[i, :] = np.matmul(f2_W, h[i]) + f2_b

    return (z, h, g, x)
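

# A minimal vectorized sketch of the same forward pass, kept only as a
# reference for how the per-sample loops above collapse into matrix products.
# forward_vectorized is an illustrative name and is not called by this script;
# it assumes x has already been flattened to shape (N, 784) as in forward().
def forward_vectorized(x, theta):
    f1_W, f1_b, f2_W, f2_b = theta
    g = x @ f1_W.T + f1_b    # (N, 32)
    h = np.maximum(g, 0)     # ReLU
    z = h @ f2_W.T + f2_b    # (N, 10)
    return (z, h, g, x)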


# backprop:
#   J = cross entropy between labels and softmax(z)
# return nabla_J
def backprop(labels, theta, z, h, g, x):
    # number of samples
    N = labels.shape[0]

    # unpack theta into f1 and f2
    f1_W, f1_b, f2_W, f2_b = theta

    # nabla_J consists of partial J to partial f1_W, f1_b, f2_W, f2_b
    p_f1_W = np.zeros(f1_W.shape)
    p_f1_b = np.zeros(f1_b.shape)
    p_f2_W = np.zeros(f2_W.shape)
    p_f2_b = np.zeros(f2_b.shape)

    for i in range(N):
        # compute the contribution to nabla_J for sample i

        # cross entropy and softmax:
        # compute partial J to partial z[i], scaled by 1/N for averaging
        expz = np.exp(z[i] - np.max(z[i]))
        p_z = expz/np.sum(expz)/N
        p_z[labels[i]] -= 1/N

        # z = Linear_f2(h):
        # compute partial J to partial h[i] and
        # accumulate partial J to partial f2_W, f2_b
        p_h = np.dot(f2_W.T, p_z)
        p_f2_W += np.outer(p_z, h[i])
        p_f2_b += p_z

        # h = ReLU(g):
        # compute partial J to partial g[i]
        p_g = p_h * (g[i] > 0)

        # g = Linear_f1(x):
        # accumulate partial J to partial f1_W, f1_b
        p_f1_W += np.outer(p_g, x[i])
        p_f1_b += p_g

    return (p_f1_W, p_f1_b, p_f2_W, p_f2_b)
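
# Derivation note for backprop() above: for one sample with label y the
# softmax cross-entropy gradient is dJ/dz = (softmax(z) - onehot(y)) / N,
# the ReLU gate passes gradients only where g > 0, and each linear layer
# contributes dJ/dW = outer(dJ/d_output, input) and dJ/db = dJ/d_output,
# which is exactly what the loop accumulates over the N samples.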


# apply SGD to update theta by nabla_J and the learning rate epsilon
# return updated theta
def update_theta(theta, nabla_J, epsilon):
    f1_W, f1_b, f2_W, f2_b = theta
    p_f1_W, p_f1_b, p_f2_W, p_f2_b = nabla_J

    # update the weights and biases for the first layer (f1)
    f1_W_updated = f1_W - epsilon * p_f1_W
    f1_b_updated = f1_b - epsilon * p_f1_b

    # update the weights and biases for the second layer (f2)
    f2_W_updated = f2_W - epsilon * p_f2_W
    f2_b_updated = f2_b - epsilon * p_f2_b

    return (f1_W_updated, f1_b_updated, f2_W_updated, f2_b_updated)
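
# Usage sketch (values illustrative): one SGD step on a mini-batch looks like
#   z, h, g, x = forward(batch_images, theta)
#   nabla_J = backprop(batch_labels, theta, z, h, g, x)
#   theta = update_theta(theta, nabla_J, 0.00001)
# which matches how the training loop in start_training() calls it.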


def print_training_hyperparams_for_session(epsilon, batch_size, bound):
    print("Starting training session with 10 epochs:")
    print("")
    print("Hyperparameters:")
    print(f"epsilon: {epsilon}")
    print(f"bound: {bound}")
    print(f"batch_size: {batch_size}")
    print("")
    print("Results:")


def plot_epoch(epochs, accuracies, epsilon, batch_size, bound):
    pyplot.figure(figsize=(10, 6))
    pyplot.plot(epochs, accuracies,
                label=f"Epsilon: {epsilon}, Batch Size: {batch_size}, Bound: {bound}")
    pyplot.xlabel('Epoch')
    pyplot.ylabel('Accuracy')
    pyplot.title('Training Accuracy over Epochs')
    pyplot.legend()
    pyplot.grid(True)
    pyplot.show()


def plot_all_epochs(training_results):
    pyplot.figure(figsize=(12, 8))

    for epochs, accuracies, epsilon, batch_size, bound in training_results:
        label = f"Epsilon: {epsilon}, Batch Size: {batch_size}, Bound: {bound}"
        pyplot.plot(epochs, accuracies, label=label)

    pyplot.xlabel('Epoch')
    pyplot.ylabel('Accuracy')
    pyplot.title('Training Accuracy over Epochs for Different Hyperparameters')
    pyplot.legend()
    pyplot.grid(True)
    pyplot.show()


def plot_table(training_results):
    # set up the data for the table
    cell_text = []
    columns = ['Epoch', 'Accuracy', 'Epsilon', 'Batch Size', 'Bound']
    for result in training_results:
        epochs, accuracies, epsilon, batch_size, bound = result
        for epoch, accuracy in zip(epochs, accuracies):
            cell_text.append([epoch, f"{accuracy:.3f}", epsilon, batch_size, bound])

    # determine the figure size needed for the table
    figsize = (10, len(cell_text) * 0.2)
    fig, ax = pyplot.subplots(figsize=figsize)
    ax.axis('tight')
    ax.axis('off')

    # create the table
    table = ax.table(cellText=cell_text, colLabels=columns, loc='center', cellLoc='center')

    # adjust table scale
    table.auto_set_font_size(False)
    table.set_fontsize(8)
    table.auto_set_column_width(col=list(range(len(columns))))

    pyplot.show()


def start_training(epsilon, batch_size, bound, mnist_train):

    # numpy random seed: last 8 digits of my CWID
    np.random.seed(20497299)

    # hold out the first 1000 examples for validation, train on the rest
    validation_images = mnist_train["images"][:1000]
    validation_labels = mnist_train["labels"][:1000]
    training_images = mnist_train["images"][1000:]
    training_labels = mnist_train["labels"][1000:]

    # hyperparameters are passed in so we can experiment with how increasing
    # or decreasing them influences accuracy; the defaults are
    #   bound = 1          # initial weight range
    #   epsilon = 0.00001  # learning rate
    #print_training_hyperparams_for_session(epsilon, batch_size, bound)

    # start training
    accuracies = []
    epochs = []
    start = time.time()
    theta = initialize_theta(bound)
    batches = training_images.shape[0]//batch_size
    for epoch in range(10):
        # shuffle the training set and step through it one mini-batch at a time
        indices = np.arange(training_images.shape[0])
        np.random.shuffle(indices)
        for i in range(batches):
            batch_images = training_images[indices[i*batch_size:(i+1)*batch_size]]
            batch_labels = training_labels[indices[i*batch_size:(i+1)*batch_size]]

            z, h, g, x = forward(batch_images, theta)
            nabla_J = backprop(batch_labels, theta, z, h, g, x)
            theta = update_theta(theta, nabla_J, epsilon)

        # check accuracy using validation examples
        z, _, _, _ = forward(validation_images, theta)
        pred_labels = z.argmax(axis=1)
        accuracy = np.sum(pred_labels == validation_labels) / validation_images.shape[0]
        accuracies.append(accuracy)
        epochs.append(epoch)
        print("epoch %d, accuracy %.3f, time %.2f" % (
            epoch, accuracy, time.time()-start))

    #plot_epoch(epochs, accuracies, epsilon, batch_size, bound)

    # save the weights to be submitted
    save_theta(theta)

    # return this data so we can plot it with matplotlib
    return epochs, accuracies


def main():
    training_results = []

    # load training data once
    mnist_train = np.load("mnist_train.npz")

    # each tuple is (epsilon, bound, batch_size); we can add to this list if
    # we want to test more combinations of hyperparameters
    hyperparams = [
        (0.00001, 1, 4),  # default params
        (0.00001, 0.1, 4),
        (0.00001, 0.5, 4),
        (0.00001, 0.7, 4),
        (0.00001, 0.01, 4),
        (0.00001, 0.01, 3),
        (0.00001, 0.01, 2),
        (0.000013, 0.012, 1),
        (0.000013, 0.012002899999999983, 1),
        (0.000013, 0.01200591999999996, 1),
    ]

    for epsilon, bound, batch_size in hyperparams:
        epochs, accuracies = start_training(epsilon, batch_size, bound, mnist_train)
        training_results.append((epochs, accuracies, epsilon, batch_size, bound))

    # uncomment if you would like to see plotted results
    #plot_all_epochs(training_results)

    # uncomment to show the last run as a table (plot_table expects a list)
    #plot_table([training_results[9]])


if __name__ == '__main__':
    main()