Merge branch 'master' into 'main'

project 3 complete. 10/10 tests passing. p1 and p2 tests still pass....

See merge request jtpmcdevitt/oop_and_ml!1
Juthatip McDevitt 2024-02-09 02:34:32 +00:00
commit 893918163a
57 changed files with 3535 additions and 0 deletions

9
Makefile Executable file
View file

@ -0,0 +1,9 @@
.PHONY: all libeasynn easynn_test
all: libeasynn easynn_test
libeasynn:
g++ -Wall src/*.cpp -fPIC -O -g -shared -o libeasynn.so
easynn_test: libeasynn
g++ -Wall easynn_test.cpp -g -lm -L. -Wl,-rpath=. -leasynn -o easynn_test

145
easynn.py Executable file
View file

@ -0,0 +1,145 @@
class Expr:
next_id = 0
def __init__(self, op, inputs):
self.op = op
self.inputs = inputs
self.id = Expr.next_id
Expr.next_id += 1
if not isinstance(op, Op):
raise Exception("Not an operator: %s" % op)
def __dfs_post(self, ids, visitor):
ids[self.id] = True
for expr in self.inputs:
if expr.id in ids:
continue
expr.__dfs_post(ids, visitor)
visitor(self)
def statements(self):
lines = []
self.__dfs_post({}, lambda that: lines.append("%s" % that))
return "\n".join(lines)
def __str__(self):
args = ",".join(["t%d" % expr.id for expr in self.inputs])
return "t%d = %s(%s)" % (self.id, self.op, args)
def __promote(r):
if isinstance(r, Expr):
return r
else:
return Const(r)
def __add__(self, r):
return Op("", "Add", 2, {})(self, Expr.__promote(r))
def __sub__(self, r):
return Op("", "Sub", 2, {})(self, Expr.__promote(r))
def __mul__(self, r):
return Op("", "Mul", 2, {})(self, Expr.__promote(r))
def __neg__(self):
return Op("", "Neg", 1, {})(self)
def compile(self, builder):
self.__dfs_post({}, lambda that: builder.append(that))
return builder.build()
def resolve(self, parameters):
self.__dfs_post({}, lambda that: that.op.resolve(parameters))
return self
class Op:
def __init__(self, name, op_type, num_args, parameters):
self.name = name
self.op_type = op_type
self.num_args = num_args
self.parameters = parameters
def __call__(self, *inputs):
if self.num_args >= 0 and self.num_args != len(inputs):
raise Exception("%s: need %d arguments but found %d" % (self, self.num_args, len(inputs)))
for i, expr in enumerate(inputs):
if not isinstance(expr, Expr):
raise Exception("%s: arg %d is not an expression: %s" % (self, i, expr))
return Expr(self, inputs)
def __str__(self):
name = "%s.%s" % (self.name, self.op_type)
if len(self.parameters) == 0:
return name
params = ",".join(["%s=%s" % (k, v.shape if hasattr(v, "shape") else v) for k, v in self.parameters.items()])
return "%s[%s]" % (name, params)
def resolve(self, parameters):
if self.name == "":
return
for k, v in parameters.items():
if k.startswith(self.name+"."):
self.parameters[k[len(self.name)+1:]] = v
def Const(c):
if isinstance(c, (int, float)):
c = float(c)
elif hasattr(c, "shape"):
c = c.astype(float)
else:
raise Exception("Const must be float or int or ndarray: %s" % c)
return Expr(Op("", "Const", 0, {
"value": c
}), [])
def Input(n):
return Expr(Op(n, "Input", 0, {}), [])
def Input2d(n, h, w, ic):
return Expr(Op(n, "Input2d", 0, {
"height": h,
"width": w,
"in_channels": ic
}), [])
def MaxPool2d(k, s):
return Op("", "MaxPool2d", 1, {
"kernel_size": k,
"stride": s
})
def ReLU():
return Op("", "ReLU", 1, {})
def Flatten():
return Op("", "Flatten", 1, {})
def Conv2d(n, ic, oc, k, p = 0):
return Op(n, "Conv2d", 1, {
"in_channels": ic,
"out_channels": oc,
"kernel_size": k,
"padding": p
})
def Linear(n, i, o):
return Op(n, "Linear", 1, {
"in_features": i,
"out_features": o
})
def Show():
return Op("", "Show", 1, {})
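A minimal usage sketch for the expression DSL defined above (assuming this file is importable as easynn; the shapes and parameter values below are made up for illustration):

import numpy as np
import easynn as nn

x = nn.Input("x")
c = nn.Const(2.0)
y = x * c + x                  # operator overloading builds Mul and Add expressions
print(y.statements())          # one "tN = op(...)" line per sub-expression, in dependency order

f = nn.Linear("f", 3, 2)       # parameterized ops are resolved later by name prefix
z = f(x)
z.resolve({"f.weight": np.ones((2, 3)), "f.bias": np.zeros(2)})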

130
easynn_cpp.py Executable file
View file

@ -0,0 +1,130 @@
import numpy as np
import ctypes
import time
_libeasynn = ctypes.CDLL("./libeasynn.so")
_libeasynn.create_program.restype = ctypes.c_void_p
_libeasynn.append_expression.argtypes = [
ctypes.c_void_p,
ctypes.c_int,
ctypes.c_char_p,
ctypes.c_char_p,
ctypes.c_void_p,
ctypes.c_int
]
_libeasynn.add_op_param_double.argtypes = [
ctypes.c_void_p,
ctypes.c_char_p,
ctypes.c_double
]
_libeasynn.add_op_param_ndarray.argtypes = [
ctypes.c_void_p,
ctypes.c_char_p,
ctypes.c_int,
ctypes.c_void_p,
ctypes.c_void_p
]
_libeasynn.build.restype = ctypes.c_void_p
_libeasynn.build.argtypes = [
ctypes.c_void_p
]
_libeasynn.add_kwargs_double.argtypes = [
ctypes.c_void_p,
ctypes.c_char_p,
ctypes.c_double
]
_libeasynn.add_kwargs_ndarray.argtypes = [
ctypes.c_void_p,
ctypes.c_char_p,
ctypes.c_int,
ctypes.c_void_p,
ctypes.c_void_p
]
_libeasynn.execute.argtypes = [
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_void_p
]
def check_and_raise(ret):
if ret != 0:
raise Exception("libeasynn error: code %d" % ret)
def to_float_or_ndarray(c_dim, c_shape, c_data):
if c_dim.value == 0:
return c_data[0]
shape = [c_shape[k] for k in range(c_dim.value)]
N = 1
for s in shape:
N *= s
print("shape =", shape, "N =", N)
flat = np.array([c_data[i] for i in range(N)])
return flat.reshape(shape)
class Eval:
def __init__(self, evaluation):
self.evaluation = evaluation
def __call__(self, **kwargs):
start = time.time()
for k, v in kwargs.items():
if isinstance(v, (int, float)):
_libeasynn.add_kwargs_double(
self.evaluation, k.encode(), ctypes.c_double(v))
elif hasattr(v, "shape"):
v = np.require(v, dtype=np.double, requirements=['C', 'O', 'W', 'A'])
_libeasynn.add_kwargs_ndarray(
self.evaluation, k.encode(), v.ndim, v.ctypes.shape,
v.ctypes.data)
else:
raise Exception("%s: kwargs must be float or int or ndarray" % k)
c_dim = ctypes.c_int()
c_shape = ctypes.POINTER(ctypes.c_size_t)()
c_data = ctypes.POINTER(ctypes.c_double)()
check_and_raise(_libeasynn.execute(self.evaluation,
ctypes.byref(c_dim), ctypes.byref(c_shape), ctypes.byref(c_data)))
res = to_float_or_ndarray(c_dim, c_shape, c_data)
t = time.time()-start
if t > 0.1:
print("c++ time %.2f" % t)
return res
class Builder:
def __init__(self):
self.program = _libeasynn.create_program()
def append(self, expr):
inputs = [ex.id for ex in expr.inputs]
num_inputs = len(inputs)
op = expr.op
_libeasynn.append_expression(
self.program, expr.id,
op.name.encode(), op.op_type.encode(),
(ctypes.c_int*num_inputs)(*inputs), num_inputs)
for k, v in op.parameters.items():
if isinstance(v, (int, float)):
check_and_raise(_libeasynn.add_op_param_double(
self.program, k.encode(), ctypes.c_double(v)))
elif hasattr(v, "shape"):
v = np.require(v, dtype=np.double, requirements=['C', 'O', 'W', 'A'])
check_and_raise(_libeasynn.add_op_param_ndarray(
self.program, k.encode(), v.ndim, v.ctypes.shape,
v.ctypes.data))
else:
raise Exception("%s: op params must be float or int or ndarray: %s" % (expr, k))
def build(self):
return Eval(_libeasynn.build(self.program))
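A short end-to-end sketch comparing this ctypes-backed builder with the NumPy golden builder, following the pattern used by the grading scripts later in this merge request (assumes libeasynn.so has already been built with the Makefile above):

import numpy as np
import easynn as nn
import easynn_cpp as cpp
import easynn_golden as golden

x = nn.Input("x")
y = nn.Input("y")
p = x * y                        # matrix multiplication under the golden/C++ semantics

e_cpp = p.compile(cpp.Builder())
e_np = p.compile(golden.Builder())

a = np.random.rand(3, 4)
b = np.random.rand(4, 5)
print(np.allclose(e_cpp(x=a, y=b), e_np(x=a, y=b)))   # expected: True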

189
easynn_golden.py Executable file
View file

@ -0,0 +1,189 @@
import numpy as np
import time
def Input(expr, op, args, **kwargs):
if op.name in kwargs:
c = kwargs[op.name]
if isinstance(c, (int, float)):
return float(c)
elif hasattr(c, "shape"):
return c.astype(float)
else:
raise Exception("%s: Input must be float or int or ndarray: %s" % (expr, c))
else:
raise Exception("%s: missing input" % expr)
def Input2d(expr, op, args, **kwargs):
if not op.name in kwargs:
raise Exception("%s: missing input" % expr)
imgs = kwargs[op.name]
if not hasattr(imgs, "shape"):
raise Exception("%s: Input must be ndarray: %s" % (expr, imgs))
if any([
len(imgs.shape) != 4,
imgs.shape[1] != op.parameters["height"],
imgs.shape[2] != op.parameters["width"],
imgs.shape[3] != op.parameters["in_channels"]]):
raise Exception("%s: Invalid input size: %s" % (expr, imgs.shape))
# NHWC => NCHW
return imgs.astype(float).transpose(0,3,1,2)
def Const(expr, op, args, **kwargs):
return op.parameters["value"]
def Neg(expr, op, args, **kwargs):
return -args[0]
def Add(expr, op, args, **kwargs):
a = args[0]
b = args[1]
if not hasattr(a, "shape") and not hasattr(b, "shape"):
return a+b
elif hasattr(a, "shape") and hasattr(b, "shape"):
if a.shape != b.shape:
raise Exception("%s: size mismatch: %s+%s" % (expr, a.shape, b.shape))
return a+b
else:
raise Exception("%s: cannot mix scalar and ndarray" % expr)
def Sub(expr, op, args, **kwargs):
a = args[0]
b = args[1]
if not hasattr(a, "shape") and not hasattr(b, "shape"):
return a-b
elif hasattr(a, "shape") and hasattr(b, "shape"):
if a.shape != b.shape:
raise Exception("%s: size mismatch: %s-%s" % (expr, a.shape, b.shape))
return a-b
else:
raise Exception("%s: cannot mix scalar and ndarray" % expr)
def Mul(expr, op, args, **kwargs):
a = args[0]
b = args[1]
if not hasattr(a, "shape") or not hasattr(b, "shape"):
return a*b
else:
if len(a.shape) != 2 or len(b.shape) != 2:
raise Exception("%s: matmul only: %s*%s" % (expr, a.shape, b.shape))
if a.shape[1] != b.shape[0]:
raise Exception("%s: size mismatch: %s*%s" % (expr, a.shape, b.shape))
return np.matmul(a, b)
def Flatten(expr, op, args, **kwargs):
x = args[0]
if not hasattr(x, "shape"):
raise Exception("%s: ndarray only: %s" % (expr, x))
return x.reshape((x.shape[0], -1))
def ReLU(expr, op, args, **kwargs):
x = args[0]
return x*(x > 0)
def Linear(expr, op, args, **kwargs):
x = args[0]
if not hasattr(x, "shape"):
raise Exception("%s: ndarray only: %s" % (expr, x))
if "weight" not in op.parameters or "bias" not in op.parameters:
raise Exception("%s: missing weight or bias" % expr)
weight = op.parameters["weight"]
bias = op.parameters["bias"]
if not hasattr(weight, "shape") or not hasattr(bias, "shape"):
raise Exception("%s: ndarray only for weight or bias" % expr)
in_features = op.parameters["in_features"]
out_features = op.parameters["out_features"]
if any([
len(x.shape) != 2,
x.shape[1] != in_features,
weight.shape != (out_features, in_features),
bias.shape != (out_features,)]):
raise Exception("%s: size mismatch: %s*%s+%s" % (expr, weight.shape, x.shape, bias.shape))
return np.einsum("ni,oi->no", x, weight,
optimize = "optimal")+bias.reshape((1, out_features))
def MaxPool2d(expr, op, args, **kwargs):
x = args[0]
if not hasattr(x, "shape"):
raise Exception("%s: ndarray only: %s" % (expr, x))
kernel_size = op.parameters["kernel_size"]
stride = op.parameters["stride"]
if kernel_size != stride:
raise Exception("%s: kernel_size != stride" % expr)
if any([
len(x.shape) != 4,
x.shape[2]%stride != 0,
x.shape[3]%stride != 0]):
raise Exception("%s: size mismatch: %s" % (expr, x.shape))
new_shape = (x.shape[0], x.shape[1], x.shape[2]//stride, stride, x.shape[3]//stride, stride)
return np.nanmax(x.reshape(new_shape), axis = (3,5))
def Conv2d(expr, op, args, **kwargs):
x = args[0]
if not hasattr(x, "shape"):
raise Exception("%s: ndarray only: %s" % (expr, x))
if "weight" not in op.parameters or "bias" not in op.parameters:
raise Exception("%s: missing weight or bias" % expr)
weight = op.parameters["weight"]
bias = op.parameters["bias"]
in_channels = op.parameters["in_channels"]
out_channels = op.parameters["out_channels"]
kernel_size = op.parameters["kernel_size"]
padding = op.parameters["padding"]
if any([
len(x.shape) != 4,
x.shape[1] != in_channels,
weight.shape != (out_channels, in_channels, kernel_size, kernel_size),
bias.shape != (out_channels,)]):
raise Exception("%s: size mismatch: %s" % (expr, x.shape))
if padding != 0:
tmp = np.zeros((x.shape[0], x.shape[1], x.shape[2]+2*padding, x.shape[3]+2*padding))
tmp[:, :, padding:-padding, padding:-padding] = x
x = tmp
conv_shape = x.shape[:2]+(x.shape[2]+1-kernel_size, x.shape[3]+1-kernel_size, kernel_size, kernel_size)
conv_strides = x.strides+x.strides[2:]
conv = np.lib.stride_tricks.as_strided(x, shape = conv_shape, strides = conv_strides, writeable = False)
return np.einsum("nihwyx,oiyx->nohw", conv, weight,
optimize = "optimal")+bias.reshape((1, out_channels, 1, 1))
class Eval:
def __init__(self, program):
self.program = program
def __call__(self, **kwargs):
start = time.time()
values = {}
for expr in self.program:
args = [values[ex.id] for ex in expr.inputs]
if expr.op.op_type not in globals():
raise Exception("%s: not implemented" % expr)
values[expr.id] = globals()[expr.op.op_type](expr, expr.op, args, **kwargs)
#print("numpy op", expr.op.op_type, "time %.2f" % (time.time()-start))
res = values[self.program[-1].id]
t = time.time()-start
if t > 0.1:
print("numpy time %.2f" % t)
return res
class Builder:
def __init__(self):
self.program = []
def append(self, expr):
self.program.append(expr)
def build(self):
return Eval(self.program)
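A small NumPy-only sketch of the reshape trick used by MaxPool2d above: splitting H and W into (blocks, stride) axes and taking the max over the in-block axes is equivalent to pooling with kernel_size == stride.

import numpy as np

x = np.arange(2 * 3 * 4 * 6, dtype=float).reshape(2, 3, 4, 6)   # N, C, H, W
stride = 2
pooled = np.nanmax(
    x.reshape(2, 3, 4 // stride, stride, 6 // stride, stride),
    axis=(3, 5))
print(pooled.shape)                                   # (2, 3, 2, 3)
print(pooled[0, 0, 0, 0] == x[0, 0, 0:2, 0:2].max())  # True: max over one 2x2 window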

47
easynn_test.cpp Executable file
View file

@ -0,0 +1,47 @@
/**
* A simple test program that helps you debug your easynn implementation.
*/
#include <stdio.h>
#include "src/libeasynn.h"
int main()
{
program *prog = create_program();
int inputs0[] = {};
append_expression(prog, 0, "a", "Input", inputs0, 0);
//int inputs1[] = {0, 0};
//append_expression(prog, 1, "", "Add", inputs1, 2);
//int inputs2[] = {2, 2};
//append_expression(prog, 2, "", "Mul", inputs2, 2);
//int inputs3[] = {10, 5};
//append_expression(prog, 3, "", "Sub", inputs3, 2);
evaluation *eval = build(prog);
add_kwargs_double(eval, "a", 5);
// tensor tests:
// it is easier to just compile the library and run the Python grading tests
// than to extend this program by hand
int dim = 0;
size_t *shape = nullptr;
double *data = nullptr;
if (execute(eval, &dim, &shape, &data) != 0)
{
printf("evaluation fails\n");
return -1;
}
if (dim == 0)
printf("res = %f\n", data[0]);
else
printf("result as tensor is not supported yet\n");
return 0;
}

12
ece449.code-workspace Executable file
View file

@ -0,0 +1,12 @@
{
"folders": [
{
"path": "."
}
],
"settings": {
"files.associations": {
"array": "cpp"
}
}
}

30
grade.py Executable file
View file

@ -0,0 +1,30 @@
import traceback
import multiprocessing
def grade_all(name, begin, end, funcs):
grade = 0
for q in range(begin, end):
def func():
exit(0 if funcs["grade_Q%d" % q]() else 1)
try:
p = multiprocessing.Process(target = func)
p.start()
p.join(300)
if p.is_alive():
p.terminate()
print("============Q%d timeout!============\n" % q, flush = True)
elif p.exitcode == 0:
print("============Q%d passed!============\n" % q, flush = True)
grade += 1
else:
print("============Q%d failed!============\n" % q, flush = True)
except Exception as e:
print("============Q%d failed!============" % q, flush = True)
print(traceback.format_exc())
print("Grading result: %d functions passed" % grade)
print("*************************************************************")
print("* You may receive 0 points unless your code tests correctly *")
print("* Please commit and push your code at least daily. *")
print("* DO NOT CHANGE any grade_px.py for grading *")
print("*************************************************************")
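A minimal sketch of how the grade_pN scripts below drive grade_all: define grade_Q1..grade_Qn returning True or False and pass globals(). The module name "demo" and the questions here are hypothetical, for illustration only; like the real scripts, this relies on multiprocessing with the fork start method (Linux).

import grade

def grade_Q1():
    return 1 + 1 == 2

def grade_Q2():
    return sum(range(10)) == 45

grade.grade_all("demo", 1, 3, globals())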

104
grade_p1.py Executable file
View file

@ -0,0 +1,104 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import grade
import prj01
# create
def grade_Q1():
a = prj01.Q1()
if not hasattr(a, "shape"):
return False
if a.shape != (10, 5):
return False
return all([a[i, j] == i+j for i in range(10) for j in range(5)])
# add
def grade_Q2():
a = np.random.rand(100, 10)
b = np.random.rand(100, 10)
c = prj01.Q2(a, b)
if not hasattr(c, "shape"):
return False
if c.shape != (100, 10):
return False
return all([abs(c[i, j]-a[i, j]-b[i, j]) < 1e-9 for i in range(100) for j in range(10)])
# mul
def grade_Q3():
a = np.array([[i for i in range(10)]])
b = a.transpose()
c = prj01.Q3(a, b)
d = prj01.Q3(b, a)
if not hasattr(c, "shape") or not hasattr(d, "shape"):
return False
if c.shape != (1, 1) or d.shape != (10, 10):
return False
return c[0, 0] == 285 and all([d[i, j] == i*j for i in range(10) for j in range(10)])
# max
def grade_Q4():
a = np.random.rand(100, 10)
b = prj01.Q4(a)
if len(b) != 100:
return False
return all([a[i, b[i]] >= a[i, j] for i in range(100) for j in range(10)])
# solve
def grade_Q5():
A = -np.random.rand(100, 100)+np.diag([100]*100)
b = np.random.rand(100, 1)
x = prj01.Q5(A, b)
Ax = prj01.Q3(A, x)
return np.allclose(Ax, b)
# a+b
def grade_Q6():
a = np.random.rand(100, 10)
b = np.random.rand(100, 10)
c = prj01.Q6().compile(golden.Builder())(a = a, b = b)
return np.allclose(c, prj01.Q2(a, b))
# a+b*c
def grade_Q7():
a = np.random.rand(100, 50)
b = np.random.rand(100, 10)
c = np.random.rand(10, 50)
d = prj01.Q7().compile(golden.Builder())(a = a, b = b, c = c)
return np.allclose(d, prj01.Q2(a, prj01.Q3(b, c)))
# Ax+b
def grade_Q8():
A = np.random.rand(100, 100)
x = np.random.rand(100, 1)
b = np.random.rand(100, 1)
y = prj01.Q8(A, b).compile(golden.Builder())(x = x)
return np.allclose(y, prj01.Q2(prj01.Q3(A, x), b))
# x**n
def grade_Q9():
x = np.random.rand(100, 100)
y = prj01.Q9(8).compile(golden.Builder())(x = x)
x2 = prj01.Q3(x, x)
x4 = prj01.Q3(x2, x2)
x8 = prj01.Q3(x4, x4)
return np.allclose(y, x8)
# |x|
def grade_Q10():
x = np.random.randn(100, 100)
y = prj01.Q10().compile(golden.Builder())(x = x)
return np.allclose(y, np.abs(x))
grade.grade_all("p1", 1, 11, globals())

89
grade_p2.py Executable file
View file

@ -0,0 +1,89 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import easynn_cpp as cpp
import grade
def random_kwargs(kwargs):
return {k: np.random.random(shape) if shape != None else np.random.random() for k, shape in kwargs.items()}
def is_same(p, n, **kwargs):
e0 = p.compile(golden.Builder())
e1 = p.compile(cpp.Builder())
nkwargs = [random_kwargs(kwargs) for i in range(n)]
return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)])
def grade_Q1():
x = nn.Input("x")
return is_same(x, 1, x = None)
def grade_Q2():
c = nn.Const(np.random.random())
return is_same(c, 1)
def grade_Q3():
x = nn.Input("x")
c = nn.Const(np.random.random())
y = x+c
return is_same(y, 1, x = None)
def grade_Q4():
x = nn.Input("x")
c = nn.Const(np.random.random())
y = x*c
return is_same(y, 1, x = None)
def grade_Q5():
x = nn.Input("x")
y = nn.Input("y")
z = x+y
return is_same(z, 1, x = None, y = None)
def grade_Q6():
x = nn.Input("x")
y = nn.Input("y")
z = x*y
return is_same(z, 1, x = None, y = None)
def grade_Q7():
x = nn.Input("x")
y = nn.Input("y")
z = nn.Input("z")
r = x+y*z
return is_same(r, 1, x = None, y = None, z = None)
def grade_Q8():
x = nn.Input("x")
y = nn.Input("y")
z = nn.Input("z")
r = (x-y)*z
return is_same(r, 1, x = None, y = None, z = None)
def grade_Q9():
x = nn.Input("x")
y = nn.Input("y")
c = nn.Const(np.random.random())
z = x*y-x*c-y*c+c*c
return is_same(z, 10, x = None, y = None)
def grade_Q10():
x = nn.Input("x")
y = nn.Input("y")
c = nn.Const(np.random.random())
z = x*y-x*c-y*c+c*c
return is_same(z, 10, x = None, y = None)
grade.grade_all("p2", 1, 11, globals())

99
grade_p3.py Executable file
View file

@ -0,0 +1,99 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import easynn_cpp as cpp
import grade
def random_kwargs(kwargs):
return {k: np.random.random(shape) if shape != None else np.random.random() for k, shape in kwargs.items()}
def is_same(p, n, **kwargs):
e0 = p.compile(golden.Builder())
e1 = p.compile(cpp.Builder())
nkwargs = [random_kwargs(kwargs) for i in range(n)]
return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)])
def grade_Q1():
x = nn.Input("x")
return is_same(x, 1, x = (9,)) and is_same(x, 1, x = (9, 9))
def grade_Q2():
c1 = nn.Const(np.random.random((10,)))
c2 = nn.Const(np.random.random((10, 10)))
return is_same(c1, 1) and is_same(c2, 1)
def grade_Q3():
x = nn.Input("x")
y = nn.Input("y")
z = x+y
return all([
is_same(z, 1, x = (11,), y = (11,)),
is_same(z, 1, x = (11, 12), y = (11, 12)),
is_same(z, 1, x = (11, 12, 13), y = (11, 12, 13)),
is_same(z, 1, x = (11, 12, 13, 14), y = (11, 12, 13, 14))])
def grade_Q4():
x = nn.Input("x")
y = nn.Input("y")
z = x-y
return all([
is_same(z, 1, x = (11,), y = (11,)),
is_same(z, 1, x = (11, 12), y = (11, 12)),
is_same(z, 1, x = (11, 12, 13), y = (11, 12, 13)),
is_same(z, 1, x = (11, 12, 13, 14), y = (11, 12, 13, 14))])
def grade_Q5():
x = nn.Input("x")
y = nn.Input("y")
z = x*y
return is_same(z, 1, x = (11, 12), y = (12, 13))
def grade_Q6():
x = nn.Input("x")
y = nn.Input("y")
z = x*y
return is_same(z, 1, x = None, y = (12, 13)) and is_same(z, 1, x = (11, 12), y = None)
def grade_Q7():
x = nn.Input("x")
y = nn.Input("y")
z = nn.Input("z")
r = x+y*z
return is_same(r, 1, x = (11, 13), y = (11, 12), z = (12, 13))
def grade_Q8():
x = nn.Input("x")
y = nn.Input("y")
z = nn.Input("z")
r = (x-y)*z
return is_same(r, 1, x = (11, 12), y = (11, 12), z = (12, 13))
def grade_Q9():
x = nn.Input("x")
y = nn.Input("y")
z = nn.Input("z")
r = x*y*z
return is_same(r, 1, x = (11, 12), y = (12, 13), z = (13, 14))
def grade_Q10():
x = nn.Input("x")
y = nn.Input("y")
c1 = nn.Const(np.random.random((11, 12)))
c2 = nn.Const(np.random.random((12, 13)))
z = x*y-x*c2-c1*y+c1*c2
return is_same(z, 1, x = (11, 12), y = (12, 13))
grade.grade_all("p3", 1, 11, globals())

151
grade_p4.py Executable file
View file

@ -0,0 +1,151 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import easynn_cpp as cpp
import grade
def random_kwargs(kwargs):
return {k: np.random.random(shape) if shape != None else np.random.random() for k, shape in kwargs.items()}
def is_same(p, n, **kwargs):
e0 = p.compile(golden.Builder())
e1 = p.compile(cpp.Builder())
nkwargs = [random_kwargs(kwargs) for i in range(n)]
return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)])
def grade_Q1():
relu = nn.ReLU()
x = relu(nn.Input("x"))
return is_same(x, 1, x = (10, 11, 12, 13))
def grade_Q2():
flatten = nn.Flatten()
x = flatten(nn.Input("x"))
return is_same(x, 1, x = (10, 11, 12, 13))
def grade_Q3():
x = nn.Input2d("images", 10, 11, 3)
return is_same(x, 1, images = (50, 10, 11, 3))
def grade_Q4():
f = nn.Linear("f", 100, 10)
x = f(nn.Input("x"))
x.resolve({
"f.weight": np.random.random((10, 100)),
"f.bias": np.random.random((10,))})
return is_same(x, 1, x = (50, 100))
def grade_Q5():
pool = nn.MaxPool2d(3, 3)
x = pool(nn.Input2d("x", 12, 15, 3))
return is_same(x, 1, x = (10, 12, 15, 3))
def grade_Q6():
c = nn.Conv2d("c", 3, 16, 5)
x = c(nn.Input2d("x", 15, 20, 3))
x.resolve({
"c.weight": np.random.random((16, 3, 5, 5)),
"c.bias": np.random.random((16,))
})
return is_same(x, 1, x = (10, 15, 20, 3))
def grade_Q7():
relu = nn.ReLU()
flatten = nn.Flatten()
f1 = nn.Linear("f1", 28*28, 100)
f2 = nn.Linear("f2", 100, 10)
x = nn.Input2d("images", 28, 28, 1)
x = flatten(x)
x = f2(relu(f1(x)))
x.resolve(np.load("msimple_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"][:20]
infer0 = x.compile(golden.Builder())
infer1 = x.compile(cpp.Builder())
logit0 = infer0(images = images)
logit1 = infer1(images = images)
return np.allclose(logit0, logit1)
def grade_Q8():
relu = nn.ReLU()
flatten = nn.Flatten()
f1 = nn.Linear("f1", 28*28, 100)
f2 = nn.Linear("f2", 100, 10)
x = nn.Input2d("images", 28, 28, 1)
x = flatten(x)
x = f2(relu(f1(x)))
x.resolve(np.load("msimple_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"][:1000]
infer0 = x.compile(golden.Builder())
infer1 = x.compile(cpp.Builder())
label0 = infer0(images = images).argmax(axis = 1)
label1 = infer1(images = images).argmax(axis = 1)
return np.allclose(label0, label1)
def grade_Q9():
pool = nn.MaxPool2d(2, 2)
relu = nn.ReLU()
flatten = nn.Flatten()
x = nn.Input2d("images", 28, 28, 1)
c1 = nn.Conv2d("c1", 1, 8, 3) # 28->26
c2 = nn.Conv2d("c2", 8, 8, 3) # 26->24
x = pool(relu(c2(relu(c1(x))))) # 24->12
c3 = nn.Conv2d("c3", 8, 16, 3) # 12->10
c4 = nn.Conv2d("c4", 16, 16, 3) # 10->8
x = pool(relu(c4(relu(c3(x))))) # 8->4
f = nn.Linear("f", 16*4*4, 10)
x = f(flatten(x))
x.resolve(np.load("mnist_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"][:20]
infer0 = x.compile(golden.Builder())
infer1 = x.compile(cpp.Builder())
logit0 = infer0(images = images)
logit1 = infer1(images = images)
return np.allclose(logit0, logit1)
def grade_Q10():
pool = nn.MaxPool2d(2, 2)
relu = nn.ReLU()
flatten = nn.Flatten()
x = nn.Input2d("images", 28, 28, 1)
c1 = nn.Conv2d("c1", 1, 8, 3) # 28->26
c2 = nn.Conv2d("c2", 8, 8, 3) # 26->24
x = pool(relu(c2(relu(c1(x))))) # 24->12
c3 = nn.Conv2d("c3", 8, 16, 3) # 12->10
c4 = nn.Conv2d("c4", 16, 16, 3) # 10->8
x = pool(relu(c4(relu(c3(x))))) # 8->4
f = nn.Linear("f", 16*4*4, 10)
x = f(flatten(x))
x.resolve(np.load("mnist_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"][:1000]
infer0 = x.compile(golden.Builder())
infer1 = x.compile(cpp.Builder())
label0 = infer0(images = images).argmax(axis = 1)
label1 = infer1(images = images).argmax(axis = 1)
return np.allclose(label0, label1)
grade.grade_all("p4", 1, 11, globals())

79
grade_p5.py Executable file
View file

@ -0,0 +1,79 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import grade
def grade_Q1():
relu = nn.ReLU()
flatten = nn.Flatten()
f1 = nn.Linear("f1", 28*28, 32)
f2 = nn.Linear("f2", 32, 10)
x = nn.Input2d("images", 28, 28, 1)
x = flatten(x)
x = f2(relu(f1(x)))
x.resolve(np.load("p5_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"]
labels = mnist_test["labels"]
infer = x.compile(golden.Builder())
pred_labels = infer(images = images).argmax(axis = 1)
count = sum(labels == pred_labels)
print(count)
return count > 8500
def grade_Q2():
relu = nn.ReLU()
flatten = nn.Flatten()
f1 = nn.Linear("f1", 28*28, 32)
f2 = nn.Linear("f2", 32, 10)
x = nn.Input2d("images", 28, 28, 1)
x = flatten(x)
x = f2(relu(f1(x)))
x.resolve(np.load("p5_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"]
labels = mnist_test["labels"]
infer = x.compile(golden.Builder())
pred_labels = infer(images = images).argmax(axis = 1)
count = sum(labels == pred_labels)
return count > 9000
def grade_Q3():
relu = nn.ReLU()
flatten = nn.Flatten()
f1 = nn.Linear("f1", 28*28, 32)
f2 = nn.Linear("f2", 32, 10)
x = nn.Input2d("images", 28, 28, 1)
x = flatten(x)
x = f2(relu(f1(x)))
x.resolve(np.load("p5_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"]
labels = mnist_test["labels"]
infer = x.compile(golden.Builder())
pred_labels = infer(images = images).argmax(axis = 1)
count = sum(labels == pred_labels)
return count > 9300
def grade_Q4():
relu = nn.ReLU()
flatten = nn.Flatten()
f1 = nn.Linear("f1", 28*28, 32)
f2 = nn.Linear("f2", 32, 10)
x = nn.Input2d("images", 28, 28, 1)
x = flatten(x)
x = f2(relu(f1(x)))
x.resolve(np.load("p5_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"]
labels = mnist_test["labels"]
infer = x.compile(golden.Builder())
pred_labels = infer(images = images).argmax(axis = 1)
count = sum(labels == pred_labels)
return count > 9500
grade.grade_all("p5", 1, 5, globals())

91
grade_p6.py Executable file
View file

@ -0,0 +1,91 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import grade
def grade_Q1():
pool = nn.MaxPool2d(2, 2)
relu = nn.ReLU()
flatten = nn.Flatten()
x = nn.Input2d("images", 28, 28, 1)
c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24
x = pool(relu(c1(x))) # 24->12
c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8
x = pool(relu(c2(x))) # 8->4
f = nn.Linear("f", 8*4*4, 10)
x = f(flatten(x))
x.resolve(np.load("p6_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"]
labels = mnist_test["labels"]
infer = x.compile(golden.Builder())
pred_labels = infer(images = images).argmax(axis = 1)
count = sum(labels == pred_labels)
print(count)
return count > 8500
def grade_Q2():
pool = nn.MaxPool2d(2, 2)
relu = nn.ReLU()
flatten = nn.Flatten()
x = nn.Input2d("images", 28, 28, 1)
c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24
x = pool(relu(c1(x))) # 24->12
c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8
x = pool(relu(c2(x))) # 8->4
f = nn.Linear("f", 8*4*4, 10)
x = f(flatten(x))
x.resolve(np.load("p6_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"]
labels = mnist_test["labels"]
infer = x.compile(golden.Builder())
pred_labels = infer(images = images).argmax(axis = 1)
count = sum(labels == pred_labels)
return count > 9000
def grade_Q3():
pool = nn.MaxPool2d(2, 2)
relu = nn.ReLU()
flatten = nn.Flatten()
x = nn.Input2d("images", 28, 28, 1)
c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24
x = pool(relu(c1(x))) # 24->12
c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8
x = pool(relu(c2(x))) # 8->4
f = nn.Linear("f", 8*4*4, 10)
x = f(flatten(x))
x.resolve(np.load("p6_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"]
labels = mnist_test["labels"]
infer = x.compile(golden.Builder())
pred_labels = infer(images = images).argmax(axis = 1)
count = sum(labels == pred_labels)
return count > 9300
def grade_Q4():
pool = nn.MaxPool2d(2, 2)
relu = nn.ReLU()
flatten = nn.Flatten()
x = nn.Input2d("images", 28, 28, 1)
c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24
x = pool(relu(c1(x))) # 24->12
c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8
x = pool(relu(c2(x))) # 8->4
f = nn.Linear("f", 8*4*4, 10)
x = f(flatten(x))
x.resolve(np.load("p6_params.npz"))
mnist_test = np.load("mnist_test.npz")
images = mnist_test["images"]
labels = mnist_test["labels"]
infer = x.compile(golden.Builder())
pred_labels = infer(images = images).argmax(axis = 1)
count = sum(labels == pred_labels)
return count > 9500
grade.grade_all("p6", 1, 5, globals())

BIN
mnist_params.npz Executable file

Binary file not shown.

BIN
mnist_test.npz Executable file

Binary file not shown.

BIN
mnist_train.npz Executable file

Binary file not shown.

BIN
msimple_params.npz Executable file

Binary file not shown.

BIN
p5_params.npz Executable file

Binary file not shown.

76
prj01.py Executable file
View file

@ -0,0 +1,76 @@
import numpy as np
import easynn as nn
# Create a numpy array of 10 rows and 5 columns.
# Set the element at row i and column j to be i+j.
def Q1():
array = np.zeros((10, 5))
for i in range(10):
for j in range(5):
array[i, j] = i+j
return array
# Add two numpy arrays together.
def Q2(a, b):
return a + b
# Multiply two 2D numpy arrays using matrix multiplication.
def Q3(a, b):
return np.matmul(a, b)
# For each row of a 2D numpy array, find the column index
# with the maximum element. Return all these column indices.
def Q4(a):
return [ np.argmax(row) for row in a ]
# Solve Ax = b.
def Q5(A, b):
return np.linalg.solve(A, b)
# Return an EasyNN expression for a+b.
def Q6():
a = nn.Input('a')
b = nn.Input('b')
c = a+b
return c
# Return an EasyNN expression for a+b*c.
def Q7():
a = nn.Input('a')
b = nn.Input('b')
c = nn.Input('c')
d = a+b*c
return d
# Given A and b, return an EasyNN expression for Ax+b.
def Q8(A, b):
A_const = nn.Const(A)
b_const = nn.Const(b)
x_input = nn.Input("x")
expr = ((A_const*x_input) + b_const)
return expr
# Given n, return an EasyNN expression for x**n.
def Q9(n):
x = nn.Input("x")
n_const = nn.Const(n)
expr = x
for _ in range(n - 1):
expr = expr * x
return expr
# Return an EasyNN expression for the element-wise absolute value |x|.
def Q10():
x = nn.Input("x")
# ReLU(x) = max(x, 0) and x - ReLU(x) = min(x, 0), so ReLU(x) - (x - ReLU(x)) = |x|.
pos = nn.ReLU()(x)
neg = x - pos
return pos - neg
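A quick NumPy check (a sketch, independent of EasyNN) of the identity Q10 relies on: ReLU(x) - (x - ReLU(x)) = 2*max(x, 0) - x = |x|.

import numpy as np

x = np.random.randn(5, 5)
relu = x * (x > 0)
print(np.allclose(relu - (x - relu), np.abs(x)))   # expected: True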

274
prj05.py Executable file
View file

@ -0,0 +1,274 @@
import numpy as np
import time
import pdb
from matplotlib import pyplot
# save theta to p5_params.npz that can be used by easynn
def save_theta(theta):
f1_W, f1_b, f2_W, f2_b = theta
np.savez_compressed("p5_params.npz", **{
"f1.weight": f1_W,
"f1.bias": f1_b,
"f2.weight": f2_W,
"f2.bias": f2_b
})
# initialize theta using uniform distribution [-bound, bound]
# return theta as (f1_W, f1_b, f2_W, f2_b)
def initialize_theta(bound):
f1_W = np.random.uniform(-bound, bound, (32, 784))
f1_b = np.random.uniform(-bound, bound, 32)
f2_W = np.random.uniform(-bound, bound, (10, 32))
f2_b = np.random.uniform(-bound, bound, 10)
return (f1_W, f1_b, f2_W, f2_b)
# forward:
# x = Flatten(images)
# g = Linear_f1(x)
# h = ReLU(g)
# z = Linear_f2(h)
# return (z, h, g, x)
def forward(images, theta):
# number of samples
N = images.shape[0]
# unpack theta into f1 and f2
f1_W, f1_b, f2_W, f2_b = theta
# x = Flatten(images)
x = images.astype(float).transpose(0,3,1,2).reshape((N, -1))
# g = Linear_f1(x)
g = np.zeros((N, f1_b.shape[0]))
for i in range(N):
g[i, :] = np.matmul(f1_W, x[i])+f1_b
# h = ReLU(g)
h = g*(g > 0)
# z = Linear_f2(h)
z = np.zeros((N, f2_b.shape[0]))
for i in range(N):
z[i, :] = np.matmul(f2_W, h[i])+f2_b
return (z, h, g, x)
# backprop:
# J = cross entropy between labels and softmax(z)
# return nabla_J
def backprop(labels, theta, z, h, g, x):
# number of samples
N = labels.shape[0]
# unpack theta into f1 and f2
f1_W, f1_b, f2_W, f2_b = theta
# nabla_J consists of partial J to partial f1_W, f1_b, f2_W, f2_b
p_f1_W = np.zeros(f1_W.shape)
p_f1_b = np.zeros(f1_b.shape)
p_f2_W = np.zeros(f2_W.shape)
p_f2_b = np.zeros(f2_b.shape)
for i in range(N):
# compute the contribution to nabla_J for sample i
# cross entropy and softmax
# compute partial J to partial z[i]
# scale by 1/N for averaging
expz = np.exp(z[i]-max(z[i]))
p_z = expz/sum(expz)/N
p_z[labels[i]] -= 1/N
# z = Linear_f2(h)
# compute partial J to partial h[i]
# accumulate partial J to partial f2_W, f2_b
p_h = np.dot(f2_W.T, p_z)
p_f2_W += np.outer(p_z, h[i])
p_f2_b += p_z
# h = ReLU(g)
# compute partial J to partial g[i]
p_g = p_h * (g[i] > 0)
# g = Linear_f1(x)
# accumulate partial J to partial f1_W, f1_b
p_f1_W += np.outer(p_g, x[i])
p_f1_b += p_g
return (p_f1_W, p_f1_b, p_f2_W, p_f2_b)
# apply SGD to update theta by nabla_J and the learning rate epsilon
# return updated theta
def update_theta(theta, nabla_J, epsilon):
# ToDo: modify code below as needed
#updated_theta = theta
#return updated_theta
f1_W, f1_b, f2_W, f2_b = theta
p_f1_W, p_f1_b, p_f2_W, p_f2_b = nabla_J
# update the weights and biases for the first layer (f1)
f1_W_updated = f1_W - epsilon * p_f1_W
f1_b_updated = f1_b - epsilon * p_f1_b
# update the weights and biases for the second layer (f2)
f2_W_updated = f2_W - epsilon * p_f2_W
f2_b_updated = f2_b - epsilon * p_f2_b
return (f1_W_updated, f1_b_updated, f2_W_updated, f2_b_updated)
def print_training_hyperparams_for_session(epsilon, batch_size, bound):
print("Starting training session with 10 epochs:")
print("")
print("Hyperparameters:")
print(f"epsilon: {epsilon}")
print(f"bound: {bound}")
print(f"batch_size: {batch_size}")
print("")
print("Results:")
def plot_epoch(epochs, accuracies, epsilon, batch_size, bound):
pyplot.figure(figsize=(10, 6))
pyplot.plot(epochs, accuracies, label=f"Epsilon: {epsilon}, Batch Size: {batch_size}, Bound: {bound}")
pyplot.xlabel('Epoch')
pyplot.ylabel('Accuracy')
pyplot.title('Training Accuracy over Epochs')
pyplot.legend()
pyplot.grid(True)
pyplot.show()
def plot_all_epochs(training_results):
pyplot.figure(figsize=(12, 8))
for epochs, accuracies, epsilon, batch_size, bound in training_results:
label = f"Epsilon: {epsilon}, Batch Size: {batch_size}, Bound: {bound}"
pyplot.plot(epochs, accuracies, label=label)
pyplot.xlabel('Epoch')
pyplot.ylabel('Accuracy')
pyplot.title('Training Accuracy over Epochs for Different Hyperparameters')
pyplot.legend()
pyplot.grid(True)
pyplot.show()
def plot_table(training_results):
# Setting up the data for the table
cell_text = []
columns = ['Epoch', 'Accuracy', 'Epsilon', 'Batch Size', 'Bound']
for result in training_results:
epochs, accuracies, epsilon, batch_size, bound = result
for epoch, accuracy in zip(epochs, accuracies):
cell_text.append([epoch, f"{accuracy:.3f}", epsilon, batch_size, bound])
# Determine the figure size needed for the table
figsize = (10, len(cell_text) * 0.2)
fig, ax = pyplot.subplots(figsize=figsize)
ax.axis('tight')
ax.axis('off')
# Create the table
table = ax.table(cellText=cell_text, colLabels=columns, loc='center', cellLoc='center')
# Adjust table scale
table.auto_set_font_size(False)
table.set_fontsize(8)
table.auto_set_column_width(col=list(range(len(columns))))
pyplot.show()
def start_training(epsilon, batch_size, bound, mnist_train):
# ToDo: set numpy random seed to the last 8 digits of your CWID
np.random.seed(20497299)
validation_images = mnist_train["images"][:1000]
validation_labels = mnist_train["labels"][:1000]
training_images = mnist_train["images"][1000:]
training_labels = mnist_train["labels"][1000:]
# hyperparameters
# we can experiment with these values to see if increasing or decreasing
# these values can influence our accuracy
# default values
#bound = 1 # initial weight range
#epsilon = 0.00001 # learning rate
#print_training_hyperparams_for_session(epsilon, batch_size, bound)
# start training
accuracies = []
epochs = []
start = time.time()
theta = initialize_theta(bound)
batches = training_images.shape[0]//batch_size
for epoch in range(10):
indices = np.arange(training_images.shape[0])
np.random.shuffle(indices)
for i in range(batches):
batch_images = training_images[indices[i*batch_size:(i+1)*batch_size]]
batch_labels = training_labels[indices[i*batch_size:(i+1)*batch_size]]
z, h, g, x = forward(batch_images, theta)
nabla_J = backprop(batch_labels, theta, z, h, g, x)
theta = update_theta(theta, nabla_J, epsilon)
# check accuracy using validation examples
z, _, _, _ = forward(validation_images, theta)
pred_labels = z.argmax(axis = 1)
accuracy = sum(pred_labels == validation_labels) / validation_images.shape[0]
accuracies.append(accuracy)
epochs.append(epoch)
#count = sum(pred_labels == validation_labels)
print("epoch %d, accuracy %.3f, time %.2f" % (
epoch, accuracy, time.time()-start))
#plot_epoch(epochs, accuracies, epsilon, batch_size, bound)
# save the weights to be submitted
save_theta(theta)
# return this data so we can plot it with matplotlib
return epochs, accuracies
def main():
training_results = []
# load training data once
mnist_train = np.load("mnist_train.npz")
# we can add to this list if we want to test combinations of hyperparameters
hyperparams = [
(0.00001, 1, 4), # default params
(0.00001, 0.1, 4),
(0.00001, 0.5, 4),
(0.00001, 0.7, 4),
(0.00001, 0.01, 4),
(0.00001, 0.01, 3),
(0.00001, 0.01, 2),
(0.000013, 0.012, 1),
(0.000013, 0.012002899999999983, 1),
(0.000013, 0.01200591999999996, 1),
]
for epsilon, bound, batch_size in hyperparams:
epochs, accuracies = start_training(epsilon, batch_size, bound, mnist_train)
training_results.append((epochs, accuracies, epsilon, batch_size, bound))
# uncomment if you would like to see plotted results
#plot_all_epochs(training_results)
# plot table
#plot_table(training_results[9])
if __name__ == '__main__':
main()
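A finite-difference gradient check is a useful way to validate the backprop above. The sketch below (assuming prj05.py and its dependencies are importable) compares the analytic gradient of one f2 bias entry against a central-difference estimate of the mean cross-entropy loss; the two numbers should agree to several decimal places.

import numpy as np
import prj05

def cross_entropy(z, labels):
    # J = mean over samples of -log softmax(z)[label]
    z = z - z.max(axis=1, keepdims=True)
    logp = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    return -np.mean(logp[np.arange(len(labels)), labels])

np.random.seed(0)
images = np.random.rand(4, 28, 28, 1)       # tiny random batch in NHWC, as forward expects
labels = np.random.randint(0, 10, size=4)
theta = prj05.initialize_theta(0.1)
f1_W, f1_b, f2_W, f2_b = theta

z, h, g, x = prj05.forward(images, theta)
nabla_J = prj05.backprop(labels, theta, z, h, g, x)   # (p_f1_W, p_f1_b, p_f2_W, p_f2_b)

eps = 1e-5
b_plus, b_minus = f2_b.copy(), f2_b.copy()
b_plus[0] += eps
b_minus[0] -= eps
z_plus, _, _, _ = prj05.forward(images, (f1_W, f1_b, f2_W, b_plus))
z_minus, _, _, _ = prj05.forward(images, (f1_W, f1_b, f2_W, b_minus))
numeric = (cross_entropy(z_plus, labels) - cross_entropy(z_minus, labels)) / (2 * eps)
print("numeric %.6f, analytic %.6f" % (numeric, nabla_J[3][0]))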

374
prj06.py Executable file
View file

@ -0,0 +1,374 @@
import numpy as np
import time
import random
import matplotlib.pyplot as plt
# save theta to p6_params.npz that can be used by easynn
def save_theta(theta):
c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta
np.savez_compressed("p6_params.npz", **{
"c1.weight": c1_W,
"c1.bias": c1_b,
"c2.weight": c2_W,
"c2.bias": c2_b,
"f.weight": f_W,
"f.bias": f_b
})
# initialize theta using uniform distribution [-bound, bound]
# return theta as (c1_W, c1_b, c2_W, c2_b, f_W, f_b)
def initialize_theta(bound):
c1_W = np.random.uniform(-bound, bound, (8, 1, 5, 5))
c1_b = np.random.uniform(-bound, bound, 8)
c2_W = np.random.uniform(-bound, bound, (8, 8, 5, 5))
c2_b = np.random.uniform(-bound, bound, 8)
f_W = np.random.uniform(-bound, bound, (10, 128))
f_b = np.random.uniform(-bound, bound, 10)
return (c1_W, c1_b, c2_W, c2_b, f_W, f_b)
# return out_nchw
def Conv2d(in_nchw, kernel_W, kernel_b):
OC, IC, KH, KW = kernel_W.shape
N, C, H, W = in_nchw.shape
if C != IC or kernel_b.shape != (OC,):
raise Exception("Conv2d size mismatch: %s @ (%s, %s)" % (
in_nchw.shape, kernel_W.shape, kernel_b.shape))
# view in_nchw as a 6D tensor
shape = (N, IC, H-KH+1, W-KW+1, KH, KW)
strides = in_nchw.strides+in_nchw.strides[2:]
data = np.lib.stride_tricks.as_strided(in_nchw,
shape = shape, strides = strides, writeable = False)
# np.einsum("nihwyx,oiyx->nohw", data, kernel_W)
nhwo = np.tensordot(data, kernel_W, ((1,4,5), (1,2,3)))
return nhwo.transpose(0,3,1,2)+kernel_b.reshape((1, OC, 1, 1))
# return p_b for the whole batch
def Conv2d_backprop_b(p_out, in_nchw, kernel_W, kernel_b):
OC, IC, KH, KW = kernel_W.shape
N, C, H, W = in_nchw.shape
if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1):
raise Exception("Conv2d_backprop_b size mismatch: %s = %s @ (%s, %s)" % (
p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape))
return np.einsum("nohw->o", p_out, optimize = "optimal")/N
# return p_W for the whole batch
def Conv2d_backprop_W(p_out, in_nchw, kernel_W, kernel_b):
OC, IC, KH, KW = kernel_W.shape
N, C, H, W = in_nchw.shape
if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1):
raise Exception("Conv2d_backprop_W size mismatch: %s = %s @ (%s, %s)" % (
p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape))
# view in_nchw as a 6D tensor
shape = (N, IC, KH, KW, H-KH+1, W-KW+1)
strides = in_nchw.strides+in_nchw.strides[2:]
data = np.lib.stride_tricks.as_strided(in_nchw,
shape = shape, strides = strides, writeable = False)
# np.einsum("nohw,niyxhw->oiyx", p_out, data)
return np.tensordot(p_out, data, ((0,2,3), (0,4,5)))/N
# return p_in for the whole batch
def Conv2d_backprop_in(p_out, in_nchw, kernel_W, kernel_b):
OC, IC, KH, KW = kernel_W.shape
N, C, H, W = in_nchw.shape
if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1):
raise Exception("Conv2d_backprop_in size mismatch: %s = %s @ (%s, %s)" % (
p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape))
# view p_out as a padded 6D tensor
padded = np.zeros((N, OC, H+KH-1, W+KW-1))
padded[:, :, KH-1:-KH+1, KW-1:-KW+1] = p_out
shape = (N, IC, H, W, KH, KW)
strides = padded.strides+padded.strides[2:]
data = np.lib.stride_tricks.as_strided(padded,
shape = shape, strides = strides, writeable = False)
# np.einsum("nohwyx,oiyx->nihw", data, kernel_W)
nhwi = np.tensordot(data, kernel_W, ((1,4,5), (0,2,3)))
return nhwi.transpose((0,3,1,2))
# return out_nchw
def MaxPool_2by2(in_nchw):
N, C, H, W = in_nchw.shape
shape = (N, C, H//2, 2, W//2, 2)
return np.nanmax(in_nchw.reshape(shape), axis = (3,5))
# return p_in for the whole batch
def MaxPool_2by2_backprop(p_out, out_nchw, in_nchw):
p_in = np.zeros(in_nchw.shape)
p_in[:, :, 0::2, 0::2] = p_out*(out_nchw == in_nchw[:, :, 0::2, 0::2])
p_in[:, :, 0::2, 1::2] = p_out*(out_nchw == in_nchw[:, :, 0::2, 1::2])
p_in[:, :, 1::2, 0::2] = p_out*(out_nchw == in_nchw[:, :, 1::2, 0::2])
p_in[:, :, 1::2, 1::2] = p_out*(out_nchw == in_nchw[:, :, 1::2, 1::2])
return p_in
# forward:
# x = NCHW(images)
# cx = Conv2d_c1(x)
# rx = ReLU(cx)
# y = MaxPool(rx)
# cy = Conv2d_c2(y)
# ry = ReLU(cy)
# g = MaxPool(ry)
# h = Flatten(g)
# z = Linear_f(h)
# return (z, h, g, ry, cy, y, rx, cx, x)
def forward(images, theta):
# number of samples
N = images.shape[0]
# unpack theta into c1, c2, and f
c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta
# x = NCHW(images)
x = images.astype(float).transpose(0,3,1,2)
# cx = Conv2d_c1(x)
cx = Conv2d(x, c1_W, c1_b)
# rx = ReLU(cx)
rx = cx*(cx > 0)
# y = MaxPool(rx)
y = MaxPool_2by2(rx)
# cy = Conv2d_c2(y)
cy = Conv2d(y, c2_W, c2_b)
# ry = ReLU(cy)
ry = cy*(cy > 0)
# g = MaxPool(ry)
g = MaxPool_2by2(ry)
# h = Flatten(g)
h = g.reshape((N, -1))
# z = Linear_f(h)
z = np.zeros((N, f_b.shape[0]))
for i in range(N):
z[i, :] = np.matmul(f_W, h[i])+f_b
return (z, h, g, ry, cy, y, rx, cx, x)
def backprop(labels, theta, z, h, g, ry, cy, y, rx, cx, x):
# Number of samples
N = labels.shape[0]
# Unpack theta into c1, c2, and f
c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta
# Initialize gradients for each parameter
p_c1_W, p_c1_b = np.zeros_like(c1_W), np.zeros_like(c1_b)
p_c2_W, p_c2_b = np.zeros_like(c2_W), np.zeros_like(c2_b)
p_f_W, p_f_b = np.zeros_like(f_W), np.zeros_like(f_b)
# sample-by-sample from z to h
#p_f_W = np.zeros(f_W.shape)
#p_f_b = np.zeros(f_b.shape)
p_h = np.zeros(h.shape)
# Step 1: Backprop through linear layer
for i in range(N):
# Softmax and cross-entropy gradient
expz = np.exp(z[i] - np.max(z[i]))
softmax = expz / np.sum(expz)
softmax[labels[i]] -= 1
p_z = softmax / N
# Backprop through linear layer to compute gradients
p_h[i, :] = np.dot(f_W.T, p_z)
p_f_W += np.outer(p_h[i, :], p_z).T
p_f_b += p_z
# Average the gradients over the batch
p_f_W /= N
p_f_b /= N
# Step 2: Backprop through flatten operation
p_g = p_h.reshape(g.shape)
# Step 3: Backprop through MaxPool layer
p_ry = MaxPool_2by2_backprop(p_g, g, ry)
# Step 4: Backprop through ReLU layer
p_cy = (ry > 0) * p_ry
# Step 5: Backprop through second convolutional layer
p_c2_W = Conv2d_backprop_W(p_cy, y, c2_W, c2_b)
p_c2_b = Conv2d_backprop_b(p_cy, y, c2_W, c2_b)
p_y = Conv2d_backprop_in(p_cy, y, c2_W, c2_b)
# Step 6: Backprop through first MaxPool layer
p_rx = MaxPool_2by2_backprop(p_y, y, rx)
# Step 7: Backprop through first ReLU layer
p_cx = (rx > 0) * p_rx
# Step 8: Backprop through first convolutional layer
p_c1_W = Conv2d_backprop_W(p_cx, x, c1_W, c1_b)
p_c1_b = Conv2d_backprop_b(p_cx, x, c1_W, c1_b)
return (p_c1_W, p_c1_b, p_c2_W, p_c2_b, p_f_W, p_f_b)
# apply SGD to update theta by nabla_J and the learning rate epsilon
# return updated theta
def update_theta(theta, nabla_J, epsilon):
# Unpack the current parameters and their corresponding gradients
c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta
p_c1_W, p_c1_b, p_c2_W, p_c2_b, p_f_W, p_f_b = nabla_J
# Update each parameter by subtracting the gradient times the learning rate
c1_W_updated = c1_W - epsilon * p_c1_W
c1_b_updated = c1_b - epsilon * p_c1_b
c2_W_updated = c2_W - epsilon * p_c2_W
c2_b_updated = c2_b - epsilon * p_c2_b
f_W_updated = f_W - epsilon * p_f_W
f_b_updated = f_b - epsilon * p_f_b
# Return the updated parameters as a tuple
updated_theta = (c1_W_updated, c1_b_updated, c2_W_updated, c2_b_updated, f_W_updated, f_b_updated)
return updated_theta
def sample_attempts(accuracies, num_samples=10):
total_attempts = len(accuracies)
if total_attempts <= num_samples:
return accuracies
# Always include first and last attempts
sampled_indices = [0, total_attempts - 1]
# Sample additional attempts, excluding the first and last
additional_samples = random.sample(range(1, total_attempts - 1), num_samples - 2)
sampled_indices.extend(additional_samples)
# Sort the indices to maintain chronological order
sampled_indices.sort()
# Get the sampled attempts
sampled_accuracies = [accuracies[i] for i in sampled_indices]
return sampled_accuracies
# hyperparameters
# default params
#bound = 1 # initial weight range
#epsilon = 0.00001 # learning rate
#batch_size = 4
# start training
def start_training(mnist_train, bound, epsilon, batch_size):
print(f"bound: {bound}, epsilon: {epsilon}, batch_size: {batch_size}")
validation_images = mnist_train["images"][:1000]
validation_labels = mnist_train["labels"][:1000]
training_images = mnist_train["images"][1000:21000]
training_labels = mnist_train["labels"][1000:21000]
#training_images = mnist_train["images"][1000:11000]
#training_labels = mnist_train["labels"][1000:11000]
start = time.time()
theta = initialize_theta(bound)
batches = training_images.shape[0]//batch_size
accuracies = []
for epoch in range(5):
indices = np.arange(training_images.shape[0])
np.random.shuffle(indices)
for i in range(batches):
batch_images = training_images[indices[i*batch_size:(i+1)*batch_size]]
batch_labels = training_labels[indices[i*batch_size:(i+1)*batch_size]]
z, h, g, ry, cy, y, rx, cx, x = forward(batch_images, theta)
nabla_J = backprop(batch_labels, theta, z, h, g, ry, cy, y, rx, cx, x)
theta = update_theta(theta, nabla_J, epsilon)
# check accuracy using validation examples
z, _, _, _, _, _, _, _, _ = forward(validation_images, theta)
pred_labels = z.argmax(axis = 1)
count = sum(pred_labels == validation_labels)
accuracy = count/validation_images.shape[0]
accuracies.append(accuracy)
print("epoch %d, accuracy %.3f, time %.2f" % (
epoch, accuracy, time.time()-start))
# save the weights to be submitted
save_theta(theta)
return accuracies
def plot_accuracies(accuracies):
plt.figure(figsize=(12, 6))
for attempt, acc_list in accuracies:
plt.plot(acc_list, label=f'Attempt {attempt+1}')
plt.title('Model Accuracy over Epochs for Sampled Attempts')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()
def main():
# list of (attempt, accuracies_for_run)
accuracies = []
# ToDo: set numpy random seed to the last 8 digits of your CWID
np.random.seed(20497299)
# load training data once
mnist_train = np.load("mnist_train.npz")
max_accuracy = 0.93 # Target accuracy
max_attempts = 999999 # Maximum number of attempts to reach the target accuracy
# initial starting values
# notes from experimentation:
# 90 percent accuracy
# attempt: 36
# increase bound/epsilon *= 1.01
bound = 0.01
epsilon = 0.0001
batch_size = 1
# 90 percent accuracy if we start at initial starting values. on 36th attempt
# note: if we start at these below values we do not hit 90. only if we start at 0.01/0.0001 and *= by 1.01
#bound = 0.014166027560312683
#epsilon = 0.0001416602756031268
#batch_size = 1
attempt = 0
while attempt < max_attempts:
print(f"Attempt: {attempt} out of: {max_attempts}")
accuracies_for_run = start_training(mnist_train, bound, epsilon, batch_size)
accuracies.append((attempt, accuracies_for_run))
if any(i >= max_accuracy for i in accuracies_for_run):
print(f"Reached Max Accuracy: {max_accuracy}")
print(f"bound: {bound}")
print(f"epsilon: {epsilon}")
print(f"batch_size: {batch_size}")
sampled_accuracies = sample_attempts(accuracies)
plot_accuracies(sampled_accuracies)
break
bound *= 1.0005 # Increase by 0.05%
epsilon *= 1.0005 # Increase by 0.05%
#bound *= 1.01 # Increase by 1%
#epsilon *= 1.01 # Increase by 1%
attempt += 1
if __name__ == "__main__":
main()
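A small sanity check (a sketch; assumes prj06.py is importable) comparing the strided Conv2d above against a naive quadruple loop on a tiny input:

import numpy as np
import prj06

np.random.seed(0)
x = np.random.rand(2, 3, 8, 8)          # N, C, H, W
weight = np.random.rand(4, 3, 5, 5)     # OC, IC, KH, KW
bias = np.random.rand(4)

fast = prj06.Conv2d(x, weight, bias)

naive = np.zeros((2, 4, 4, 4))          # output spatial size: 8 - 5 + 1 = 4
for n in range(2):
    for o in range(4):
        for i in range(4):
            for j in range(4):
                patch = x[n, :, i:i+5, j:j+5]
                naive[n, o, i, j] = np.sum(patch * weight[o]) + bias[o]

print(np.allclose(fast, naive))         # expected: True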

17
src/eval_add.cpp Executable file
View file

@ -0,0 +1,17 @@
#include "eval_add.h"
tensor eval_add::compute(const tensor &a, const tensor &b) {
tensor c = a + b;
return c;
}
void eval_add::eval(vars_type &variables, const kwargs_type &kwargs) {
assert(inputs_.size() == 2);
auto ita = variables.find(inputs_[0]);
auto itb = variables.find(inputs_[1]);
variables[expr_id_] = compute(ita->second,itb->second);
}
std::shared_ptr<eval_op> eval_add::clone(const expression &expr) {
return std::make_shared<eval_add>(expr);
}

21
src/eval_add.h Executable file
View file

@ -0,0 +1,21 @@
#ifndef EVAL_ADD_H
#define EVAL_ADD_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_add: public eval_op {
tensor compute(const tensor &a, const tensor &b);
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_add() {}
eval_add(const expression &expr): eval_op(expr) {}
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Add") == proto_map.end());
proto_map["Add"] = std::make_shared<eval_add>(); // where is expr?
}
}; // class eval_add
#endif
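For readers less familiar with the registration scheme used by eval_add (and repeated by the other eval_* classes below), here is a rough Python analogue of the prototype/clone pattern — a sketch only, with hypothetical names, not part of the project's actual code:

class Expression:                        # stands in for the C++ expression class
    def __init__(self, expr_id, op_type, inputs):
        self.expr_id, self.op_type, self.inputs = expr_id, op_type, inputs

class EvalOp:
    def __init__(self, expr=None):
        self.expr = expr                 # prototypes are built once, with no expression
    def clone(self, expr):
        return type(self)(expr)          # bind a fresh evaluator to a concrete expression
    def eval(self, variables, kwargs):
        raise NotImplementedError

class EvalAdd(EvalOp):
    def eval(self, variables, kwargs):
        a, b = (variables[i] for i in self.expr.inputs)
        variables[self.expr.expr_id] = a + b

proto_map = {"Add": EvalAdd()}           # store_prototype()

def build(expressions):
    # like building an evaluation in C++: clone one evaluator per expression
    return [proto_map[e.op_type].clone(e) for e in expressions]

variables = {0: 2.0, 1: 3.0}             # pretend t0 and t1 were already evaluated
for op in build([Expression(2, "Add", (0, 1))]):
    op.eval(variables, {})
print(variables[2])                      # 5.0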

17
src/eval_const.cpp Executable file
View file

@ -0,0 +1,17 @@
#include "eval_const.h"
eval_const::eval_const(const expression &expr): eval_op(expr) {
auto params = expr.get_op_param_tensor("value");
value_ = params;
// value_.print(); // uncomment for viewing logging details of tensor object
}
void eval_const::eval(vars_type &variables, const kwargs_type &kwargs) {
variables[expr_id_] = value_;
// value_.print(); // uncomment for viewing logging details of tensor object
}
std::shared_ptr<eval_op> eval_const::clone(const expression &expr) {
return std::make_shared<eval_const>(expr);
}

21
src/eval_const.h Executable file
View file

@ -0,0 +1,21 @@
#ifndef EVAL_CONST_H
#define EVAL_CONST_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_const: public eval_op {
tensor value_;
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_const() {}
eval_const(const expression &expr);
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Const") == proto_map.end());
proto_map["Const"] = std::make_shared<eval_const>(); // where is expr?
}
}; // class eval_const
#endif

36
src/eval_conv2d.cpp Executable file
View file

@ -0,0 +1,36 @@
#include "eval_conv2d.h"
eval_conv2d::eval_conv2d(const expression &expr): eval_op(expr),
in_channels(expr.get_op_param_int("in_channels")),
kernel_size(expr.get_op_param_int("kernel_size")),
out_channels(expr.get_op_param_int("out_channels")),
padding(expr.get_op_param_double("padding")),
weight(expr.get_op_param_tensor("weight")),
bias(expr.get_op_param_tensor("bias")),
stride(expr.get_op_param_int("stride"))
{
}
void eval_conv2d::eval(vars_type &variables, const kwargs_type &kwargs) {
if (inputs_.empty()) {
throw std::runtime_error("No input tensor for eval_conv2d.");
}
// Get the input tensor from variables using the expression ID of the input
int inputExprId = inputs_[0];
if (variables.find(inputExprId) == variables.end()) {
throw std::runtime_error("Input tensor not found in variables map.");
}
tensor input_tensor = variables.at(inputExprId);
// Perform the Conv2D operation
tensor conv2d_tensor = Conv2D(input_tensor, weight, bias, in_channels, out_channels, kernel_size);
// Store the result in the variables map using the current expression ID
variables[expr_id_] = conv2d_tensor;
}
std::shared_ptr<eval_op> eval_conv2d::clone(const expression &expr) {
return std::make_shared<eval_conv2d>(expr);
}

38
src/eval_conv2d.h Executable file
View file

@ -0,0 +1,38 @@
#ifndef EVAL_CONV2D_H
#define EVAL_CONV2D_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_conv2d: public eval_op {
tensor value_;
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_conv2d () {}
eval_conv2d(const expression &expr);
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Conv2d") == proto_map.end());
proto_map["Conv2d"] = std::make_shared<eval_conv2d>(); // where is expr?
}
private:
int in_channels;
int kernel_size;
int out_channels;
double padding;
tensor weight;
tensor bias;
int stride;
/*
tensor weight;
tensor bias;
int kernel_size;
int stride;
int in_channels;
int out_channels;
double padding;
*/
}; // class eval_conv2d
#endif

27
src/eval_flatten.cpp Executable file
View file

@ -0,0 +1,27 @@
#include "eval_flatten.h"
eval_flatten::eval_flatten(const expression &expr): eval_op(expr) {
}
void eval_flatten::eval(vars_type &variables, const kwargs_type &kwargs) {
// Check that inputs_ is not empty
assert(!inputs_.empty());
// Retrieve the tensor from variables using the first input expression ID
const int inputExprId = inputs_[0];
tensor t = variables.at(inputExprId);
// For debugging: print the input tensor
std::cout << "Input Tensor for Flatten (expr_id " << inputExprId << "):" << std::endl;
//t.print();
// Perform the Flatten operation
tensor flattenResult = Flatten(t);
// Store the result in variables under the key expr_id_
variables[expr_id_] = flattenResult;
}
std::shared_ptr<eval_op> eval_flatten::clone(const expression &expr) {
return std::make_shared<eval_flatten>(expr);
}

21
src/eval_flatten.h Executable file
View file

@ -0,0 +1,21 @@
#ifndef EVAL_FLATTEN_H
#define EVAL_FLATTEN_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_flatten: public eval_op {
tensor value_;
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_flatten () {}
eval_flatten(const expression &expr);
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Flatten") == proto_map.end());
proto_map["Flatten"] = std::make_shared<eval_flatten>(); // where is expr?
}
}; // class eval_flatten
#endif

13
src/eval_input.cpp Executable file
View file

@ -0,0 +1,13 @@
#include "eval_input.h"
eval_input::eval_input(const expression &expr): eval_op(expr) {
}
void eval_input::eval(vars_type &variables, const kwargs_type &kwargs) {
variables[expr_id_] = kwargs.at(op_name_);
}
std::shared_ptr<eval_op> eval_input::clone(const expression &expr) {
return std::make_shared<eval_input>(expr);
}

21
src/eval_input.h Executable file
View file

@ -0,0 +1,21 @@
#ifndef EVAL_INPUT_H
#define EVAL_INPUT_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_input: public eval_op {
tensor value_;
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_input () {}
eval_input(const expression &expr);
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Input") == proto_map.end());
proto_map["Input"] = std::make_shared<eval_input>(); // where is expr?
}
}; // class eval_input
#endif

23
src/eval_input2d.cpp Executable file
View file

@ -0,0 +1,23 @@
#include "eval_input2d.h"
eval_input2d::eval_input2d(const expression &expr): eval_op(expr), height(expr.get_op_param_int("height")), width(expr.get_op_param_int("width")) {
}
void eval_input2d::eval(vars_type &variables, const kwargs_type &kwargs) {
// Retrieve the tensor directly from kwargs using op_name_
tensor t = kwargs.at(op_name_);
// For debugging: print the input tensor
std::cout << "Input Tensor for Input2d (op_name " << op_name_ << "):" << std::endl;
//t.print();
// Perform the Input2d operation
tensor input2dResult = Input2d(t);
// Store the result in variables under the key expr_id_
variables[expr_id_] = input2dResult;
}
std::shared_ptr<eval_op> eval_input2d::clone(const expression &expr) {
return std::make_shared<eval_input2d>(expr);
}
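As a hedged illustration (shapes made up): the Input2d helper from tensor.cpp permutes a channels-last batch (N, H, W, C) into the channels-first layout (N, C, H, W) expected by the convolution and pooling code:

#include <vector>
#include "tensor.h"

tensor input2d_demo() {
    size_t shape[] = {1, 28, 28, 3};                // (N, H, W, C), as fed in through kwargs
    std::vector<double> data(1 * 28 * 28 * 3, 0.0); // dummy values
    tensor x(4, shape, data.data());
    return Input2d(x);                              // shape {1, 3, 28, 28}
}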

24
src/eval_input2d.h Executable file
View file

@ -0,0 +1,24 @@
#ifndef EVAL_INPUT2D_H
#define EVAL_INPUT2D_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_input2d: public eval_op {
tensor value_;
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_input2d () {}
eval_input2d(const expression &expr);
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Input2d") == proto_map.end());
proto_map["Input2d"] = std::make_shared<eval_input2d>(); // where is expr?
}
private:
int height;
int width;
}; // class eval_input2d
#endif

27
src/eval_linear.cpp Executable file
View file

@ -0,0 +1,27 @@
#include "eval_linear.h"
eval_linear::eval_linear(const expression &expr): eval_op(expr),weight(expr.get_op_param_tensor("weight")),bias(expr.get_op_param_tensor("bias")) {
}
void eval_linear::eval(vars_type &variables, const kwargs_type &kwargs) {
if (inputs_.empty()) {
throw std::runtime_error("No input tensor for eval_linear.");
}
int inputExprId = inputs_[0];
if (variables.find(inputExprId) == variables.end()) {
throw std::runtime_error("Input tensor not found in variables map.");
}
tensor x = variables.at(inputExprId);
// Now you have x, weight, and bias available to perform the Linear operation
tensor linearResult = Linear(weight, x, bias);
// Process the result...
variables[expr_id_] = linearResult;
}
std::shared_ptr<eval_op> eval_linear::clone(const expression &expr) {
return std::make_shared<eval_linear>(expr);
}
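A hedged shape check (values made up) for the Linear helper used above: with x of shape {N, in_features}, weight of shape {out_features, in_features} and bias of shape {out_features}, the result is {N, out_features}:

#include <vector>
#include "tensor.h"

tensor linear_demo() {
    size_t x_shape[] = {4, 3}, w_shape[] = {2, 3}, b_shape[] = {2};
    std::vector<double> x_data(4 * 3, 1.0), w_data(2 * 3, 0.5), b_data(2, 0.1);
    tensor x(2, x_shape, x_data.data());
    tensor w(2, w_shape, w_data.data());
    tensor b(1, b_shape, b_data.data());
    return Linear(w, x, b); // shape {4, 2}; every entry is 3*0.5 + 0.1 = 1.6
}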

24
src/eval_linear.h Executable file
View file

@ -0,0 +1,24 @@
#ifndef EVAL_LINEAR_H
#define EVAL_LINEAR_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_linear: public eval_op {
tensor value_;
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_linear () {}
eval_linear(const expression &expr);
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Linear") == proto_map.end());
proto_map["Linear"] = std::make_shared<eval_linear>(); // where is expr?
}
private:
tensor weight; // Add weight tensor
tensor bias; // Add bias tensor
}; // class eval_linear
#endif

32
src/eval_maxpool2d.cpp Executable file
View file

@ -0,0 +1,32 @@
#include "eval_maxpool2d.h"
eval_maxpool2d::eval_maxpool2d(const expression &expr): eval_op(expr), kernel_size(expr.get_op_param_int("kernel_size")), stride(expr.get_op_param_int("stride")){
// kernel_size and stride are read from the expression in the member initializer list above.
}
void eval_maxpool2d::eval(vars_type &variables, const kwargs_type &kwargs) {
// Ensure that there is at least one input (the input tensor)
if (inputs_.empty()) {
throw std::runtime_error("No input tensor for eval_maxpool2d.");
}
int inputExprId = inputs_[0];
if (variables.find(inputExprId) == variables.end()) {
throw std::runtime_error("Input tensor not found in variables map.");
}
// Get the input tensor
tensor input_tensor = variables.at(inputExprId);
// Perform the max pooling operation
tensor pooled_tensor = MaxPool2d(input_tensor, kernel_size, stride);
// Store the result in the variables map using the expression ID
variables[expr_id_] = pooled_tensor;
}
std::shared_ptr<eval_op> eval_maxpool2d::clone(const expression &expr) {
return std::make_shared<eval_maxpool2d>(expr);
}
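A hedged worked example (values made up) for the MaxPool2d helper used above: a {1, 1, 4, 4} input pooled with kernel_size 2 and stride 2 yields a {1, 1, 2, 2} output holding the max of each 2x2 window:

#include "tensor.h"

tensor maxpool_demo() {
    size_t shape[] = {1, 1, 4, 4};
    double data[] = { 1,  2,  3,  4,
                      5,  6,  7,  8,
                      9, 10, 11, 12,
                     13, 14, 15, 16};
    tensor x(4, shape, data);
    return MaxPool2d(x, 2, 2); // data {6, 8, 14, 16}
}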

24
src/eval_maxpool2d.h Executable file
View file

@ -0,0 +1,24 @@
#ifndef EVAL_MAXPOOL2D_H
#define EVAL_MAXPOOL2D_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_maxpool2d: public eval_op {
tensor value_;
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_maxpool2d () {}
eval_maxpool2d(const expression &expr);
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("MaxPool2d") == proto_map.end());
proto_map["MaxPool2d"] = std::make_shared<eval_maxpool2d>(); // where is expr?
}
private:
int kernel_size;
int stride;
}; // class eval_maxpool2d
#endif

20
src/eval_mul.cpp Executable file
View file

@ -0,0 +1,20 @@
#include "eval_mul.h"
tensor eval_mul::compute(const tensor &a, const tensor &b) {
// these use our tensor operator* overload
tensor c = a * b;
return c;
}
void eval_mul::eval(vars_type &variables, const kwargs_type &kwargs) {
assert(inputs_.size() == 2);
auto ita = variables.find(inputs_[0]);
auto itb = variables.find(inputs_[1]);
// handle errors for ita and itb
// we handle these in the operator* override instead of here
variables[expr_id_] = compute(ita->second,itb->second);
}
std::shared_ptr<eval_op> eval_mul::clone(const expression &expr) {
return std::make_shared<eval_mul>(expr);
}

22
src/eval_mul.h Executable file
View file

@ -0,0 +1,22 @@
#ifndef EVAL_MUL_H
#define EVAL_MUL_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_mul: public eval_op {
tensor compute(const tensor &a, const tensor &b);
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_mul() {}
eval_mul(const expression &expr): eval_op(expr) {}
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Mul") == proto_map.end());
proto_map["Mul"] = std::make_shared<eval_mul>(); // where is expr?
}
}; // class eval_mul
#endif

20
src/eval_op.cpp Executable file
View file

@ -0,0 +1,20 @@
#include "eval_op.h"
eval_op::eval_op(const expression &expr)
{
// set all of our member vars one time using expr getters
expr_id_ = expr.get_expr_id();
op_name_ = expr.get_op_name();
op_type_ = expr.get_op_type();
inputs_ = expr.get_inputs();
}
void eval_op::eval(vars_type & variables, const kwargs_type & kwargs)
{
assert(false); // should be provided by derived classes
}
eval_op::~eval_op() {
}

27
src/eval_op.h Executable file
View file

@ -0,0 +1,27 @@
#ifndef EVAL_OP_H
#define EVAL_OP_H
#include <map>
#include "expression.h"
#include "assert.h"
#include <memory>
// typedef introduces alias for types.
typedef std::map<int, tensor> vars_type;
typedef std::map<std::string, tensor> kwargs_type;
class eval_op {
protected:
friend class evaluation;
eval_op(): expr_id_(-1) {}
eval_op(const expression &expr);
int expr_id_;
std::string op_name_, op_type_;
std::vector<int> inputs_;
public:
virtual ~eval_op();
virtual void eval(vars_type &variables, const kwargs_type &kwargs) = 0;
virtual std::shared_ptr<eval_op> clone(const expression &expr) = 0;
}; //class eval_op
#endif
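Every eval_* class in this commit follows the same prototype/clone pattern around this base class; as a hedged illustration, a hypothetical extra operator (eval_neg, not part of this commit and not registered anywhere) would look like this:

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_neg: public eval_op {
    std::shared_ptr<eval_op> clone(const expression &expr) override {
        return std::make_shared<eval_neg>(expr);
    }
public:
    eval_neg() {}
    eval_neg(const expression &expr): eval_op(expr) {}
    void eval(vars_type &variables, const kwargs_type &kwargs) override {
        assert(inputs_.size() == 1);
        // negate by scaling with a scalar tensor, reusing the tensor operator*
        variables[expr_id_] = tensor(-1.0) * variables.at(inputs_[0]);
    }
    static void store_prototype(eval_op_proto_map &proto_map) {
        proto_map["Neg"] = std::make_shared<eval_neg>(); // default-constructed prototype
    }
}; // class eval_neg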

46
src/eval_op_prototypes.cpp Executable file
View file

@ -0,0 +1,46 @@
#include "eval_op_prototypes.h"
eval_op_prototypes::eval_op_prototypes() {
eval_const::store_prototype(proto_map_);
eval_input::store_prototype(proto_map_);
eval_add::store_prototype(proto_map_);
eval_sub::store_prototype(proto_map_);
eval_mul::store_prototype(proto_map_);
eval_relu::store_prototype(proto_map_);
eval_flatten::store_prototype(proto_map_);
eval_input2d::store_prototype(proto_map_);
eval_linear::store_prototype(proto_map_);
eval_maxpool2d::store_prototype(proto_map_);
eval_conv2d::store_prototype(proto_map_);
//log_proto_types(); // used this when troubleshooting. uncomment to enable log
}
void eval_op_prototypes::log_proto_types() {
std::cout << "eval_op_prototypes: registering prototypes begin:" << std::endl;
std::cout << "----------------------" << std::endl;
for (const auto& pair : proto_map_) {
std::cout << "eval_op_prototypes: registered prototype: " << pair.first << std::endl;
}
std::cout << "----------------------" << std::endl;
std::cout << "eval_op_prototypes: registering prototypes complete" << std::endl;
}
std::shared_ptr<eval_op> eval_op_prototypes::locate(std::string name)
{
auto found = proto_map_.find(name);
if (found != proto_map_.end()) {
return found->second;
} else {
std::string error_message = "eval_op_prototypes: prototype for operator '" + name + "' not found.";
std::cerr << error_message << std::endl;
throw std::runtime_error(error_message);
}
}
eval_op_prototypes &eval_op_prototypes::instance()
{
static eval_op_prototypes instance; // the only instance
return instance;
}

25
src/eval_op_prototypes.h Executable file
View file

@ -0,0 +1,25 @@
#include "eval_op.h"
#include "eval_input.h"
#include "eval_input2d.h"
#include "eval_add.h"
#include "eval_mul.h"
#include "eval_sub.h"
#include "eval_const.h"
#include "eval_flatten.h"
#include "eval_relu.h"
#include "eval_linear.h"
#include "eval_maxpool2d.h"
#include "eval_conv2d.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_op_prototypes {
// prevent creation of additional instances
eval_op_prototypes(const eval_op_prototypes &)=delete;
eval_op_prototypes();
eval_op_proto_map proto_map_;
void log_proto_types();
public:
std::shared_ptr<eval_op> locate(std::string name);
static eval_op_prototypes &instance(); // access the only instance
}; // class eval_op_prototypes
#endif // EVAL_OP_PROTOTYPES_H

27
src/eval_relu.cpp Executable file
View file

@ -0,0 +1,27 @@
#include "eval_relu.h"
eval_relu::eval_relu(const expression &expr): eval_op(expr) {
}
void eval_relu::eval(vars_type &variables, const kwargs_type &kwargs) {
// Check that inputs_ is not empty
assert(!inputs_.empty());
// Retrieve the tensor from variables using the first input expression ID
const int inputExprId = inputs_[0];
tensor t = variables.at(inputExprId);
// For debugging: print the input tensor
std::cout << "Input Tensor for ReLU (expr_id " << inputExprId << "):" << std::endl;
//t.print();
// Perform the ReLU operation
tensor reluResult = ReLU(t);
// Store the result in variables under the key expr_id_
variables[expr_id_] = reluResult;
}
std::shared_ptr<eval_op> eval_relu::clone(const expression &expr) {
return std::make_shared<eval_relu>(expr);
}

21
src/eval_relu.h Executable file
View file

@ -0,0 +1,21 @@
#ifndef EVAL_RELU_H
#define EVAL_RELU_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_relu: public eval_op {
tensor value_;
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_relu () {}
eval_relu(const expression &expr);
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("ReLU") == proto_map.end());
proto_map["ReLU"] = std::make_shared<eval_relu>(); // where is expr?
}
}; // class eval_relu
#endif

18
src/eval_sub.cpp Executable file
View file

@ -0,0 +1,18 @@
#include "eval_sub.h"
tensor eval_sub::compute(const tensor &a, const tensor &b) {
// these use our tensor operator- overload
tensor c = a - b;
return c;
}
void eval_sub::eval(vars_type &variables, const kwargs_type &kwargs) {
assert(inputs_.size() == 2);
auto ita = variables.find(inputs_[0]);
auto itb = variables.find(inputs_[1]);
variables[expr_id_] = compute(ita->second,itb->second);
}
std::shared_ptr<eval_op> eval_sub::clone(const expression &expr) {
return std::make_shared<eval_sub>(expr);
}

22
src/eval_sub.h Executable file
View file

@ -0,0 +1,22 @@
#ifndef EVAL_SUB_H
#define EVAL_SUB_H
#include "eval_op.h"
typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;
class eval_sub: public eval_op {
tensor compute(const tensor &a, const tensor &b);
std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
eval_sub() {}
eval_sub(const expression &expr): eval_op(expr) {}
void eval(vars_type &variables, const kwargs_type &kwargs) override;
static void store_prototype(eval_op_proto_map &proto_map) {
assert(proto_map.find("Sub") == proto_map.end());
proto_map["Sub"] = std::make_shared<eval_sub>(); // where is expr?
}
}; // class eval_sub
#endif

60
src/evaluation.cpp Executable file
View file

@ -0,0 +1,60 @@
#include "evaluation.h"
#include "eval_op_prototypes.h"
evaluation::evaluation(const std::vector<expression> &exprs) {
try {
std::cout << "Starting evaluation construction with " << exprs.size() << " expressions." << std::endl;
for (auto &expr: exprs) {
std::cout << "Processing expression with ID: " << expr.get_expr_id() << std::endl;
std::shared_ptr<eval_op> p = eval_op_prototypes::instance().locate(expr.get_op_type());
ops_.push_back(p->clone(expr));
std::cout << "Expression with ID: " << expr.get_expr_id() << " processed successfully." << std::endl;
}
std::cout << "Evaluation construction completed successfully." << std::endl;
} catch (const std::runtime_error& e) {
std::cerr << "Failed to construct evaluation: " << e.what() << std::endl;
}
}
void evaluation::add_kwargs_double(
const char *key,
double value)
{
tensor scalar(value);
kwargs_[key] = scalar;
}
void evaluation::add_kwargs_ndarray(
const char *key,
int dim,
size_t shape[],
double data[])
{
tensor ndarray(dim, shape, data);
kwargs_[key] = ndarray;
}
tensor &evaluation::get_result()
{
if (!variables_.empty()) {
int last_expr_id = std::prev(variables_.end())->first;
std::cout << "Returning result from the last expression with ID: " << last_expr_id << std::endl;
return variables_[last_expr_id];
}
throw std::runtime_error("No result available.");
}
int evaluation::execute()
{
std::cout << "Executing evaluation with " << ops_.size() << " operations.\n";
variables_.clear();
for (auto &op : ops_) {
std::cout << "Evaluating operation of type: " << typeid(*op).name() << "\n";
op->eval(variables_, kwargs_);
std::cout << "Operation evaluation complete. Variables size: " << variables_.size() << "\n";
}
std::cout << "Evaluation complete. Total variables evaluated: " << variables_.size() << "\n";
return 0;
}

33
src/evaluation.h Executable file
View file

@ -0,0 +1,33 @@
#ifndef EVALUATION_H
#define EVALUATION_H
#include "expression.h"
#include "eval_op.h"
class evaluation
{
public:
evaluation(const std::vector<expression> &exprs);
void add_kwargs_double(
const char *key,
double value);
void add_kwargs_ndarray(
const char *key,
int dim,
size_t shape[],
double data[]);
// return 0 for success
int execute();
tensor &get_result();
private:
std::vector<std::shared_ptr<eval_op>> ops_; // instead of exprs_
std::map<int, tensor> variables_;
std::map<std::string, tensor> kwargs_;
}; // class evaluation
#endif // EVALUATION_H

112
src/expression.cpp Executable file
View file

@ -0,0 +1,112 @@
#include "expression.h"
#include "evaluation.h"
expression::expression(
int expr_id,
const char *op_name,
const char *op_type,
int *inputs,
int num_inputs)
: expr_id_(expr_id), op_name_(op_name), op_type_(op_type)
{
// initialize inputs_ with the given inputs from our expression.
for (int i = 0; i < num_inputs; ++i)
{
inputs_.push_back(inputs[i]);
}
}
// getter methods
int expression::get_expr_id() const
{
return expr_id_;
}
const std::string& expression::get_op_name() const
{
return op_name_;
}
const std::string& expression::get_op_type() const
{
return op_type_;
}
const std::vector<int>& expression::get_inputs() const
{
return inputs_;
}
tensor expression::get_op_param_tensor(const std::string& key) const {
auto it = op_params_tensor_.find(key);
if (it != op_params_tensor_.end()) {
return it->second;
} else {
throw std::out_of_range("No operation parameter tensor found for key: " + key);
}
}
double expression::get_op_param_double(const std::string& key) const {
auto it = op_params_double_.find(key);
if (it != op_params_double_.end()) {
return it->second;
} else {
return 0.0;
}
}
int expression::get_op_param_int(const std::string& key) const {
auto it = op_params_int_.find(key);
if (it != op_params_int_.end()) {
return it->second;
} else {
return 0;
}
}
void expression::add_op_param_double(
const char *key,
double value)
{
std::cout << "Adding operation parameter (double) with key: " << key << std::endl;
const std::string k(key);
if (k == "weight" || k == "bias" || k == "value") {
// weight, bias and Const "value" are stored as scalar tensors ("value" kept for p3 tests)
op_params_tensor_[key] = tensor(value);
} else if (k == "kernel_size" || k == "stride" || k == "in_channels" || k == "out_channels" || k == "height" || k == "width") {
// integer-valued parameters; comparing full strings (not just the first character)
// also lets "height"/"width" reach get_op_param_int for Input2d
op_params_int_[key] = int(value);
} else if (k == "padding") {
// padding stays a double
op_params_double_[key] = value;
} else {
// unknown keys default to scalar tensors
op_params_tensor_[key] = tensor(value);
}
}
void expression::add_op_param_ndarray(
const char *key,
int dim,
size_t shape[],
double data[])
{
std::cout << "Adding operation parameter (ndarray) with key: " << key << std::endl;
// every ndarray parameter (weight, bias, Const value, ...) is stored as a tensor
op_params_tensor_[key] = tensor(dim, shape, data);
}
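A hedged illustration (hypothetical snippet, values made up) of how these parameter maps are filled by the C API and read back by the eval_* constructors:

#include "expression.h"

void expression_params_demo() {
    expression expr(0, "", "MaxPool2d", nullptr, 0);
    expr.add_op_param_double("kernel_size", 2.0);
    expr.add_op_param_double("stride", 2.0);
    int k = expr.get_op_param_int("kernel_size"); // 2, read from op_params_int_
    int s = expr.get_op_param_int("stride");      // 2, read from op_params_int_
    (void)k; (void)s;
}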

51
src/expression.h Executable file
View file

@ -0,0 +1,51 @@
#ifndef EXPRESSION_H
#define EXPRESSION_H
#include <vector>
#include <string>
#include <map>
#include "tensor.h"
class evaluation;
class expression
{
friend class evaluation;
public:
expression(
int expr_id,
const char *op_name,
const char *op_type,
int *inputs,
int num_inputs);
// Getter methods for expression attributes
int get_expr_id() const;
const std::string& get_op_name() const;
const std::string& get_op_type() const;
const std::vector<int>& get_inputs() const;
tensor get_op_param_tensor(const std::string& key) const;
double get_op_param_double(const std::string& key) const;
int get_op_param_int(const std::string& key) const;
void add_op_param_double(
const char *key,
double value);
void add_op_param_ndarray(
const char *key,
int dim,
size_t shape[],
double data[]);
private:
int expr_id_;
std::string op_name_;
std::string op_type_;
std::vector<int> inputs_;
std::map<std::string, tensor> op_params_tensor_;
std::map<std::string,double> op_params_double_;
std::map<std::string,int> op_params_int_;
}; // class expression
#endif // EXPRESSION_H

109
src/libeasynn.cpp Executable file
View file

@ -0,0 +1,109 @@
#include <stdio.h>
#include "libeasynn.h"
#include "program.h"
#include "evaluation.h"
program *create_program()
{
program *prog = new program;
printf("created_program: program %p\n", prog);
return prog;
}
void append_expression(
program *prog,
int expr_id,
const char *op_name,
const char *op_type,
int inputs[],
int num_inputs)
{
printf("append_expression: program %p, expr_id %d, op_name %s, op_type %s, inputs %d (",
prog, expr_id, op_name, op_type, num_inputs);
for (int i = 0; i != num_inputs; ++i)
printf("%d,", inputs[i]);
printf(")\n");
prog->append_expression(expr_id, op_name, op_type, inputs, num_inputs);
}
int add_op_param_double(
program *prog,
const char *key,
double value)
{
printf("program %p, key %s, value %f\n",
prog, key, value);
return prog->add_op_param_double(key, value);
}
int add_op_param_ndarray(
program *prog,
const char *key,
int dim,
size_t shape[],
double data[])
{
printf("program %p, key %s, value %p dim %d (",
prog, key, data, dim);
for (int i = 0; i != dim; ++i)
printf("%zu,", shape[i]);
printf(")\n");
return prog->add_op_param_ndarray(key, dim, shape, data);
}
evaluation *build(program *prog)
{
evaluation *eval = prog->build();
printf("evaluation %p\n", eval);
return eval;
}
void add_kwargs_double(
evaluation *eval,
const char *key,
double value)
{
printf("evaluation %p, key %s, value %f\n",
eval, key, value);
eval->add_kwargs_double(key, value);
}
void add_kwargs_ndarray(
evaluation *eval,
const char *key,
int dim,
size_t shape[],
double data[])
{
printf("evaluation %p, key %s, value %p dim %d (",
eval, key, data, dim);
for (int i = 0; i != dim; ++i)
printf("%zu,", shape[i]);
printf(")\n");
eval->add_kwargs_ndarray(key, dim, shape, data);
}
int execute(
evaluation *eval,
int *p_dim,
size_t **p_shape,
double **p_data)
{
printf("evaluation %p, p_dim %p, p_shape %p, p_data %p\n",
eval, p_dim, p_shape, p_data);
int ret = eval->execute();
if (ret != 0) {
printf("execution failed with return code %d\n", ret);
return ret;
}
tensor &res = eval->get_result();
*p_dim = res.get_dim();
*p_shape = res.get_shape_array();
*p_data = res.get_data_array();
//res.print(); // uncomment to view tensor data output
printf("execution completed succesfully. result: dim=%d\n",*p_dim);
fflush(stdout);
return 0;
}
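For reference, a hedged standalone driver (hypothetical file, not part of this commit) showing how the C API above can be exercised end to end from C++: build x + y, feed two scalars, and read the result back.

#include <cstdio>
#include "libeasynn.h"

int main() {
    program *prog = create_program();
    append_expression(prog, 0, "x", "Input", nullptr, 0); // t0 = Input("x")
    append_expression(prog, 1, "y", "Input", nullptr, 0); // t1 = Input("y")
    int add_inputs[] = {0, 1};
    append_expression(prog, 2, "", "Add", add_inputs, 2); // t2 = t0 + t1

    evaluation *eval = build(prog);
    add_kwargs_double(eval, "x", 3.0);
    add_kwargs_double(eval, "y", 4.0);

    int dim;
    size_t *shape;
    double *data;
    if (execute(eval, &dim, &shape, &data) == 0)
        printf("result: %f\n", data[0]); // expected 7.0
    return 0;
}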

63
src/libeasynn.h Executable file
View file

@ -0,0 +1,63 @@
#ifndef LIBEASYNN_H
#define LIBEASYNN_H
class program;
class evaluation;
// return program handle
extern "C"
program *create_program();
extern "C"
void append_expression(
program *prog,
int expr_id,
const char *op_name,
const char *op_type,
int inputs[],
int num_inputs);
// return 0 for success
extern "C"
int add_op_param_double(
program *prog,
const char *key,
double value);
// return 0 for success
extern "C"
int add_op_param_ndarray(
program *prog,
const char *key,
int dim,
size_t shape[],
double data[]);
// return evaluation handle
extern "C"
evaluation *build(
program *prog);
extern "C"
void add_kwargs_double(
evaluation *eval,
const char *key,
double value);
extern "C"
void add_kwargs_ndarray(
evaluation *eval,
const char *key,
int dim,
size_t shape[],
double data[]);
// return 0 for success
extern "C"
int execute(
evaluation *eval,
int *p_dim,
size_t **p_shape,
double **p_data);
#endif // LIBEASYNN_H

47
src/program.cpp Executable file
View file

@ -0,0 +1,47 @@
#include "program.h"
#include "evaluation.h"
program::program()
{
}
void program::append_expression(
int expr_id,
const char *op_name,
const char *op_type,
int inputs[],
int num_inputs)
{
// create an expression object with the provided parameters
expression expr(expr_id, op_name, op_type, inputs, num_inputs);
// append the expression to the expressions vector
expressions_.push_back(expr);
}
int program::add_op_param_double(
const char *key,
double value)
{
// get end of expressions_ values and send to our add_op_param_double
expressions_.back().add_op_param_double(key, value);
return 0;
}
int program::add_op_param_ndarray(
const char *key,
int dim,
size_t shape[],
double data[])
{
// get end of expressions_ values and send to our add_op_param_ndarray
expressions_.back().add_op_param_ndarray(key, dim, shape, data);
return 0;
}
evaluation *program::build()
{
evaluation *eval = new evaluation(expressions_);
return eval;
}

38
src/program.h Executable file
View file

@ -0,0 +1,38 @@
#ifndef PROGRAM_H
#define PROGRAM_H
#include "expression.h"
class evaluation;
class program
{
public:
program();
void append_expression(
int expr_id,
const char *op_name,
const char *op_type,
int inputs[],
int num_inputs);
// return 0 for success
int add_op_param_double(
const char *key,
double value);
// return 0 for success
int add_op_param_ndarray(
const char *key,
int dim,
size_t shape[],
double data[]);
evaluation *build();
private:
std::vector<expression> expressions_; // Store the expressions.
}; // class program
#endif // PROGRAM_H

430
src/tensor.cpp Executable file
View file

@ -0,0 +1,430 @@
#include "tensor.h"
#include <assert.h>
#include <stdexcept>
#include <algorithm>
#include <numeric>
#include <math.h>
tensor::tensor(): dim_(0) {}
tensor::tensor(double v): dim_(1) {
shape_.push_back(1);
data_.push_back(v);
}
tensor::tensor(int dim, std::initializer_list<size_t> shape, std::initializer_list<double> data) {
shape_.assign(shape.begin(), shape.end());
data_.assign(data.begin(), data.end());
dim_ = dim;
}
size_t tensor::getTotalSize() const {
size_t totalSize = 1;
for (size_t dim : shape_) {
if (totalSize > std::numeric_limits<size_t>::max() / dim)
throw std::overflow_error("Size overflow detected");
totalSize *= dim;
}
return totalSize;
}
tensor::tensor(int dim, size_t shape[], double data[]):
shape_(shape, shape+dim) {
dim_ = dim;
// calc N has shape[0]*shape[1]*...*shape[dim-1]
// calc N as the product of elements in the shape array
size_t N = 1;
for (int i=0; i < dim; ++i) {
N = shape[i] * N;
}
data_.assign(data, data+N);
}
int tensor::get_dim() const {
return shape_.size();
}
double tensor::item() const {
if (shape_.empty()) {
return data_[0];
} else {
throw std::runtime_error("item() called on a non-scalar tensor.");
}
}
double &tensor::item() {
if (shape_.empty()) {
return data_[0];
} else {
throw std::runtime_error("item() called on a non-scalar tensor.");
}
}
double tensor::at(size_t i) const {
assert(get_dim() == 1);
assert(i < shape_[0]);
return data_[i];
}
double tensor::at(size_t i, size_t j) const {
assert(get_dim() == 2);
if (shape_.size() != 2)
throw std::runtime_error("Tensor is not 2-dimensional");
assert((i < shape_[0]) && (j < shape_[1]));
return data_[i * shape_[1] + j];
}
double tensor::at(size_t i, size_t j, size_t k, size_t l) const {
assert(get_dim() == 4); // Ensure the tensor is 4D
assert(i < shape_[0] && j < shape_[1] && k < shape_[2] && l < shape_[3]);
size_t index = i * shape_[1] * shape_[2] * shape_[3] + j * shape_[2] * shape_[3] + k * shape_[3] + l;
return data_[index];
}
double& tensor::at(size_t i, size_t j, size_t k, size_t l) {
assert(get_dim() == 4); // Ensure the tensor is 4D
assert(i < shape_[0] && j < shape_[1] && k < shape_[2] && l < shape_[3]);
size_t index = i * shape_[1] * shape_[2] * shape_[3] + j * shape_[2] * shape_[3] + k * shape_[3] + l;
return data_[index];
}
size_t* tensor::get_shape_array() {
return shape_.empty()? nullptr: &shape_[0];
}
double* tensor::get_data_array() {
return &data_[0];
}
// This can be used for debugging across the entire program.
// prints the values of the tensor. Be careful though because
// it will clutter the screen output.
void tensor::print() const {
std::cout << "Tensor - Dim: " << dim_ << ", Shape: {";
for (const auto& s : shape_) std::cout << s << " ";
std::cout << "}, Data: {";
for (const auto& d : data_) std::cout << d << " ";
std::cout << "}\n";
}
// helper template
template <class T> tensor helper(const tensor &lhs, const tensor &rhs) {
tensor result;
result.shape_ = lhs.shape_;
size_t totalSize = lhs.getTotalSize();
result.data_.reserve(totalSize);
std::transform(lhs.data_.begin(), lhs.data_.begin() + totalSize,
rhs.data_.begin(), std::back_inserter(result.data_), T());
return result;
}
// operator overloads
tensor operator+(const tensor &lhs, const tensor &rhs) {
tensor tensor_a = tensor(lhs);
tensor tensor_b = tensor(rhs);
size_t size_a = lhs.getTotalSize();
std::vector<double> result;
for (size_t i =0; i< size_a; ++i) {
double sum_array = tensor_a.get_data_array()[i] + tensor_b.get_data_array()[i];
result.push_back(sum_array);
}
tensor c(tensor_a.get_dim(), tensor_a.get_shape_array(), &result[0]);
return c;
}
tensor operator-(const tensor &lhs, const tensor &rhs) {
tensor tensor_a = tensor(lhs);
tensor tensor_b = tensor(rhs);
size_t size_a = lhs.getTotalSize();
std::vector<double> result;
for (size_t i =0; i< size_a; ++i) {
double sum_array = tensor_a.get_data_array()[i] - tensor_b.get_data_array()[i];
result.push_back(sum_array);
}
tensor c(tensor_a.get_dim(), tensor_a.get_shape_array(), &result[0]);
return c;
}
tensor operator*(const tensor &lhs, const tensor &rhs)
{
try {
// scalar * scalar multiplication
if (lhs.getTotalSize() == 1 && rhs.getTotalSize() == 1) {
return tensor(lhs.data_[0] * rhs.data_[0]);
}
if (lhs.shape_.empty() || rhs.shape_.empty()) {
return tensor(); // for p3-q6, passing in None
}
tensor result;
size_t lhsSize = lhs.getTotalSize();
size_t rhsSize = rhs.getTotalSize();
// scalar * tensor multiplication
if (lhsSize == 1 || rhsSize == 1) {
double scalar = (lhsSize == 1 ? lhs : rhs).data_[0];
const tensor &t = lhsSize == 1 ? rhs : lhs;
result.shape_ = t.shape_;
result.data_.reserve(lhsSize * rhsSize);
std::transform(t.data_.begin(), t.data_.end(),
std::back_inserter(result.data_),
[&scalar](double x) { return x * scalar; });
} else {
// matrix multiplication
size_t I = lhs.shape_[0];
size_t J = rhs.shape_[1];
size_t K = lhs.shape_[1];
result.shape_ = {I, J};
size_t totalSize = result.getTotalSize();
result.data_.resize(totalSize);
for (size_t i = 0; i < I; ++i) {
for (size_t j = 0; j < J; ++j) {
for (size_t k = 0; k < K; ++k) {
result.data_[J * i + j] +=
lhs.data_[K * i + k] * rhs.data_[J * k + j];
}
}
}
}
return result;
} catch (const std::bad_alloc&) {
throw std::runtime_error("Memory allocation failed in tensor multiplication.");
}
}
tensor ReLU(const tensor &x){
tensor result;
size_t xSize = x.getTotalSize();
result.shape_ = x.shape_;
for(size_t i = 0; i < xSize; i++ ){
result.data_.push_back(x.data_[i]*(x.data_[i]>0));
}
return result;
}
tensor Flatten(const tensor &x){
tensor result;
size_t xSize = x.getTotalSize();
size_t shape_1 = xSize/x.shape_[0];
std::vector<size_t> shape_;
shape_.push_back(x.shape_[0]);
shape_.push_back(shape_1);
result.shape_ = shape_;
result.data_ = x.data_;
return result;
}
tensor Input2d(const tensor &x){
tensor result;
std::vector<size_t> shape_;
shape_.push_back(x.shape_[0]);
shape_.push_back(x.shape_[3]);
shape_.push_back(x.shape_[1]);
shape_.push_back(x.shape_[2]);
result.shape_ = shape_;
for(size_t i = 0; i < x.shape_[0]; i++ ){
for(size_t j =0; j < x.shape_[3];j++){
for(size_t k = 0; k <x.shape_[1]*x.shape_[2]; k++){
result.data_.push_back(x.data_[j + k*x.shape_[3] + i*x.shape_[3]*x.shape_[1]*x.shape_[2]]);
}
}
}
return result;
}
tensor Linear(const tensor &w, const tensor &x, const tensor &b){
tensor result_wT;
std::vector<size_t> shape_0;
shape_0.push_back(x.shape_[1]);
shape_0.push_back(w.shape_[0]);
size_t totalSize_w = w.getTotalSize();
result_wT.data_.reserve(totalSize_w);
result_wT.shape_ = shape_0;
for(size_t i = 0; i < w.shape_[1]; i++){
for(size_t j = 0; j < w.shape_[0]; j++){
result_wT.data_.push_back(w.data_[i + j*w.shape_[1]]);
}
}
tensor result_xwT;
std::vector<size_t> shape_xwT;
shape_xwT.push_back(x.shape_[0]);
shape_xwT.push_back(result_wT.shape_[1]);
size_t totalSize_xwT = x.shape_[0]*result_wT.shape_[1];
result_xwT.data_.resize(totalSize_xwT);
result_xwT.shape_ = shape_xwT;
result_xwT = x * result_wT;
tensor result_bBrodcast;
std::vector<size_t> shape_1;
shape_1.push_back(b.shape_[0]);
shape_1.push_back(1*x.shape_[0]);
result_bBrodcast.shape_ = shape_1;
for(size_t i = 0; i < result_bBrodcast.shape_[0]; i++){
for(size_t j = 0; j < result_bBrodcast.shape_[1]; j++){
result_bBrodcast.data_.push_back(b.data_[i]);
}
}
tensor result_bBroadcastT;
std::vector<size_t> shape_2;
size_t totalSize_b = result_bBrodcast.getTotalSize();
result_bBrodcast.data_.resize(totalSize_b);
shape_2.push_back(result_bBrodcast.shape_[1]);
shape_2.push_back(result_bBrodcast.shape_[0]);
result_bBroadcastT.shape_ = shape_2;
for(size_t i = 0; i < result_bBrodcast.shape_[1]; i++){
for(size_t j = 0; j < result_bBrodcast.shape_[0]; j++){
result_bBroadcastT.data_.push_back(result_bBrodcast.data_[i + j*result_bBrodcast.shape_[1]]);
}
}
tensor result;
result.data_.resize(result_xwT.shape_[0]*result_xwT.shape_[1]);
result = result_xwT + result_bBroadcastT;
return result;
}
std::vector<double> maxpooling(std::vector<std::vector<double> > map, int k_w, int k_h, int s_w, int s_h)
{
int row = map.size(); int col = map[0].size();
int out_row = 1+(row-k_h)/s_h, out_col = 1+(col-k_w)/s_w;
int mod_row = row%s_h, mod_col = col%s_w;
if (mod_row != 0) out_row++;
if (mod_col != 0) out_col++;
std::vector<std::vector<double> > res(out_row, std::vector<double>(out_col, 0));
std::vector<std::vector<double> > pad_map(map);
for (int i=0;i<row;i++)
for (int j = 0; j < k_h - mod_row; j++)
{
pad_map[i].push_back(map[i][col - 1]);
}
for (int j = 0; j < k_w - mod_col; j++)
{
pad_map.push_back(pad_map[row - 1]);
}
for(int i=0;i<out_row;i++)
for (int j = 0; j < out_col; j++)
{
int start_x = j*s_w;
int start_y = i*s_h;
std::vector<double> temp;
for(int ii=0;ii<k_w;ii++)
for (int jj = 0; jj < k_h; jj++)
{
temp.push_back(pad_map[start_y + jj][start_x + ii]);
}
sort(temp.begin(), temp.end());
res[i][j] = temp[temp.size() - 1];
}
std::vector<double> res_;
for (auto l : res)
{
for (auto ll : l)
{
res_.push_back(ll);
}
}
return res_;
}
tensor MaxPool2d(const tensor &x, const int kernel_size, const int stride){
std::cout <<"======1================="<<std::endl;
std::cout << x.shape_[0] <<"_"<< x.shape_[1] << x.shape_[2] << x.shape_[3] << std::endl;
std::cout << kernel_size << std::endl;
std::cout << stride << std::endl;
std::cout <<"======2================="<<std::endl;
tensor result;
size_t xSize = x.getTotalSize();
std::cout << xSize << std::endl;
std::vector<size_t> shape_;
shape_.push_back(x.shape_[0]);
shape_.push_back(x.shape_[1]);
shape_.push_back(floor(x.shape_[2]/kernel_size));
shape_.push_back(floor(x.shape_[3]/kernel_size));
result.shape_ = shape_;
for(size_t i = 0; i < x.shape_[0]; i++ ){
for(size_t j =0; j < x.shape_[1];j++){
std::vector<std::vector<double> > tmp;
for(size_t k =0; k < x.shape_[2];k++){
std::vector<double> tmp_;
for(size_t l =0; l < x.shape_[3];l++){
double int_data = x.data_[l + k*x.shape_[3] + j*x.shape_[3]*x.shape_[2] + i*x.shape_[3]*x.shape_[2]*x.shape_[1] ];
tmp_.push_back(int_data);
}
tmp.push_back(tmp_);
}
auto res = maxpooling(tmp,kernel_size,kernel_size,stride,stride);
for(size_t i = 0; i< res.size(); i++){
result.data_.push_back(res[i]);
}
}
}
return result;
}
tensor Conv2D(const tensor &input_tensor,
const tensor &kernel_tensor,
const tensor &bias_tensor,
int in_channels,
int out_channels,
int kernel_size)
{
std::cout << "In Channels: " << in_channels << ", Out Channels: " << out_channels << std::endl;
std::cout << "Kernel Size: " << kernel_size << std::endl;
// Calculate output dimensions
int out_height = input_tensor.shape_[2] - kernel_size + 1;
int out_width = input_tensor.shape_[3] - kernel_size + 1;
// Ensure out_height and out_width are non-negative
out_height = std::max(out_height, 0);
out_width = std::max(out_width, 0);
// Initialize shape array for output tensor
size_t output_shape[] = {input_tensor.shape_[0], static_cast<size_t>(out_channels), static_cast<size_t>(out_height), static_cast<size_t>(out_width)};
// Initialize data array for output tensor (fill with zeros)
std::vector<double> output_data(output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3], 0);
// Create output tensor
tensor output_tensor(4, output_shape, output_data.data());
// Convolution operation
for (size_t n = 0; n < input_tensor.shape_[0]; ++n) { // for each batch
for (int m = 0; m < out_channels; ++m) { // for each filter
for (int y = 0; y < out_height; ++y) {
for (int x = 0; x < out_width; ++x) {
double sum = 0.0;
for (int c = 0; c < in_channels; ++c) { // for each channel
for (int ky = 0; ky < kernel_size; ++ky) {
for (int kx = 0; kx < kernel_size; ++kx) {
size_t ix = x + kx;
size_t iy = y + ky;
if (ix < input_tensor.shape_[3] && iy < input_tensor.shape_[2]) {
sum += input_tensor.at(n, c, iy, ix) * kernel_tensor.at(m, c, ky, kx);
}
}
}
}
sum += bias_tensor.at(m); // add bias
output_tensor.at(n, m, y, x) = sum;
}
}
}
}
return output_tensor;
}
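A hedged usage sketch (hypothetical snippet, values made up) exercising the operators defined above: element-wise add, 2x2 matrix multiply, and ReLU.

#include "tensor.h"

void tensor_demo() {
    size_t shape[] = {2, 2};
    double a_data[] = {1.0, 2.0, 3.0, 4.0};
    double b_data[] = {5.0, 6.0, 7.0, 8.0};
    tensor a(2, shape, a_data);
    tensor b(2, shape, b_data);
    tensor sum = a + b;        // element-wise: {6, 8, 10, 12}
    tensor prod = a * b;       // 2x2 matrix product: {19, 22, 43, 50}
    tensor rect = ReLU(a - b); // all entries negative, so all clamp to 0
    sum.print();
    prod.print();
    rect.print();
}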

59
src/tensor.h Executable file
View file

@ -0,0 +1,59 @@
#ifndef TENSOR_H
#define TENSOR_H
#include <stdio.h>
#include <vector>
#include <iostream>
class tensor {
public:
tensor(); // scalar 0
explicit tensor(double v); // scalar v
tensor(int dim, size_t shape[], double data[]); // From C
tensor(int dim, std::initializer_list<size_t> shape, std::initializer_list<double> data); // Conv2d
int get_dim() const;
// scalar only
double item() const;
double &item();
// more accessors
double at(size_t i) const;
double at(size_t i, size_t j) const;
double at(size_t i, size_t j, size_t k, size_t l) const;
double& at(size_t i, size_t j, size_t k, size_t l);
// passing tensor back to C code
size_t *get_shape_array();
double *get_data_array();
// easy way to print out the tensor for logging
void print() const;
size_t getTotalSize() const;
// friend is better than member, because it allows
// double + tensor (implicit conversion)
// operator overloads
friend tensor operator+(const tensor &lhs, const tensor &rhs);
friend tensor operator-(const tensor &lhs, const tensor &rhs);
friend tensor operator*(const tensor &lhs, const tensor &rhs);
template <class T>
friend tensor helper(const tensor &lhs, const tensor &rhs);
friend tensor ReLU(const tensor &x);
friend tensor Flatten(const tensor &x);
friend tensor Input2d(const tensor &x);
friend tensor Linear(const tensor &w, const tensor &x, const tensor &b);
friend tensor MaxPool2d(const tensor &x, const int kernel_size, const int stride);
friend tensor Conv2D(const tensor &input_tensor, const tensor &kernel_tensor, const tensor &bias_tensor, int in_channels, int out_channels, int kernel_size);
private:
int dim_;
std::vector<size_t> shape_;
std::vector<double> data_;
}; // class tensor
#endif // TENSOR_H