diff --git a/Makefile b/Makefile new file mode 100755 index 0000000..eaa6648 --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +.PHONY: all libeasynn easynn_test + +all: libeasynn easynn_test + +libeasynn: + g++ -Wall src/*.cpp -fPIC -O -g -shared -o libeasynn.so + +easynn_test: libeasynn + g++ -Wall easynn_test.cpp -g -lm -L. -Wl,-rpath=. -leasynn -o easynn_test \ No newline at end of file diff --git a/easynn.py b/easynn.py new file mode 100755 index 0000000..239d50e --- /dev/null +++ b/easynn.py @@ -0,0 +1,145 @@ +class Expr: + next_id = 0 + + def __init__(self, op, inputs): + self.op = op + self.inputs = inputs + self.id = Expr.next_id + Expr.next_id += 1 + + if not isinstance(op, Op): + raise Exception("Not an operator: %s" % op) + + def __dfs_post(self, ids, visitor): + ids[self.id] = True + for expr in self.inputs: + if expr.id in ids: + continue + expr.__dfs_post(ids, visitor) + visitor(self) + + def statements(self): + lines = [] + self.__dfs_post({}, lambda that: lines.append("%s" % that)) + return "\n".join(lines) + + def __str__(self): + args = ",".join(["t%d" % expr.id for expr in self.inputs]) + return "t%d = %s(%s)" % (self.id, self.op, args) + + def __promote(r): + if isinstance(r, Expr): + return r + else: + return Const(r) + + def __add__(self, r): + return Op("", "Add", 2, {})(self, Expr.__promote(r)) + + def __sub__(self, r): + return Op("", "Sub", 2, {})(self, Expr.__promote(r)) + + def __mul__(self, r): + return Op("", "Mul", 2, {})(self, Expr.__promote(r)) + + def __neg__(self): + return Op("", "Neg", 1, {})(self) + + def compile(self, builder): + self.__dfs_post({}, lambda that: builder.append(that)) + return builder.build() + + def resolve(self, parameters): + self.__dfs_post({}, lambda that: that.op.resolve(parameters)) + return self + + +class Op: + def __init__(self, name, op_type, num_args, parameters): + self.name = name + self.op_type = op_type + self.num_args = num_args + self.parameters = parameters + + def __call__(self, *inputs): + if self.num_args >= 0 and self.num_args != len(inputs): + raise Exception("%s: need %d arguments but found %d" % (self, self.num_args, len(inputs))) + for i, expr in enumerate(inputs): + if not isinstance(expr, Expr): + raise Exception("%s: arg %d is not an expression: %s" % (self, i, expr)) + return Expr(self, inputs) + + def __str__(self): + name = "%s.%s" % (self.name, self.op_type) + if len(self.parameters) == 0: + return name + params = ",".join(["%s=%s" % (k, v.shape if hasattr(v, "shape") else v) for k, v in self.parameters.items()]) + return "%s[%s]" % (name, params) + + def resolve(self, parameters): + if self.name == "": + return + for k, v in parameters.items(): + if k.startswith(self.name+"."): + self.parameters[k[len(self.name)+1:]] = v + + +def Const(c): + if isinstance(c, (int, float)): + c = float(c) + elif hasattr(c, "shape"): + c = c.astype(float) + else: + raise Exception("Const must be float or int or ndarray: %s" % c) + + return Expr(Op("", "Const", 0, { + "value": c + }), []) + + +def Input(n): + return Expr(Op(n, "Input", 0, {}), []) + + +def Input2d(n, h, w, ic): + return Expr(Op(n, "Input2d", 0, { + "height": h, + "width": w, + "in_channels": ic + }), []) + + +def MaxPool2d(k, s): + return Op("", "MaxPool2d", 1, { + "kernel_size": k, + "stride": s + }) + + +def ReLU(): + return Op("", "ReLU", 1, {}) + + +def Flatten(): + return Op("", "Flatten", 1, {}) + + +def Conv2d(n, ic, oc, k, p = 0): + return Op(n, "Conv2d", 1, { + "in_channels": ic, + "out_channels": oc, + "kernel_size": k, + "padding": p + }) + + +def 
Linear(n, i, o): + return Op(n, "Linear", 1, { + "in_features": i, + "out_features": o + }) + + +def Show(): + return Op("", "Show", 1, {}) + diff --git a/easynn_cpp.py b/easynn_cpp.py new file mode 100755 index 0000000..50bb673 --- /dev/null +++ b/easynn_cpp.py @@ -0,0 +1,130 @@ +import numpy as np +import ctypes +import time + +_libeasynn = ctypes.CDLL("./libeasynn.so") + +_libeasynn.create_program.restype = ctypes.c_void_p + +_libeasynn.append_expression.argtypes = [ + ctypes.c_void_p, + ctypes.c_int, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_void_p, + ctypes.c_int +] + +_libeasynn.add_op_param_double.argtypes = [ + ctypes.c_void_p, + ctypes.c_char_p, + ctypes.c_double +] + +_libeasynn.add_op_param_ndarray.argtypes = [ + ctypes.c_void_p, + ctypes.c_char_p, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p +] + +_libeasynn.build.restype = ctypes.c_void_p +_libeasynn.build.argtypes = [ + ctypes.c_void_p +] + +_libeasynn.add_kwargs_double.argtypes = [ + ctypes.c_void_p, + ctypes.c_char_p, + ctypes.c_double +] + +_libeasynn.add_kwargs_ndarray.argtypes = [ + ctypes.c_void_p, + ctypes.c_char_p, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p +] + +_libeasynn.execute.argtypes = [ + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p +] + + +def check_and_raise(ret): + if ret != 0: + raise Exception("libeasynn error: code %d" % ret) + + +def to_float_or_ndarray(c_dim, c_shape, c_data): + if c_dim.value == 0: + return c_data[0] + shape = [c_shape[k] for k in range(c_dim.value)] + N = 1 + for s in shape: + N *= s + print("shape =", shape, "N =", N) + flat = np.array([c_data[i] for i in range(N)]) + return flat.reshape(shape) + + +class Eval: + def __init__(self, evaluation): + self.evaluation = evaluation + + def __call__(self, **kwargs): + start = time.time() + for k, v in kwargs.items(): + if isinstance(v, (int, float)): + _libeasynn.add_kwargs_double( + self.evaluation, k.encode(), ctypes.c_double(v)) + elif hasattr(v, "shape"): + v = np.require(v, dtype=np.double, requirements=['C', 'O', 'W', 'A']) + _libeasynn.add_kwargs_ndarray( + self.evaluation, k.encode(), v.ndim, v.ctypes.shape, + v.ctypes.data) + else: + raise Exception("%s: kwargs must be float or int or ndarray" % k) + c_dim = ctypes.c_int() + c_shape = ctypes.POINTER(ctypes.c_size_t)() + c_data = ctypes.POINTER(ctypes.c_double)() + check_and_raise(_libeasynn.execute(self.evaluation, + ctypes.byref(c_dim), ctypes.byref(c_shape), ctypes.byref(c_data))) + res = to_float_or_ndarray(c_dim, c_shape, c_data) + t = time.time()-start + if t > 0.1: + print("c++ time %.2f" % t) + return res + + +class Builder: + def __init__(self): + self.program = _libeasynn.create_program() + + def append(self, expr): + inputs = [ex.id for ex in expr.inputs] + num_inputs = len(inputs) + op = expr.op + _libeasynn.append_expression( + self.program, expr.id, + op.name.encode(), op.op_type.encode(), + (ctypes.c_int*num_inputs)(*inputs), num_inputs) + for k, v in op.parameters.items(): + if isinstance(v, (int, float)): + check_and_raise(_libeasynn.add_op_param_double( + self.program, k.encode(), ctypes.c_double(v))) + elif hasattr(v, "shape"): + v = np.require(v, dtype=np.double, requirements=['C', 'O', 'W', 'A']) + check_and_raise(_libeasynn.add_op_param_ndarray( + self.program, k.encode(), v.ndim, v.ctypes.shape, + v.ctypes.data)) + else: + raise Exception("%s: op params must be float or int or ndarray: %s" % (expr, k)) + + def build(self): + return Eval(_libeasynn.build(self.program)) diff --git a/easynn_golden.py 
b/easynn_golden.py new file mode 100755 index 0000000..cf620ef --- /dev/null +++ b/easynn_golden.py @@ -0,0 +1,189 @@ +import numpy as np +import time + + +def Input(expr, op, args, **kwargs): + if op.name in kwargs: + c = kwargs[op.name] + if isinstance(c, (int, float)): + return float(c) + elif hasattr(c, "shape"): + return c.astype(float) + else: + raise Exception("%s: Input must be float or int or ndarray: %s" % (expr, c)) + else: + raise Exception("%s: missing input" % expr) + + +def Input2d(expr, op, args, **kwargs): + if not op.name in kwargs: + raise Exception("%s: missing input" % expr) + imgs = kwargs[op.name] + if not hasattr(imgs, "shape"): + raise Exception("%s: Input must be ndarray: %s" % (expr, imgs)) + if any([ + len(imgs.shape) != 4, + imgs.shape[1] != op.parameters["height"], + imgs.shape[2] != op.parameters["width"], + imgs.shape[3] != op.parameters["in_channels"]]): + raise Exception("%s: Invalid input size: %s" % (expr, imgs.shape)) + # NHWC => NCHW + return imgs.astype(float).transpose(0,3,1,2) + + +def Const(expr, op, args, **kwargs): + return op.parameters["value"] + + +def Neg(expr, op, args, **kwargs): + return -args[0] + + +def Add(expr, op, args, **kwargs): + a = args[0] + b = args[1] + if not hasattr(a, "shape") and not hasattr(b, "shape"): + return a+b + elif hasattr(a, "shape") and hasattr(b, "shape"): + if a.shape != b.shape: + raise Exception("%s: size mismatch: %s+%s" % (expr, a.shape, b.shape)) + return a+b + else: + raise Exception("%s: cannot mix scalar and ndarray" % expr) + + +def Sub(expr, op, args, **kwargs): + a = args[0] + b = args[1] + if not hasattr(a, "shape") and not hasattr(b, "shape"): + return a-b + elif hasattr(a, "shape") and hasattr(b, "shape"): + if a.shape != b.shape: + raise Exception("%s: size mismatch: %s-%s" % (expr, a.shape, b.shape)) + return a-b + else: + raise Exception("%s: cannot mix scalar and ndarray" % expr) + + +def Mul(expr, op, args, **kwargs): + a = args[0] + b = args[1] + if not hasattr(a, "shape") or not hasattr(b, "shape"): + return a*b + else: + if len(a.shape) != 2 or len(b.shape) != 2: + raise Exception("%s: matmul only: %s*%s" % (expr, a.shape, b.shape)) + if a.shape[1] != b.shape[0]: + raise Exception("%s: size mismatch: %s*%s" % (expr, a.shape, b.shape)) + return np.matmul(a, b) + + +def Flatten(expr, op, args, **kwargs): + x = args[0] + if not hasattr(x, "shape"): + raise Exception("%s: ndarray only: %s" % (expr, imgs)) + return x.reshape((x.shape[0], -1)) + + +def ReLU(expr, op, args, **kwargs): + x = args[0] + return x*(x > 0) + + +def Linear(expr, op, args, **kwargs): + x = args[0] + if not hasattr(x, "shape"): + raise Exception("%s: ndarray only: %s" % (expr, x)) + if "weight" not in op.parameters or "bias" not in op.parameters: + raise Exception("%s: missing weight or bias" % expr) + weight = op.parameters["weight"] + bias = op.parameters["bias"] + if not hasattr(weight, "shape") or not hasattr(bias, "shape"): + raise Exception("%s: ndarray only for weight or bias" % expr) + in_features = op.parameters["in_features"] + out_features = op.parameters["out_features"] + if any([ + len(x.shape) != 2, + x.shape[1] != in_features, + weight.shape != (out_features, in_features), + bias.shape != (out_features,)]): + raise Exception("%s: size mismatch: %s*%s+%s" % (expr, weight.shape, x.shape, bias.shape)) + return np.einsum("ni,oi->no", x, weight, + optimize = "optimal")+bias.reshape((1, out_features)) + + +def MaxPool2d(expr, op, args, **kwargs): + x = args[0] + if not hasattr(x, "shape"): + raise Exception("%s: 
ndarray only: %s" % (expr, x)) + kernel_size = op.parameters["kernel_size"] + stride = op.parameters["stride"] + if kernel_size != stride: + raise Exception("%s: kernel_size != stride" % expr) + if any([ + len(x.shape) != 4, + x.shape[2]%stride != 0, + x.shape[3]%stride != 0]): + raise Exception("%s: size mismatch: %s" % (expr, x.shape)) + new_shape = (x.shape[0], x.shape[1], x.shape[2]//stride, stride, x.shape[3]//stride, stride) + return np.nanmax(x.reshape(new_shape), axis = (3,5)) + + +def Conv2d(expr, op, args, **kwargs): + x = args[0] + if not hasattr(x, "shape"): + raise Exception("%s: ndarray only: %s" % (expr, x)) + if "weight" not in op.parameters or "bias" not in op.parameters: + raise Exception("%s: missing weight or bias" % expr) + weight = op.parameters["weight"] + bias = op.parameters["bias"] + in_channels = op.parameters["in_channels"] + out_channels = op.parameters["out_channels"] + kernel_size = op.parameters["kernel_size"] + padding = op.parameters["padding"] + if any([ + len(x.shape) != 4, + x.shape[1] != in_channels, + weight.shape != (out_channels, in_channels, kernel_size, kernel_size), + bias.shape != (out_channels,)]): + raise Exception("%s: size mismatch: %s" % (expr, x.shape)) + if padding != 0: + tmp = np.zeros((x.shape[0], x.shape[1], x.shape[2]+2*padding, x.shape[3]+2*padding)) + tmp[:, :, 1:-2, 1:-2] = x + x = tmp + conv_shape = x.shape[:2]+(x.shape[2]+1-kernel_size, x.shape[3]+1-kernel_size, kernel_size, kernel_size) + conv_strides = x.strides+x.strides[2:] + conv = np.lib.stride_tricks.as_strided(x, shape = conv_shape, strides = conv_strides, writeable = False) + return np.einsum("nihwyx,oiyx->nohw", conv, weight, + optimize = "optimal")+bias.reshape((1, out_channels, 1, 1)) + + +class Eval: + def __init__(self, program): + self.program = program + + def __call__(self, **kwargs): + start = time.time() + values = {} + for expr in self.program: + args = [values[ex.id] for ex in expr.inputs] + if expr.op.op_type not in globals(): + raise Exception("%s: not implemented" % expr) + values[expr.id] = globals()[expr.op.op_type](expr, expr.op, args, **kwargs) + #print("numpy op", expr.op.op_type, "time %.2f" % (time.time()-start)) + res = values[self.program[-1].id] + t = time.time()-start + if t > 0.1: + print("numpy time %.2f" % t) + return res + + +class Builder: + def __init__(self): + self.program = [] + + def append(self, expr): + self.program.append(expr) + + def build(self): + return Eval(self.program) diff --git a/easynn_test.cpp b/easynn_test.cpp new file mode 100755 index 0000000..db9a061 --- /dev/null +++ b/easynn_test.cpp @@ -0,0 +1,47 @@ +/** + * A simple test program helps you to debug your easynn implementation. 
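 + * It appends a single "Input" expression named "a", binds a = 5 through
 + * add_kwargs_double, runs execute, and prints the scalar result; the
 + * commented-out blocks below can be re-enabled to exercise Add, Sub and Mul.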
+ */ + +#include +#include "src/libeasynn.h" + +int main() +{ + program *prog = create_program(); + + int inputs0[] = {}; + append_expression(prog, 0, "a", "Input", inputs0, 0); + + //int inputs1[] = {0, 0}; + //append_expression(prog, 1, "", "Add", inputs1, 2); + + //int inputs2[] = {2, 2}; + //append_expression(prog, 2, "", "Mul", inputs2, 2); + + //int inputs3[] = {10, 5}; + //append_expression(prog, 3, "", "Sub", inputs3, 2); + + + evaluation *eval = build(prog); + add_kwargs_double(eval, "a", 5); + + + // tensor tests + // it is easier for me + // to just compile the library and run the python grading tests than to use this program + int dim = 0; + size_t *shape = nullptr; + double *data = nullptr; + if (execute(eval, &dim, &shape, &data) != 0) + { + printf("evaluation fails\n"); + return -1; + } + + if (dim == 0) + printf("res = %f\n", data[0]); + else + printf("result as tensor is not supported yet\n"); + + return 0; +} diff --git a/ece449.code-workspace b/ece449.code-workspace new file mode 100755 index 0000000..aeba41b --- /dev/null +++ b/ece449.code-workspace @@ -0,0 +1,12 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "files.associations": { + "array": "cpp" + } + } +} \ No newline at end of file diff --git a/grade.py b/grade.py new file mode 100755 index 0000000..911939f --- /dev/null +++ b/grade.py @@ -0,0 +1,30 @@ +import traceback +import multiprocessing + +def grade_all(name, begin, end, funcs): + grade = 0 + for q in range(begin, end): + def func(): + exit(0 if funcs["grade_Q%d" % q]() else 1) + try: + p = multiprocessing.Process(target = func) + p.start() + p.join(300) + if p.is_alive(): + p.terminate() + print("============Q%d timeout!============\n" % q, flush = True) + elif p.exitcode == 0: + print("============Q%d passed!============\n" % q, flush = True) + grade += 1 + else: + print("============Q%d failed!============\n" % q, flush = True) + except Exception as e: + print("============Q%d failed!============" % q, flush = True) + print(traceback.format_exc()) + print("Grading result: %d functions passed" % grade) + + print("*************************************************************") + print("* You may receive 0 points unless your code tests correctly *") + print("* Please commit and push your code at least daily. 
*") + print("* DO NOT CHANGE any grade_px.py for grading *") + print("*************************************************************") diff --git a/grade_p1.py b/grade_p1.py new file mode 100755 index 0000000..d4c0a20 --- /dev/null +++ b/grade_p1.py @@ -0,0 +1,104 @@ +import numpy as np +import easynn as nn +import easynn_golden as golden +import grade +import prj01 + + +# create +def grade_Q1(): + a = prj01.Q1() + if not hasattr(a, "shape"): + return False + if a.shape != (10, 5): + return False + return all([a[i, j] == i+j for i in range(10) for j in range(5)]) + + +# add +def grade_Q2(): + a = np.random.rand(100, 10) + b = np.random.rand(100, 10) + c = prj01.Q2(a, b) + if not hasattr(c, "shape"): + return False + if c.shape != (100, 10): + return False + return all([abs(c[i, j]-a[i, j]-b[i, j]) < 1e-9 for i in range(100) for j in range(10)]) + + +# mul +def grade_Q3(): + a = np.array([[i for i in range(10)]]) + b = a.transpose() + c = prj01.Q3(a, b) + d = prj01.Q3(b, a) + if not hasattr(c, "shape") or not hasattr(d, "shape"): + return False + if c.shape != (1, 1) or d.shape != (10, 10): + return False + return c[0, 0] == 285 and all([d[i, j] == i*j for i in range(10) for j in range(10)]) + + +# max +def grade_Q4(): + a = np.random.rand(100, 10) + b = prj01.Q4(a) + if len(b) != 100: + return False + return all([a[i, b[i]] >= a[i, j] for i in range(100) for j in range(10)]) + + +# solve +def grade_Q5(): + A = -np.random.rand(100, 100)+np.diag([100]*100) + b = np.random.rand(100, 1) + x = prj01.Q5(A, b) + Ax = prj01.Q3(A, x) + return np.allclose(Ax, b) + + +# a+b +def grade_Q6(): + a = np.random.rand(100, 10) + b = np.random.rand(100, 10) + c = prj01.Q6().compile(golden.Builder())(a = a, b = b) + return np.allclose(c, prj01.Q2(a, b)) + + +# a+b*c +def grade_Q7(): + a = np.random.rand(100, 50) + b = np.random.rand(100, 10) + c = np.random.rand(10, 50) + d = prj01.Q7().compile(golden.Builder())(a = a, b = b, c = c) + return np.allclose(d, prj01.Q2(a, prj01.Q3(b, c))) + + +# Ax+b +def grade_Q8(): + A = np.random.rand(100, 100) + x = np.random.rand(100, 1) + b = np.random.rand(100, 1) + y = prj01.Q8(A, b).compile(golden.Builder())(x = x) + return np.allclose(y, prj01.Q2(prj01.Q3(A, x), b)) + + +# x**n +def grade_Q9(): + x = np.random.rand(100, 100) + y = prj01.Q9(8).compile(golden.Builder())(x = x) + x2 = prj01.Q3(x, x) + x4 = prj01.Q3(x2, x2) + x8 = prj01.Q3(x4, x4) + return np.allclose(y, x8) + + +# |x| +def grade_Q10(): + x = np.random.randn(100, 100) + y = prj01.Q10().compile(golden.Builder())(x = x) + return np.allclose(y, np.abs(x)) + + +grade.grade_all("p1", 1, 11, globals()) diff --git a/grade_p2.py b/grade_p2.py new file mode 100755 index 0000000..f7ea4f2 --- /dev/null +++ b/grade_p2.py @@ -0,0 +1,89 @@ +import numpy as np +import easynn as nn +import easynn_golden as golden +import easynn_cpp as cpp +import grade + + +def random_kwargs(kwargs): + return {k: np.random.random(shape) if shape != None else np.random.random() for k, shape in kwargs.items()} + + +def is_same(p, n, **kwargs): + e0 = p.compile(golden.Builder()) + e1 = p.compile(cpp.Builder()) + nkwargs = [random_kwargs(kwargs) for i in range(n)] + return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)]) + + +def grade_Q1(): + x = nn.Input("x") + return is_same(x, 1, x = None) + + +def grade_Q2(): + c = nn.Const(np.random.random()) + return is_same(c, 1) + + +def grade_Q3(): + x = nn.Input("x") + c = nn.Const(np.random.random()) + y = x+c + return is_same(y, 1, x = None) + + +def grade_Q4(): + x = 
nn.Input("x") + c = nn.Const(np.random.random()) + y = x*c + return is_same(y, 1, x = None) + + +def grade_Q5(): + x = nn.Input("x") + y = nn.Input("y") + z = x+y + return is_same(z, 1, x = None, y = None) + + +def grade_Q6(): + x = nn.Input("x") + y = nn.Input("y") + z = x*y + return is_same(z, 1, x = None, y = None) + + +def grade_Q7(): + x = nn.Input("x") + y = nn.Input("y") + z = nn.Input("z") + r = x+y*z + return is_same(r, 1, x = None, y = None, z = None) + + +def grade_Q8(): + x = nn.Input("x") + y = nn.Input("y") + z = nn.Input("z") + r = (x-y)*z + return is_same(r, 1, x = None, y = None, z = None) + + +def grade_Q9(): + x = nn.Input("x") + y = nn.Input("y") + c = nn.Const(np.random.random()) + z = x*y-x*c-y*c+c*c + return is_same(z, 10, x = None, y = None) + + +def grade_Q10(): + x = nn.Input("x") + y = nn.Input("y") + c = nn.Const(np.random.random()) + z = x*y-x*c-y*c+c*c + return is_same(z, 10, x = None, y = None) + + +grade.grade_all("p2", 1, 11, globals()) diff --git a/grade_p3.py b/grade_p3.py new file mode 100755 index 0000000..2ae9d35 --- /dev/null +++ b/grade_p3.py @@ -0,0 +1,99 @@ +import numpy as np +import easynn as nn +import easynn_golden as golden +import easynn_cpp as cpp +import grade + + +def random_kwargs(kwargs): + return {k: np.random.random(shape) if shape != None else np.random.random() for k, shape in kwargs.items()} + + +def is_same(p, n, **kwargs): + e0 = p.compile(golden.Builder()) + e1 = p.compile(cpp.Builder()) + nkwargs = [random_kwargs(kwargs) for i in range(n)] + return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)]) + + +def grade_Q1(): + x = nn.Input("x") + return is_same(x, 1, x = (9,)) and is_same(x, 1, x = (9, 9)) + + +def grade_Q2(): + c1 = nn.Const(np.random.random((10,))) + c2 = nn.Const(np.random.random((10, 10))) + return is_same(c1, 1) and is_same(c2, 1) + + +def grade_Q3(): + x = nn.Input("x") + y = nn.Input("y") + z = x+y + return all([ + is_same(z, 1, x = (11,), y = (11,)), + is_same(z, 1, x = (11, 12), y = (11, 12)), + is_same(z, 1, x = (11, 12, 13), y = (11, 12, 13)), + is_same(z, 1, x = (11, 12, 13, 14), y = (11, 12, 13, 14))]) + + +def grade_Q4(): + x = nn.Input("x") + y = nn.Input("y") + z = x-y + return all([ + is_same(z, 1, x = (11,), y = (11,)), + is_same(z, 1, x = (11, 12), y = (11, 12)), + is_same(z, 1, x = (11, 12, 13), y = (11, 12, 13)), + is_same(z, 1, x = (11, 12, 13, 14), y = (11, 12, 13, 14))]) + + +def grade_Q5(): + x = nn.Input("x") + y = nn.Input("y") + z = x*y + return is_same(z, 1, x = (11, 12), y = (12, 13)) + + +def grade_Q6(): + x = nn.Input("x") + y = nn.Input("y") + z = x*y + return is_same(z, 1, x = None, y = (12, 13)) and is_same(z, 1, x = (11, 12), y = None) + + +def grade_Q7(): + x = nn.Input("x") + y = nn.Input("y") + z = nn.Input("z") + r = x+y*z + return is_same(r, 1, x = (11, 13), y = (11, 12), z = (12, 13)) + + +def grade_Q8(): + x = nn.Input("x") + y = nn.Input("y") + z = nn.Input("z") + r = (x-y)*z + return is_same(r, 1, x = (11, 12), y = (11, 12), z = (12, 13)) + + +def grade_Q9(): + x = nn.Input("x") + y = nn.Input("y") + z = nn.Input("z") + r = x*y*z + return is_same(r, 1, x = (11, 12), y = (12, 13), z = (13, 14)) + + +def grade_Q10(): + x = nn.Input("x") + y = nn.Input("y") + c1 = nn.Const(np.random.random((11, 12))) + c2 = nn.Const(np.random.random((12, 13))) + z = x*y-x*c2-c1*y+c1*c2 + return is_same(z, 1, x = (11, 12), y = (12, 13)) + + +grade.grade_all("p3", 1, 11, globals()) diff --git a/grade_p4.py b/grade_p4.py new file mode 100755 index 0000000..90f7a4e --- 
/dev/null +++ b/grade_p4.py @@ -0,0 +1,151 @@ +import numpy as np +import easynn as nn +import easynn_golden as golden +import easynn_cpp as cpp +import grade + + +def random_kwargs(kwargs): + return {k: np.random.random(shape) if shape != None else np.random.random() for k, shape in kwargs.items()} + + +def is_same(p, n, **kwargs): + e0 = p.compile(golden.Builder()) + e1 = p.compile(cpp.Builder()) + nkwargs = [random_kwargs(kwargs) for i in range(n)] + return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)]) + + +def grade_Q1(): + relu = nn.ReLU() + x = relu(nn.Input("x")) + return is_same(x, 1, x = (10, 11, 12, 13)) + + +def grade_Q2(): + flatten = nn.Flatten() + x = flatten(nn.Input("x")) + return is_same(x, 1, x = (10, 11, 12, 13)) + + +def grade_Q3(): + x = nn.Input2d("images", 10, 11, 3) + return is_same(x, 1, images = (50, 10, 11, 3)) + + +def grade_Q4(): + f = nn.Linear("f", 100, 10) + x = f(nn.Input("x")) + x.resolve({ + "f.weight": np.random.random((10, 100)), + "f.bias": np.random.random((10,))}) + return is_same(x, 1, x = (50, 100)) + + +def grade_Q5(): + pool = nn.MaxPool2d(3, 3) + x = pool(nn.Input2d("x", 12, 15, 3)) + return is_same(x, 1, x = (10, 12, 15, 3)) + + +def grade_Q6(): + c = nn.Conv2d("c", 3, 16, 5) + x = c(nn.Input2d("x", 15, 20, 3)) + x.resolve({ + "c.weight": np.random.random((16, 3, 5, 5)), + "c.bias": np.random.random((16,)) + }) + return is_same(x, 1, x = (10, 15, 20, 3)) + + +def grade_Q7(): + relu = nn.ReLU() + flatten = nn.Flatten() + f1 = nn.Linear("f1", 28*28, 100) + f2 = nn.Linear("f2", 100, 10) + x = nn.Input2d("images", 28, 28, 1) + x = flatten(x) + x = f2(relu(f1(x))) + x.resolve(np.load("msimple_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"][:20] + + infer0 = x.compile(golden.Builder()) + infer1 = x.compile(cpp.Builder()) + logit0 = infer0(images = images) + logit1 = infer1(images = images) + return np.allclose(logit0, logit1) + + +def grade_Q8(): + relu = nn.ReLU() + flatten = nn.Flatten() + f1 = nn.Linear("f1", 28*28, 100) + f2 = nn.Linear("f2", 100, 10) + x = nn.Input2d("images", 28, 28, 1) + x = flatten(x) + x = f2(relu(f1(x))) + x.resolve(np.load("msimple_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"][:1000] + + infer0 = x.compile(golden.Builder()) + infer1 = x.compile(cpp.Builder()) + label0 = infer0(images = images).argmax(axis = 1) + label1 = infer1(images = images).argmax(axis = 1) + return np.allclose(label0, label1) + + +def grade_Q9(): + pool = nn.MaxPool2d(2, 2) + relu = nn.ReLU() + flatten = nn.Flatten() + + x = nn.Input2d("images", 28, 28, 1) + c1 = nn.Conv2d("c1", 1, 8, 3) # 28->26 + c2 = nn.Conv2d("c2", 8, 8, 3) # 26->24 + x = pool(relu(c2(relu(c1(x))))) # 24->12 + c3 = nn.Conv2d("c3", 8, 16, 3) # 12->10 + c4 = nn.Conv2d("c4", 16, 16, 3) # 10->8 + x = pool(relu(c4(relu(c3(x))))) # 8->4 + f = nn.Linear("f", 16*4*4, 10) + x = f(flatten(x)) + + x.resolve(np.load("mnist_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"][:20] + + infer0 = x.compile(golden.Builder()) + infer1 = x.compile(cpp.Builder()) + logit0 = infer0(images = images) + logit1 = infer1(images = images) + return np.allclose(logit0, logit1) + + +def grade_Q10(): + pool = nn.MaxPool2d(2, 2) + relu = nn.ReLU() + flatten = nn.Flatten() + + x = nn.Input2d("images", 28, 28, 1) + c1 = nn.Conv2d("c1", 1, 8, 3) # 28->26 + c2 = nn.Conv2d("c2", 8, 8, 3) # 26->24 + x = pool(relu(c2(relu(c1(x))))) # 24->12 + c3 = nn.Conv2d("c3", 8, 16, 3) # 
12->10 + c4 = nn.Conv2d("c4", 16, 16, 3) # 10->8 + x = pool(relu(c4(relu(c3(x))))) # 8->4 + f = nn.Linear("f", 16*4*4, 10) + x = f(flatten(x)) + + x.resolve(np.load("mnist_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"][:1000] + + infer0 = x.compile(golden.Builder()) + infer1 = x.compile(cpp.Builder()) + label0 = infer0(images = images).argmax(axis = 1) + label1 = infer1(images = images).argmax(axis = 1) + return np.allclose(label0, label1) + + +grade.grade_all("p4", 1, 11, globals()) diff --git a/grade_p5.py b/grade_p5.py new file mode 100755 index 0000000..eecc8a0 --- /dev/null +++ b/grade_p5.py @@ -0,0 +1,79 @@ +import numpy as np +import easynn as nn +import easynn_golden as golden +import grade + +def grade_Q1(): + relu = nn.ReLU() + flatten = nn.Flatten() + f1 = nn.Linear("f1", 28*28, 32) + f2 = nn.Linear("f2", 32, 10) + x = nn.Input2d("images", 28, 28, 1) + x = flatten(x) + x = f2(relu(f1(x))) + x.resolve(np.load("p5_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"] + labels = mnist_test["labels"] + + infer = x.compile(golden.Builder()) + pred_labels = infer(images = images).argmax(axis = 1) + count = sum(labels == pred_labels) + print(count) + return count > 8500 + +def grade_Q2(): + relu = nn.ReLU() + flatten = nn.Flatten() + f1 = nn.Linear("f1", 28*28, 32) + f2 = nn.Linear("f2", 32, 10) + x = nn.Input2d("images", 28, 28, 1) + x = flatten(x) + x = f2(relu(f1(x))) + x.resolve(np.load("p5_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"] + labels = mnist_test["labels"] + + infer = x.compile(golden.Builder()) + pred_labels = infer(images = images).argmax(axis = 1) + count = sum(labels == pred_labels) + return count > 9000 + +def grade_Q3(): + relu = nn.ReLU() + flatten = nn.Flatten() + f1 = nn.Linear("f1", 28*28, 32) + f2 = nn.Linear("f2", 32, 10) + x = nn.Input2d("images", 28, 28, 1) + x = flatten(x) + x = f2(relu(f1(x))) + x.resolve(np.load("p5_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"] + labels = mnist_test["labels"] + + infer = x.compile(golden.Builder()) + pred_labels = infer(images = images).argmax(axis = 1) + count = sum(labels == pred_labels) + return count > 9300 + +def grade_Q4(): + relu = nn.ReLU() + flatten = nn.Flatten() + f1 = nn.Linear("f1", 28*28, 32) + f2 = nn.Linear("f2", 32, 10) + x = nn.Input2d("images", 28, 28, 1) + x = flatten(x) + x = f2(relu(f1(x))) + x.resolve(np.load("p5_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"] + labels = mnist_test["labels"] + + infer = x.compile(golden.Builder()) + pred_labels = infer(images = images).argmax(axis = 1) + count = sum(labels == pred_labels) + return count > 9500 + +grade.grade_all("p5", 1, 5, globals()) diff --git a/grade_p6.py b/grade_p6.py new file mode 100755 index 0000000..3b8e2b5 --- /dev/null +++ b/grade_p6.py @@ -0,0 +1,91 @@ +import numpy as np +import easynn as nn +import easynn_golden as golden +import grade + +def grade_Q1(): + pool = nn.MaxPool2d(2, 2) + relu = nn.ReLU() + flatten = nn.Flatten() + x = nn.Input2d("images", 28, 28, 1) + c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24 + x = pool(relu(c1(x))) # 24->12 + c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8 + x = pool(relu(c2(x))) # 8->4 + f = nn.Linear("f", 8*4*4, 10) + x = f(flatten(x)) + x.resolve(np.load("p6_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"] + labels = mnist_test["labels"] + + infer = x.compile(golden.Builder()) + 
pred_labels = infer(images = images).argmax(axis = 1) + count = sum(labels == pred_labels) + print(count) + return count > 8500 + +def grade_Q2(): + pool = nn.MaxPool2d(2, 2) + relu = nn.ReLU() + flatten = nn.Flatten() + x = nn.Input2d("images", 28, 28, 1) + c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24 + x = pool(relu(c1(x))) # 24->12 + c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8 + x = pool(relu(c2(x))) # 8->4 + f = nn.Linear("f", 8*4*4, 10) + x = f(flatten(x)) + x.resolve(np.load("p6_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"] + labels = mnist_test["labels"] + + infer = x.compile(golden.Builder()) + pred_labels = infer(images = images).argmax(axis = 1) + count = sum(labels == pred_labels) + return count > 9000 + +def grade_Q3(): + pool = nn.MaxPool2d(2, 2) + relu = nn.ReLU() + flatten = nn.Flatten() + x = nn.Input2d("images", 28, 28, 1) + c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24 + x = pool(relu(c1(x))) # 24->12 + c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8 + x = pool(relu(c2(x))) # 8->4 + f = nn.Linear("f", 8*4*4, 10) + x = f(flatten(x)) + x.resolve(np.load("p6_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"] + labels = mnist_test["labels"] + + infer = x.compile(golden.Builder()) + pred_labels = infer(images = images).argmax(axis = 1) + count = sum(labels == pred_labels) + return count > 9300 + +def grade_Q4(): + pool = nn.MaxPool2d(2, 2) + relu = nn.ReLU() + flatten = nn.Flatten() + x = nn.Input2d("images", 28, 28, 1) + c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24 + x = pool(relu(c1(x))) # 24->12 + c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8 + x = pool(relu(c2(x))) # 8->4 + f = nn.Linear("f", 8*4*4, 10) + x = f(flatten(x)) + x.resolve(np.load("p6_params.npz")) + mnist_test = np.load("mnist_test.npz") + images = mnist_test["images"] + labels = mnist_test["labels"] + + infer = x.compile(golden.Builder()) + pred_labels = infer(images = images).argmax(axis = 1) + count = sum(labels == pred_labels) + return count > 9500 + +grade.grade_all("p6", 1, 5, globals()) diff --git a/mnist_params.npz b/mnist_params.npz new file mode 100755 index 0000000..56bfc1f Binary files /dev/null and b/mnist_params.npz differ diff --git a/mnist_test.npz b/mnist_test.npz new file mode 100755 index 0000000..cd0e111 Binary files /dev/null and b/mnist_test.npz differ diff --git a/mnist_train.npz b/mnist_train.npz new file mode 100755 index 0000000..9067929 Binary files /dev/null and b/mnist_train.npz differ diff --git a/msimple_params.npz b/msimple_params.npz new file mode 100755 index 0000000..9860fe8 Binary files /dev/null and b/msimple_params.npz differ diff --git a/p5_params.npz b/p5_params.npz new file mode 100755 index 0000000..17814a2 Binary files /dev/null and b/p5_params.npz differ diff --git a/prj01.py b/prj01.py new file mode 100755 index 0000000..8df0ca5 --- /dev/null +++ b/prj01.py @@ -0,0 +1,76 @@ +import numpy as np +import easynn as nn + +# Create a numpy array of 10 rows and 5 columns. +# Set the element at row i and column j to be i+j. +def Q1(): + array = np.zeros((10, 5)) + + for i in range(10): + for j in range(5): + array[i, j] = i+j + + return array + +# Add two numpy arrays together. +def Q2(a, b): + return a + b + +# Multiply two 2D numpy arrays using matrix multiplication. +def Q3(a, b): + return np.matmul(a, b) + +# For each row of a 2D numpy array, find the column index +# with the maximum element. Return all these column indices. +def Q4(a): + return [ np.argmax(row) for row in a ] + +# Solve Ax = b. 
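+# (np.linalg.solve handles this directly; it expects A to be square and
+# non-singular, which holds for the diagonally dominant A used in grade_Q5.)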
+def Q5(A, b): + return np.linalg.solve(A, b) + +# Return an EasyNN expression for a+b. +def Q6(): + a = nn.Input('a') + b = nn.Input('b') + c = a+b + return c + +# Return an EasyNN expression for a+b*c. +def Q7(): + a = nn.Input('a') + b = nn.Input('b') + c = nn.Input('c') + d = a+b*c + return d + +# Given A and b, return an EasyNN expression for Ax+b. +def Q8(A, b): + A_const = nn.Const(A) + b_const = nn.Const(b) + x_input = nn.Input("x") + + expr = ((A_const*x_input) + b_const) + + return expr + + +# Given n, return an EasyNN expression for x**n. +def Q9(n): + x = nn.Input("x") + n_const = nn.Const(n) + + expr = x + for _ in range(n - 1): + expr = expr * x + + return expr + +# the element-wise absolute value |x|. +def Q10(): + x = nn.Input("x") + zero = nn.Const(0) + max = nn.ReLU()(x) + min = x - max + abs = max - min + return abs diff --git a/prj05.py b/prj05.py new file mode 100755 index 0000000..cd2b6c9 --- /dev/null +++ b/prj05.py @@ -0,0 +1,274 @@ +import numpy as np +import time +import pdb + +from matplotlib import pyplot + +# save theta to p5_params.npz that can be used by easynn +def save_theta(theta): + f1_W, f1_b, f2_W, f2_b = theta + + np.savez_compressed("p5_params.npz", **{ + "f1.weight": f1_W, + "f1.bias": f1_b, + "f2.weight": f2_W, + "f2.bias": f2_b + }) + + +# initialize theta using uniform distribution [-bound, bound] +# return theta as (f1_W, f1_b, f2_W, f2_b) +def initialize_theta(bound): + f1_W = np.random.uniform(-bound, bound, (32, 784)) + f1_b = np.random.uniform(-bound, bound, 32) + f2_W = np.random.uniform(-bound, bound, (10, 32)) + f2_b = np.random.uniform(-bound, bound, 10) + return (f1_W, f1_b, f2_W, f2_b) + + +# forward: +# x = Flatten(images) +# g = Linear_f1(x) +# h = ReLU(g) +# z = Linear_f2(h) +# return (z, h, g, x) +def forward(images, theta): + # number of samples + N = images.shape[0] + + # unpack theta into f1 and f2 + f1_W, f1_b, f2_W, f2_b = theta + + # x = Flatten(images) + x = images.astype(float).transpose(0,3,1,2).reshape((N, -1)) + + # g = Linear_f1(x) + g = np.zeros((N, f1_b.shape[0])) + for i in range(N): + g[i, :] = np.matmul(f1_W, x[i])+f1_b + + # h = ReLU(g) + h = g*(g > 0) + + # z = Linear_f2(h) + z = np.zeros((N, f2_b.shape[0])) + for i in range(N): + z[i, :] = np.matmul(f2_W, h[i])+f2_b + + return (z, h, g, x) + + +# backprop: +# J = cross entropy between labels and softmax(z) +# return nabla_J +def backprop(labels, theta, z, h, g, x): + # number of samples + N = labels.shape[0] + + # unpack theta into f1 and f2 + f1_W, f1_b, f2_W, f2_b = theta + + # nabla_J consists of partial J to partial f1_W, f1_b, f2_W, f2_b + p_f1_W = np.zeros(f1_W.shape) + p_f1_b = np.zeros(f1_b.shape) + p_f2_W = np.zeros(f2_W.shape) + p_f2_b = np.zeros(f2_b.shape) + + for i in range(N): + # compute the contribution to nabla_J for sample i + + # cross entropy and softmax + # compute partial J to partial z[i] + # scale by 1/N for averaging + expz = np.exp(z[i]-max(z[i])) + p_z = expz/sum(expz)/N + p_z[labels[i]] -= 1/N + + # z = Linear_f2(h) + # compute partial J to partial h[i] + # accumulate partial J to partial f2_W, f2_b + p_h = np.dot(f2_W.T, p_z) + p_f2_W += np.outer(p_z, h[i]) + p_f2_b += p_z + + # h = ReLU(g) + # compute partial J to partial g[i] + p_g = p_h * (g[i] > 0) + + + # g = Linear_f1(x) + # accumulate partial J to partial f1_W, f1_b + p_f1_W += np.outer(p_g, x[i]) + p_f1_b += p_g + + return (p_f1_W, p_f1_b, p_f2_W, p_f2_b) + + +# apply SGD to update theta by nabla_J and the learning rate epsilon +# return updated theta +def update_theta(theta, 
nabla_J, epsilon): + # ToDo: modify code below as needed + #updated_theta = theta + #return updated_theta + f1_W, f1_b, f2_W, f2_b = theta + p_f1_W, p_f1_b, p_f2_W, p_f2_b = nabla_J + + # update the weights and biases for the first layer (f1) + f1_W_updated = f1_W - epsilon * p_f1_W + f1_b_updated = f1_b - epsilon * p_f1_b + + # update the weights and biases for the second layer (f2) + f2_W_updated = f2_W - epsilon * p_f2_W + f2_b_updated = f2_b - epsilon * p_f2_b + + return (f1_W_updated, f1_b_updated, f2_W_updated, f2_b_updated) + +def print_training_hyperparams_for_session(epsilon, batch_size, bound): + print("Starting training session with 10 epochs:") + print("") + print("Hyperparameters:") + print(f"epsilon: {epsilon}") + print(f"bound: {bound}") + print(f"batch_size: {batch_size}") + print("") + print("Results:") + +def plot_epoch(epochs, accuracies, epsilon, batch_size, bound): + pyplot.figure(figsize=(10, 6)) + pyplot.plot(epochs, accuracies, label=f"Epsilon: {epsilon}, Batch Size: {batch_size}, Bound: {bound}") + pyplot.xlabel('Epoch') + pyplot.ylabel('Accuracy') + pyplot.title('Training Accuracy over Epochs') + pyplot.legend() + pyplot.grid(True) + pyplot.show() + +def plot_all_epochs(training_results): + pyplot.figure(figsize=(12, 8)) + + for epochs, accuracies, epsilon, batch_size, bound in training_results: + label = f"Epsilon: {epsilon}, Batch Size: {batch_size}, Bound: {bound}" + pyplot.plot(epochs, accuracies, label=label) + + pyplot.xlabel('Epoch') + pyplot.ylabel('Accuracy') + pyplot.title('Training Accuracy over Epochs for Different Hyperparameters') + pyplot.legend() + pyplot.grid(True) + pyplot.show() + +def plot_table(training_results): + # Setting up the data for the table + cell_text = [] + columns = ['Epoch', 'Accuracy', 'Epsilon', 'Batch Size', 'Bound'] + for result in training_results: + epochs, accuracies, epsilon, batch_size, bound = result + for epoch, accuracy in zip(epochs, accuracies): + cell_text.append([epoch, f"{accuracy:.3f}", epsilon, batch_size, bound]) + + # Determine the figure size needed for the table + figsize = (10, len(cell_text) * 0.2) + fig, ax = pyplot.subplots(figsize=figsize) + ax.axis('tight') + ax.axis('off') + + # Create the table + table = ax.table(cellText=cell_text, colLabels=columns, loc='center', cellLoc='center') + + # Adjust table scale + table.auto_set_font_size(False) + table.set_fontsize(8) + table.auto_set_column_width(col=list(range(len(columns)))) + + pyplot.show() + + +def start_training(epsilon, batch_size, bound, mnist_train): + + # ToDo: set numpy random seed to the last 8 digits of your CWID + np.random.seed(20497299) + + validation_images = mnist_train["images"][:1000] + validation_labels = mnist_train["labels"][:1000] + training_images = mnist_train["images"][1000:] + training_labels = mnist_train["labels"][1000:] + + # hyperparameters + # we can experiment with these values to see if increasing or decreasing + # these values can influence our accuracy + # default values + #bound = 1 # initial weight range + #epsilon = 0.00001 # learning rate + + #print_training_hyperparams_for_session(epsilon, batch_size, bound) + # start training + + accuracies = [] + epochs = [] + start = time.time() + theta = initialize_theta(bound) + batches = training_images.shape[0]//batch_size + for epoch in range(10): + indices = np.arange(training_images.shape[0]) + np.random.shuffle(indices) + for i in range(batches): + batch_images = training_images[indices[i*batch_size:(i+1)*batch_size]] + batch_labels = 
training_labels[indices[i*batch_size:(i+1)*batch_size]] + + z, h, g, x = forward(batch_images, theta) + nabla_J = backprop(batch_labels, theta, z, h, g, x) + theta = update_theta(theta, nabla_J, epsilon) + + # check accuracy using validation examples + z, _, _, _ = forward(validation_images, theta) + pred_labels = z.argmax(axis = 1) + accuracy = sum(pred_labels == validation_labels) / validation_images.shape[0] + accuracies.append(accuracy) + epochs.append(epoch) + #count = sum(pred_labels == validation_labels) + print("epoch %d, accuracy %.3f, time %.2f" % ( + epoch, accuracy, time.time()-start)) + + #plot_epoch(epochs, accuracies, epsilon, batch_size, bound) + + # save the weights to be submitted + save_theta(theta) + + # return this data so we can plot it with matplotlib + return epochs, accuracies + +def main(): + training_results = [] + mnist_train = np.load("mnist_train.npz") + + training_results = [] + + # load training data once + mnist_train = np.load("mnist_train.npz") + + # we can add to this list if we want to test combinations of hyperparameters + hyperparams = [ + (0.00001, 1, 4), # default params + (0.00001, 0.1, 4), + (0.00001, 0.5, 4), + (0.00001, 0.7, 4), + (0.00001, 0.01, 4), + (0.00001, 0.01, 3), + (0.00001, 0.01, 2), + (0.000013, 0.012, 1), + (0.000013, 0.012002899999999983, 1), + (0.000013, 0.01200591999999996, 1), + ] + + for epsilon, bound, batch_size in hyperparams: + epochs, accuracies = start_training(epsilon, batch_size, bound, mnist_train) + training_results.append((epochs, accuracies, epsilon, batch_size, bound)) + + # uncomment if you would like to see plotted results + #plot_all_epochs(training_results) + + # plot table + #plot_table(training_results[9]) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/prj06.py b/prj06.py new file mode 100755 index 0000000..41c6ee2 --- /dev/null +++ b/prj06.py @@ -0,0 +1,374 @@ +import numpy as np +import time +import random +import matplotlib.pyplot as plt + + +# save theta to p6_params.npz that can be used by easynn +def save_theta(theta): + c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta + + np.savez_compressed("p6_params.npz", **{ + "c1.weight": c1_W, + "c1.bias": c1_b, + "c2.weight": c2_W, + "c2.bias": c2_b, + "f.weight": f_W, + "f.bias": f_b + }) + + +# initialize theta using uniform distribution [-bound, bound] +# return theta as (c1_W, c1_b, c2_W, c2_b, f_W, f_b) +def initialize_theta(bound): + c1_W = np.random.uniform(-bound, bound, (8, 1, 5, 5)) + c1_b = np.random.uniform(-bound, bound, 8) + c2_W = np.random.uniform(-bound, bound, (8, 8, 5, 5)) + c2_b = np.random.uniform(-bound, bound, 8) + f_W = np.random.uniform(-bound, bound, (10, 128)) + f_b = np.random.uniform(-bound, bound, 10) + return (c1_W, c1_b, c2_W, c2_b, f_W, f_b) + + +# return out_nchw +def Conv2d(in_nchw, kernel_W, kernel_b): + OC, IC, KH, KW = kernel_W.shape + N, C, H, W = in_nchw.shape + if C != IC or kernel_b.shape != (OC,): + raise Exception("Conv2d size mismatch: %s @ (%s, %s)" % ( + in_nchw.shape, kernel_W.shape, kernel_b.shape)) + + # view in_nchw as a 6D tensor + shape = (N, IC, H-KH+1, W-KW+1, KH, KW) + strides = in_nchw.strides+in_nchw.strides[2:] + data = np.lib.stride_tricks.as_strided(in_nchw, + shape = shape, strides = strides, writeable = False) + # np.einsum("nihwyx,oiyx->nohw", data, kernel_W) + nhwo = np.tensordot(data, kernel_W, ((1,4,5), (1,2,3))) + return nhwo.transpose(0,3,1,2)+kernel_b.reshape((1, OC, 1, 1)) + +# return p_b for the whole batch +def Conv2d_backprop_b(p_out, in_nchw, kernel_W, kernel_b): + 
OC, IC, KH, KW = kernel_W.shape + N, C, H, W = in_nchw.shape + if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1): + raise Exception("Conv2d_backprop_b size mismatch: %s = %s @ (%s, %s)" % ( + p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape)) + + return np.einsum("nohw->o", p_out, optimize = "optimal")/N + +# return p_W for the whole batch +def Conv2d_backprop_W(p_out, in_nchw, kernel_W, kernel_b): + OC, IC, KH, KW = kernel_W.shape + N, C, H, W = in_nchw.shape + if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1): + raise Exception("Conv2d_backprop_W size mismatch: %s = %s @ (%s, %s)" % ( + p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape)) + + # view in_nchw as a 6D tensor + shape = (N, IC, KH, KW, H-KH+1, W-KW+1) + strides = in_nchw.strides+in_nchw.strides[2:] + data = np.lib.stride_tricks.as_strided(in_nchw, + shape = shape, strides = strides, writeable = False) + # np.einsum("nohw,niyxhw->oiyx", p_out, data) + return np.tensordot(p_out, data, ((0,2,3), (0,4,5)))/N + +# return p_in for the whole batch +def Conv2d_backprop_in(p_out, in_nchw, kernel_W, kernel_b): + OC, IC, KH, KW = kernel_W.shape + N, C, H, W = in_nchw.shape + if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1): + raise Exception("Conv2d_backprop_in size mismatch: %s = %s @ (%s, %s)" % ( + p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape)) + + # view p_out as a padded 6D tensor + padded = np.zeros((N, OC, H+KH-1, W+KW-1)) + padded[:, :, KH-1:-KH+1, KW-1:-KW+1] = p_out + shape = (N, IC, H, W, KH, KW) + strides = padded.strides+padded.strides[2:] + data = np.lib.stride_tricks.as_strided(padded, + shape = shape, strides = strides, writeable = False) + # np.einsum("nohwyx,oiyx->nihw", data, kernel_W) + nhwi = np.tensordot(data, kernel_W, ((1,4,5), (0,2,3))) + return nhwi.transpose((0,3,1,2)) + + +# return out_nchw +def MaxPool_2by2(in_nchw): + N, C, H, W = in_nchw.shape + shape = (N, C, H//2, 2, W//2, 2) + return np.nanmax(in_nchw.reshape(shape), axis = (3,5)) + +# return p_in for the whole batch +def MaxPool_2by2_backprop(p_out, out_nchw, in_nchw): + p_in = np.zeros(in_nchw.shape) + p_in[:, :, 0::2, 0::2] = p_out*(out_nchw == in_nchw[:, :, 0::2, 0::2]) + p_in[:, :, 0::2, 1::2] = p_out*(out_nchw == in_nchw[:, :, 0::2, 1::2]) + p_in[:, :, 1::2, 0::2] = p_out*(out_nchw == in_nchw[:, :, 1::2, 0::2]) + p_in[:, :, 1::2, 1::2] = p_out*(out_nchw == in_nchw[:, :, 1::2, 1::2]) + return p_in + + +# forward: +# x = NCHW(images) +# cx = Conv2d_c1(x) +# rx = ReLU(cx) +# y = MaxPool(rx) +# cy = Conv2d_c2(hx) +# ry = ReLU(cy) +# g = MaxPool(ry) +# h = Flatten(g) +# z = Linear_f(h) +# return (z, h, g, ry, cy, y, rx, cx, x) +def forward(images, theta): + # number of samples + N = images.shape[0] + + # unpack theta into c1, c2, and f + c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta + + # x = NCHW(images) + x = images.astype(float).transpose(0,3,1,2) + + # cx = Conv2d_c1(x) + cx = Conv2d(x, c1_W, c1_b) + + # rx = ReLU(cx) + rx = cx*(cx > 0) + + # y = MaxPool(rx) + y = MaxPool_2by2(rx) + + # cy = Conv2d_c2(y) + cy = Conv2d(y, c2_W, c2_b) + + # ry = ReLU(cy) + ry = cy*(cy > 0) + + # g = MaxPool(ry) + g = MaxPool_2by2(ry) + + # h = Flatten(g) + h = g.reshape((N, -1)) + + # z = Linear_f(h) + z = np.zeros((N, f_b.shape[0])) + for i in range(N): + z[i, :] = np.matmul(f_W, h[i])+f_b + + return (z, h, g, ry, cy, y, rx, cx, x) + +def backprop(labels, theta, z, h, g, ry, cy, y, rx, cx, x): + # Number of samples + N = labels.shape[0] + + # Unpack 
theta into c1, c2, and f + c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta + + # Initialize gradients for each parameter + p_c1_W, p_c1_b = np.zeros_like(c1_W), np.zeros_like(c1_b) + p_c2_W, p_c2_b = np.zeros_like(c2_W), np.zeros_like(c2_b) + p_f_W, p_f_b = np.zeros_like(f_W), np.zeros_like(f_b) + # sample-by-sample from z to h + #p_f_W = np.zeros(f_W.shape) + #p_f_b = np.zeros(f_b.shape) + p_h = np.zeros(h.shape) + + # Step 1: Backprop through linear layer + for i in range(N): + # Softmax and cross-entropy gradient + expz = np.exp(z[i] - np.max(z[i])) + softmax = expz / np.sum(expz) + softmax[labels[i]] -= 1 + p_z = softmax / N + + # Backprop through linear layer to compute gradients + p_h[i, :] = np.dot(f_W.T, p_z) + p_f_W += np.outer(p_h[i, :], p_z).T + p_f_b += p_z + + # Average the gradients over the batch + p_f_W /= N + p_f_b /= N + + # Step 2: Backprop through flatten operation + p_g = p_h.reshape(g.shape) + + # Step 3: Backprop through MaxPool layer + p_ry = MaxPool_2by2_backprop(p_g, g, ry) + + # Step 4: Backprop through ReLU layer + p_cy = (ry > 0) * p_ry + + # Step 5: Backprop through second convolutional layer + p_c2_W = Conv2d_backprop_W(p_cy, y, c2_W, c2_b) + p_c2_b = Conv2d_backprop_b(p_cy, y, c2_W, c2_b) + p_y = Conv2d_backprop_in(p_cy, y, c2_W, c2_b) + + # Step 6: Backprop through first MaxPool layer + p_rx = MaxPool_2by2_backprop(p_y, y, rx) + + # Step 7: Backprop through first ReLU layer + p_cx = (rx > 0) * p_rx + + # Step 8: Backprop through first convolutional layer + p_c1_W = Conv2d_backprop_W(p_cx, x, c1_W, c1_b) + p_c1_b = Conv2d_backprop_b(p_cx, x, c1_W, c1_b) + + return (p_c1_W, p_c1_b, p_c2_W, p_c2_b, p_f_W, p_f_b) + +# apply SGD to update theta by nabla_J and the learning rate epsilon +# return updated theta + +def update_theta(theta, nabla_J, epsilon): + # Unpack the current parameters and their corresponding gradients + c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta + p_c1_W, p_c1_b, p_c2_W, p_c2_b, p_f_W, p_f_b = nabla_J + + # Update each parameter by subtracting the gradient times the learning rate + c1_W_updated = c1_W - epsilon * p_c1_W + c1_b_updated = c1_b - epsilon * p_c1_b + c2_W_updated = c2_W - epsilon * p_c2_W + c2_b_updated = c2_b - epsilon * p_c2_b + f_W_updated = f_W - epsilon * p_f_W + f_b_updated = f_b - epsilon * p_f_b + + # Return the updated parameters as a tuple + updated_theta = (c1_W_updated, c1_b_updated, c2_W_updated, c2_b_updated, f_W_updated, f_b_updated) + return updated_theta + +def sample_attempts(accuracies, num_samples=10): + total_attempts = len(accuracies) + if total_attempts <= num_samples: + return accuracies + + # Always include first and last attempts + sampled_indices = [0, total_attempts - 1] + + # Sample additional attempts, excluding the first and last + additional_samples = random.sample(range(1, total_attempts - 1), num_samples - 2) + sampled_indices.extend(additional_samples) + + # Sort the indices to maintain chronological order + sampled_indices.sort() + + # Get the sampled attempts + sampled_accuracies = [accuracies[i] for i in sampled_indices] + return sampled_accuracies + +# hyperparameters +# default params +#bound = 1 # initial weight range +#epsilon = 0.00001 # learning rate +#batch_size = 4 + +# start training +def start_training(mnist_train, bound, epsilon, batch_size): + + print(f"bound: {bound}, epsilon: {epsilon}, batch_size: {batch_size}") + validation_images = mnist_train["images"][:1000] + validation_labels = mnist_train["labels"][:1000] + training_images = mnist_train["images"][1000:21000] + training_labels = 
mnist_train["labels"][1000:21000] + #training_images = mnist_train["images"][1000:11000] + #training_labels = mnist_train["labels"][1000:11000] + + + start = time.time() + theta = initialize_theta(bound) + batches = training_images.shape[0]//batch_size + + accuracies = [] + + for epoch in range(5): + indices = np.arange(training_images.shape[0]) + np.random.shuffle(indices) + for i in range(batches): + batch_images = training_images[indices[i*batch_size:(i+1)*batch_size]] + batch_labels = training_labels[indices[i*batch_size:(i+1)*batch_size]] + + z, h, g, ry, cy, y, rx, cx, x = forward(batch_images, theta) + nabla_J = backprop(batch_labels, theta, z, h, g, ry, cy, y, rx, cx, x) + theta = update_theta(theta, nabla_J, epsilon) + + # check accuracy using validation examples + z, _, _, _, _, _, _, _, _ = forward(validation_images, theta) + pred_labels = z.argmax(axis = 1) + count = sum(pred_labels == validation_labels) + accuracy = count/validation_images.shape[0] + accuracies.append(accuracy) + print("epoch %d, accuracy %.3f, time %.2f" % ( + epoch, accuracy, time.time()-start)) + + + # save the weights to be submitted + save_theta(theta) + return accuracies + +def plot_accuracies(accuracies): + plt.figure(figsize=(12, 6)) + + for attempt, acc_list in accuracies: + plt.plot(acc_list, label=f'Attempt {attempt+1}') + + plt.title('Model Accuracy over Epochs for Sampled Attempts') + plt.xlabel('Epoch') + plt.ylabel('Accuracy') + plt.legend() + plt.grid(True) + plt.show() + +def main(): + # list of (attempt, accuracies_for_run) + accuracies = [] + + # ToDo: set numpy random seed to the last 8 digits of your CWID + np.random.seed(20497299) + + # load training data once + mnist_train = np.load("mnist_train.npz") + + max_accuracy = 0.93 # Target accuracy + max_attempts = 999999 # Maximum number of attempts to reach the target accuracy + + # initial starting values + # notes from experimentation: + + # 90 percent accuracy + # attempt: 36 + # increase bound/epsilon *= 1.01 + bound = 0.01 + epsilon = 0.0001 + batch_size = 1 + + # 90 percent accuracy if we start at initial starting values. on 36th attempt + # note: if we start at these below values we do not hit 90. 
only if we start at 0.01/0.0001 and *= by 1.01 + #bound = 0.014166027560312683 + #epsilon = 0.0001416602756031268 + #batch_size = 1 + + attempt = 0 + while attempt < max_attempts: + print(f"Attempt: {attempt} out of: {max_attempts}") + accuracies_for_run = start_training(mnist_train, bound, epsilon, batch_size) + accuracies.append((attempt, accuracies_for_run)) + + if any(i >= max_accuracy for i in accuracies_for_run): + print(f"Reached Max Accuracy: {max_accuracy}") + print(f"bound: {bound}") + print(f"epsilon: {epsilon}") + print(f"batch_size: {batch_size}") + sampled_accuracies = sample_attempts(accuracies) + plot_accuracies(sampled_accuracies) + break + + bound *= 1.0005 # Increase by 0.05% + epsilon *= 1.0005 # Increase by 0.05% + + #bound *= 1.01 # Increase by 1% + #epsilon *= 1.01 # Increase by 1% + + attempt += 1 + +if __name__ == "__main__": + main() diff --git a/src/eval_add.cpp b/src/eval_add.cpp new file mode 100755 index 0000000..ba1ca61 --- /dev/null +++ b/src/eval_add.cpp @@ -0,0 +1,17 @@ +#include "eval_add.h" + +tensor eval_add::compute(const tensor &a, const tensor &b) { + tensor c = a + b; + return c; +} + +void eval_add::eval(vars_type &variables, const kwargs_type &kwargs) { + assert(inputs_.size() == 2); + auto ita = variables.find(inputs_[0]); + auto itb = variables.find(inputs_[1]); + variables[expr_id_] = compute(ita->second,itb->second); +} + +std::shared_ptr eval_add::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_add.h b/src/eval_add.h new file mode 100755 index 0000000..0657512 --- /dev/null +++ b/src/eval_add.h @@ -0,0 +1,21 @@ +#ifndef EVAL_ADD_H +#define EVAL_ADD_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_add: public eval_op { + tensor compute(const tensor &a, const tensor &b); + std::shared_ptr clone(const expression &expr) override; +public: + eval_add() {} + eval_add(const expression &expr): eval_op(expr) {} + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("Add") == proto_map.end()); + proto_map["Add"] = std::make_shared(); // where is expr? 
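+        // (no expression yet: this entry is only a default-constructed prototype;
+        //  the real evaluator is presumably created later, when clone(expr) is
+        //  called for a concrete expression while the program is being built.)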
+ } +}; // class eval_add + +#endif \ No newline at end of file diff --git a/src/eval_const.cpp b/src/eval_const.cpp new file mode 100755 index 0000000..efedd40 --- /dev/null +++ b/src/eval_const.cpp @@ -0,0 +1,17 @@ +#include "eval_const.h" + +eval_const::eval_const(const expression &expr): eval_op(expr) { + auto params = expr.get_op_param_tensor("value"); + value_ = params; + // value_.print(); // uncomment for viewing logging details of tensor object + +} + +void eval_const::eval(vars_type &variables, const kwargs_type &kwargs) { + variables[expr_id_] = value_; + // value_.print(); // uncomment for viewing logging details of tensor object +} + +std::shared_ptr eval_const::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_const.h b/src/eval_const.h new file mode 100755 index 0000000..21e5039 --- /dev/null +++ b/src/eval_const.h @@ -0,0 +1,21 @@ +#ifndef EVAL_CONST_H +#define EVAL_CONST_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_const: public eval_op { + tensor value_; + std::shared_ptr clone(const expression &expr) override; +public: + eval_const() {} + eval_const(const expression &expr); + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("Const") == proto_map.end()); + proto_map["Const"] = std::make_shared(); // where is expr? + } +}; // class eval_const + +#endif \ No newline at end of file diff --git a/src/eval_conv2d.cpp b/src/eval_conv2d.cpp new file mode 100755 index 0000000..42b972a --- /dev/null +++ b/src/eval_conv2d.cpp @@ -0,0 +1,36 @@ +#include "eval_conv2d.h" + +eval_conv2d::eval_conv2d(const expression &expr): eval_op(expr), + in_channels(expr.get_op_param_int("in_channels")), + kernel_size(expr.get_op_param_int("kernel_size")), + out_channels(expr.get_op_param_int("out_channels")), + padding(expr.get_op_param_double("padding")), + weight(expr.get_op_param_tensor("weight")), + bias(expr.get_op_param_tensor("bias")), + stride(expr.get_op_param_int("stride")) + { + + } + +void eval_conv2d::eval(vars_type &variables, const kwargs_type &kwargs) { + if (inputs_.empty()) { + throw std::runtime_error("No input tensor for eval_conv2d."); + } + + // Get the input tensor from variables using the expression ID of the input + int inputExprId = inputs_[0]; + if (variables.find(inputExprId) == variables.end()) { + throw std::runtime_error("Input tensor not found in variables map."); + } + tensor input_tensor = variables.at(inputExprId); + + // Perform the Conv2D operation + tensor conv2d_tensor = Conv2D(input_tensor, weight, bias, in_channels, out_channels, kernel_size); + + // Store the result in the variables map using the current expression ID + variables[expr_id_] = conv2d_tensor; +} + +std::shared_ptr eval_conv2d::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_conv2d.h b/src/eval_conv2d.h new file mode 100755 index 0000000..df0685f --- /dev/null +++ b/src/eval_conv2d.h @@ -0,0 +1,38 @@ +#ifndef EVAL_CONV2D_H +#define EVAL_CONV2D_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_conv2d: public eval_op { + tensor value_; + std::shared_ptr clone(const expression &expr) override; +public: + eval_conv2d () {} + eval_conv2d(const expression &expr); + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + 
assert(proto_map.find("Conv2d") == proto_map.end()); + proto_map["Conv2d"] = std::make_shared(); // where is expr? + } +private: + int in_channels; + int kernel_size; + int out_channels; + double padding; + tensor weight; + tensor bias; + int stride; + /* + tensor weight; + tensor bias; + int kernel_size; + int stride; + int in_channels; + int out_channels; + double padding; + */ +}; // class eval_conv2d + +#endif \ No newline at end of file diff --git a/src/eval_flatten.cpp b/src/eval_flatten.cpp new file mode 100755 index 0000000..74a8076 --- /dev/null +++ b/src/eval_flatten.cpp @@ -0,0 +1,27 @@ +#include "eval_flatten.h" + +eval_flatten::eval_flatten(const expression &expr): eval_op(expr) { +} + +void eval_flatten::eval(vars_type &variables, const kwargs_type &kwargs) { + // Check that inputs_ is not empty + assert(!inputs_.empty()); + + // Retrieve the tensor from variables using the first input expression ID + const int inputExprId = inputs_[0]; + tensor t = variables.at(inputExprId); + + // For debugging: print the input tensor + std::cout << "Input Tensor for Flatten (expr_id " << inputExprId << "):" << std::endl; + //t.print(); + + // Perform the Flatten operation + tensor flattenResult = Flatten(t); + + // Store the result in variables under the key expr_id_ + variables[expr_id_] = flattenResult; +} + +std::shared_ptr eval_flatten::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_flatten.h b/src/eval_flatten.h new file mode 100755 index 0000000..d535a57 --- /dev/null +++ b/src/eval_flatten.h @@ -0,0 +1,21 @@ +#ifndef EVAL_FLATTEN_H +#define EVAL_FLATTEN_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_flatten: public eval_op { + tensor value_; + std::shared_ptr clone(const expression &expr) override; +public: + eval_flatten () {} + eval_flatten(const expression &expr); + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("Flatten") == proto_map.end()); + proto_map["Flatten"] = std::make_shared(); // where is expr? + } +}; // class eval_flatten + +#endif \ No newline at end of file diff --git a/src/eval_input.cpp b/src/eval_input.cpp new file mode 100755 index 0000000..bd4796f --- /dev/null +++ b/src/eval_input.cpp @@ -0,0 +1,13 @@ +#include "eval_input.h" + +eval_input::eval_input(const expression &expr): eval_op(expr) { +} + + +void eval_input::eval(vars_type &variables, const kwargs_type &kwargs) { + variables[expr_id_] = kwargs.at(op_name_); +} + +std::shared_ptr eval_input::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_input.h b/src/eval_input.h new file mode 100755 index 0000000..44cead6 --- /dev/null +++ b/src/eval_input.h @@ -0,0 +1,21 @@ +#ifndef EVAL_INPUT_H +#define EVAL_INPUT_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_input: public eval_op { + tensor value_; + std::shared_ptr clone(const expression &expr) override; +public: + eval_input () {} + eval_input(const expression &expr); + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("Input") == proto_map.end()); + proto_map["Input"] = std::make_shared(); // where is expr? 
+ } +}; // class eval_input + +#endif \ No newline at end of file diff --git a/src/eval_input2d.cpp b/src/eval_input2d.cpp new file mode 100755 index 0000000..43b3697 --- /dev/null +++ b/src/eval_input2d.cpp @@ -0,0 +1,23 @@ +#include "eval_input2d.h" + +eval_input2d::eval_input2d(const expression &expr): eval_op(expr), height(expr.get_op_param_int("height")), width(expr.get_op_param_int("width")) { +} + +void eval_input2d::eval(vars_type &variables, const kwargs_type &kwargs) { + // Retrieve the tensor directly from kwargs using op_name_ + tensor t = kwargs.at(op_name_); + + // For debugging: print the input tensor + std::cout << "Input Tensor for Input2d (op_name " << op_name_ << "):" << std::endl; + //t.print(); + + // Perform the Input2d operation + tensor input2dResult = Input2d(t); + + // Store the result in variables under the key expr_id_ + variables[expr_id_] = input2dResult; +} + +std::shared_ptr eval_input2d::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_input2d.h b/src/eval_input2d.h new file mode 100755 index 0000000..f509cb3 --- /dev/null +++ b/src/eval_input2d.h @@ -0,0 +1,24 @@ +#ifndef EVAL_INPUT2D_H +#define EVAL_INPUT2D_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_input2d: public eval_op { + tensor value_; + std::shared_ptr clone(const expression &expr) override; +public: + eval_input2d () {} + eval_input2d(const expression &expr); + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("Input2d") == proto_map.end()); + proto_map["Input2d"] = std::make_shared(); // where is expr? + } +private: + int height; + int width; +}; // class eval_input2d + +#endif \ No newline at end of file diff --git a/src/eval_linear.cpp b/src/eval_linear.cpp new file mode 100755 index 0000000..452233d --- /dev/null +++ b/src/eval_linear.cpp @@ -0,0 +1,27 @@ +#include "eval_linear.h" + +eval_linear::eval_linear(const expression &expr): eval_op(expr),weight(expr.get_op_param_tensor("weight")),bias(expr.get_op_param_tensor("bias")) { +} + +void eval_linear::eval(vars_type &variables, const kwargs_type &kwargs) { + if (inputs_.empty()) { + throw std::runtime_error("No input tensor for eval_linear."); + } + + int inputExprId = inputs_[0]; + if (variables.find(inputExprId) == variables.end()) { + throw std::runtime_error("Input tensor not found in variables map."); + } + + tensor x = variables.at(inputExprId); + + // Now you have x, weight, and bias available to perform the Linear operation + tensor linearResult = Linear(weight, x, bias); + + // Process the result... 
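    // Shape convention used by Linear() in tensor.cpp: x is (N, in_features), weight is
    // (out_features, in_features) and bias is (out_features,), so the result stored below
    // is (N, out_features), i.e. y = x * W^T + b broadcast across the N rows.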
+ variables[expr_id_] = linearResult; +} + +std::shared_ptr eval_linear::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_linear.h b/src/eval_linear.h new file mode 100755 index 0000000..c80278d --- /dev/null +++ b/src/eval_linear.h @@ -0,0 +1,24 @@ +#ifndef EVAL_LINEAR_H +#define EVAL_LINEAR_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_linear: public eval_op { + tensor value_; + std::shared_ptr clone(const expression &expr) override; +public: + eval_linear () {} + eval_linear(const expression &expr); + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("Linear") == proto_map.end()); + proto_map["Linear"] = std::make_shared(); // where is expr? + } +private: + tensor weight; // Add weight tensor + tensor bias; // Add bias tensor +}; // class eval_linear + +#endif \ No newline at end of file diff --git a/src/eval_maxpool2d.cpp b/src/eval_maxpool2d.cpp new file mode 100755 index 0000000..6a21cd3 --- /dev/null +++ b/src/eval_maxpool2d.cpp @@ -0,0 +1,32 @@ +#include "eval_maxpool2d.h" + +eval_maxpool2d::eval_maxpool2d(const expression &expr): eval_op(expr), kernel_size(expr.get_op_param_int("kernel_size")), stride(expr.get_op_param_int("stride")){ + // Retrieve kernel size and stride from the expression + //int kernel_size = expr.get_ker_and_str_value("kernel_size"); + //int stride = expr.get_ker_and_str_value("stride"); +} + +void eval_maxpool2d::eval(vars_type &variables, const kwargs_type &kwargs) { + // Ensure that there is at least one input (the input tensor) + if (inputs_.empty()) { + throw std::runtime_error("No input tensor for eval_maxpool2d."); + } + + int inputExprId = inputs_[0]; + if (variables.find(inputExprId) == variables.end()) { + throw std::runtime_error("Input tensor not found in variables map."); + } + + // Get the input tensor + tensor input_tensor = variables.at(inputExprId); + + // Perform the max pooling operation + tensor pooled_tensor = MaxPool2d(input_tensor, kernel_size, stride); + + // Store the result in the variables map using the expression ID + variables[expr_id_] = pooled_tensor; +} + +std::shared_ptr eval_maxpool2d::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_maxpool2d.h b/src/eval_maxpool2d.h new file mode 100755 index 0000000..f01eb4e --- /dev/null +++ b/src/eval_maxpool2d.h @@ -0,0 +1,24 @@ +#ifndef EVAL_MAXPOOL2D_H +#define EVAL_MAXPOOL2D_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_maxpool2d: public eval_op { + tensor value_; + std::shared_ptr clone(const expression &expr) override; +public: + eval_maxpool2d () {} + eval_maxpool2d(const expression &expr); + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("MaxPool2d") == proto_map.end()); + proto_map["MaxPool2d"] = std::make_shared(); // where is expr? 
+ } +private: + int kernel_size; + int stride; +}; // class eval_linear + +#endif \ No newline at end of file diff --git a/src/eval_mul.cpp b/src/eval_mul.cpp new file mode 100755 index 0000000..f7b071f --- /dev/null +++ b/src/eval_mul.cpp @@ -0,0 +1,20 @@ +#include "eval_mul.h" + +tensor eval_mul::compute(const tensor &a, const tensor &b) { + // these use our tensor operator* overload + tensor c = a * b; + return c; +} + +void eval_mul::eval(vars_type &variables, const kwargs_type &kwargs) { + assert(inputs_.size() == 2); + auto ita = variables.find(inputs_[0]); + auto itb = variables.find(inputs_[1]); + // handle errors for ita and itb + // we handle these in the operator* override instead of here + variables[expr_id_] = compute(ita->second,itb->second); +} + +std::shared_ptr eval_mul::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_mul.h b/src/eval_mul.h new file mode 100755 index 0000000..ced14bb --- /dev/null +++ b/src/eval_mul.h @@ -0,0 +1,22 @@ +#ifndef EVAL_MUL_H +#define EVAL_MUL_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_mul: public eval_op { + tensor compute(const tensor &a, const tensor &b); + std::shared_ptr clone(const expression &expr) override; +public: + eval_mul() {} + eval_mul(const expression &expr): eval_op(expr) {} + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("Mul") == proto_map.end()); + proto_map["Mul"] = std::make_shared(); // where is expr? + } + +}; // class eval_mul + +#endif \ No newline at end of file diff --git a/src/eval_op.cpp b/src/eval_op.cpp new file mode 100755 index 0000000..27fbfec --- /dev/null +++ b/src/eval_op.cpp @@ -0,0 +1,20 @@ +#include "eval_op.h" + +eval_op::eval_op(const expression &expr) +{ + // set all of our member vars one time using expr getters + expr_id_ = expr.get_expr_id(); + op_name_ = expr.get_op_name(); + op_type_ = expr.get_op_type(); + inputs_ = expr.get_inputs(); +} + + +void eval_op::eval(vars_type & variables, const kwargs_type & kwargs) +{ + assert(false); // should be provided by derived classes +} + +eval_op::~eval_op() { + +} \ No newline at end of file diff --git a/src/eval_op.h b/src/eval_op.h new file mode 100755 index 0000000..b9f7c71 --- /dev/null +++ b/src/eval_op.h @@ -0,0 +1,27 @@ +#ifndef EVAL_OP_H +#define EVAL_OP_H + +#include +#include "expression.h" +#include "assert.h" +#include + +// typedef introduces alias for types. 
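// The template arguments appear to have been stripped from this listing; given how the maps
// are used (variables keyed by expression id, kwargs keyed by input name), the originals are
// presumably:
//   typedef std::map<int, tensor> vars_type;
//   typedef std::map<std::string, tensor> kwargs_type;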
+typedef std::map vars_type; +typedef std::map kwargs_type; + +class eval_op { +protected: + friend class evaluation; + eval_op(): expr_id_(-1) {} + eval_op(const expression &expr); + int expr_id_; + std::string op_name_, op_type_; + std::vector inputs_; +public: + virtual ~eval_op(); + virtual void eval(vars_type &variables, const kwargs_type &kwargs) = 0; + virtual std::shared_ptr clone(const expression &expr) = 0; +}; //class eval_op + +#endif \ No newline at end of file diff --git a/src/eval_op_prototypes.cpp b/src/eval_op_prototypes.cpp new file mode 100755 index 0000000..b9e8109 --- /dev/null +++ b/src/eval_op_prototypes.cpp @@ -0,0 +1,46 @@ +#include "eval_op_prototypes.h" + +eval_op_prototypes::eval_op_prototypes() { + eval_const::store_prototype(proto_map_); + eval_input::store_prototype(proto_map_); + eval_add::store_prototype(proto_map_); + eval_sub::store_prototype(proto_map_); + eval_mul::store_prototype(proto_map_); + eval_relu::store_prototype(proto_map_); + eval_flatten::store_prototype(proto_map_); + eval_input2d::store_prototype(proto_map_); + eval_linear::store_prototype(proto_map_); + eval_maxpool2d::store_prototype(proto_map_); + eval_conv2d::store_prototype(proto_map_); + //log_proto_types(); // used this when troubleshooting. uncomment to enable log +} + +void eval_op_prototypes::log_proto_types() { + std::cout << "eval_op_prototypes: registering prototypes begin:" << std::endl; + std::cout << "----------------------" << std::endl; + for (const auto& pair : proto_map_) { + std::cout << "eval_op_prototypes: registered prototype: " << pair.first << std::endl; + } + std::cout << "----------------------" << std::endl; + std::cout << "eval_op_prototypes: registering prototypes complete" << std::endl; + +} + +std::shared_ptr eval_op_prototypes::locate(std::string name) +{ + auto found = proto_map_.find(name); + + if (found != proto_map_.end()) { + return found->second; + } else { + std::string error_message = "eval_op_prototypes: prototype for operator '" + name + "' not found."; + std::cerr << error_message << std::endl; + throw std::runtime_error(error_message); + } +} + +eval_op_prototypes &eval_op_prototypes::instance() +{ + static eval_op_prototypes instance; // the only instance + return instance; +} \ No newline at end of file diff --git a/src/eval_op_prototypes.h b/src/eval_op_prototypes.h new file mode 100755 index 0000000..3d57d1d --- /dev/null +++ b/src/eval_op_prototypes.h @@ -0,0 +1,25 @@ +#include "eval_op.h" +#include "eval_input.h" +#include "eval_input2d.h" +#include "eval_add.h" +#include "eval_mul.h" +#include "eval_sub.h" +#include "eval_const.h" +#include "eval_flatten.h" +#include "eval_relu.h" +#include "eval_linear.h" +#include "eval_maxpool2d.h" +#include "eval_conv2d.h" + +typedef std::map> eval_op_proto_map; + +class eval_op_prototypes { + // prevent creation of additional instances + eval_op_prototypes(const eval_op_prototypes &)=delete; + eval_op_prototypes(); + eval_op_proto_map proto_map_; + void log_proto_types(); +public: + std::shared_ptr locate(std::string name); + static eval_op_prototypes &instance(); // access the only instance +}; // class eval_op_prototypes \ No newline at end of file diff --git a/src/eval_relu.cpp b/src/eval_relu.cpp new file mode 100755 index 0000000..4fb2cf2 --- /dev/null +++ b/src/eval_relu.cpp @@ -0,0 +1,27 @@ +#include "eval_relu.h" + +eval_relu::eval_relu(const expression &expr): eval_op(expr) { +} + +void eval_relu::eval(vars_type &variables, const kwargs_type &kwargs) { + // Check that inputs_ is not empty + 
assert(!inputs_.empty()); + + // Retrieve the tensor from variables using the first input expression ID + const int inputExprId = inputs_[0]; + tensor t = variables.at(inputExprId); + + // For debugging: print the input tensor + std::cout << "Input Tensor for ReLU (expr_id " << inputExprId << "):" << std::endl; + //t.print(); + + // Perform the ReLU operation + tensor reluResult = ReLU(t); + + // Store the result in variables under the key expr_id_ + variables[expr_id_] = reluResult; +} + +std::shared_ptr eval_relu::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_relu.h b/src/eval_relu.h new file mode 100755 index 0000000..bdacac5 --- /dev/null +++ b/src/eval_relu.h @@ -0,0 +1,21 @@ +#ifndef EVAL_RELU_H +#define EVAL_RELU_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_relu: public eval_op { + tensor value_; + std::shared_ptr clone(const expression &expr) override; +public: + eval_relu () {} + eval_relu(const expression &expr); + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("ReLU") == proto_map.end()); + proto_map["ReLU"] = std::make_shared(); // where is expr? + } +}; // class eval_relu + +#endif \ No newline at end of file diff --git a/src/eval_sub.cpp b/src/eval_sub.cpp new file mode 100755 index 0000000..f3aa903 --- /dev/null +++ b/src/eval_sub.cpp @@ -0,0 +1,18 @@ +#include "eval_sub.h" + +tensor eval_sub::compute(const tensor &a, const tensor &b) { + // these use our tensor operator- overload + tensor c = a - b; + return c; +} + +void eval_sub::eval(vars_type &variables, const kwargs_type &kwargs) { + assert(inputs_.size() == 2); + auto ita = variables.find(inputs_[0]); + auto itb = variables.find(inputs_[1]); + variables[expr_id_] = compute(ita->second,itb->second); +} + +std::shared_ptr eval_sub::clone(const expression &expr) { + return std::make_shared(expr); +} \ No newline at end of file diff --git a/src/eval_sub.h b/src/eval_sub.h new file mode 100755 index 0000000..3085693 --- /dev/null +++ b/src/eval_sub.h @@ -0,0 +1,22 @@ +#ifndef EVAL_SUB_H +#define EVAL_SUB_H + +#include "eval_op.h" + +typedef std::map> eval_op_proto_map; + +class eval_sub: public eval_op { + tensor compute(const tensor &a, const tensor &b); + std::shared_ptr clone(const expression &expr) override; +public: + eval_sub() {} + eval_sub(const expression &expr): eval_op(expr) {} + void eval(vars_type &variables, const kwargs_type &kwargs) override; + static void store_prototype(eval_op_proto_map &proto_map) { + assert(proto_map.find("Sub") == proto_map.end()); + proto_map["Sub"] = std::make_shared(); // where is expr? + } + +}; // class eval_sub + +#endif \ No newline at end of file diff --git a/src/evaluation.cpp b/src/evaluation.cpp new file mode 100755 index 0000000..66aa3da --- /dev/null +++ b/src/evaluation.cpp @@ -0,0 +1,60 @@ +#include "evaluation.h" +#include "eval_op_prototypes.h" + +evaluation::evaluation(const std::vector &exprs) { + try { + std::cout << "Starting evaluation construction with " << exprs.size() << " expressions." << std::endl; + for (auto &expr: exprs) { + std::cout << "Processing expression with ID: " << expr.get_expr_id() << std::endl; + std::shared_ptr p = eval_op_prototypes::instance().locate(expr.get_op_type()); + ops_.push_back(p->clone(expr)); + std::cout << "Expression with ID: " << expr.get_expr_id() << " processed successfully." 
<< std::endl; +} + std::cout << "Evaluation construction completed successfully." << std::endl; + } catch (const std::runtime_error& e) { + std::cerr << "Failed to construct evaluation: " << e.what() << std::endl; + } +} + +void evaluation::add_kwargs_double( + const char *key, + double value) +{ + tensor scalar(value); + kwargs_[key] = scalar; +} + +void evaluation::add_kwargs_ndarray( + const char *key, + int dim, + size_t shape[], + double data[]) +{ + tensor ndarray(dim, shape, data); + kwargs_[key] = ndarray; +} + +tensor &evaluation::get_result() +{ + if (!variables_.empty()) { + int last_expr_id = std::prev(variables_.end())->first; + std::cout << "Returning result from the last expression with ID: " << last_expr_id << std::endl; + return variables_[last_expr_id]; + } + throw std::runtime_error("No result available."); +} + +int evaluation::execute() +{ + std::cout << "Executing evaluation with " << ops_.size() << " operations.\n"; + variables_.clear(); + + for (auto &op : ops_) { + std::cout << "Evaluating operation of type: " << typeid(*op).name() << "\n"; + op->eval(variables_, kwargs_); + std::cout << "Operation evaluation complete. Variables size: " << variables_.size() << "\n"; + } + + std::cout << "Evaluation complete. Total variables evaluated: " << variables_.size() << "\n"; + return 0; +} \ No newline at end of file diff --git a/src/evaluation.h b/src/evaluation.h new file mode 100755 index 0000000..3bb0058 --- /dev/null +++ b/src/evaluation.h @@ -0,0 +1,33 @@ +#ifndef EVALUATION_H +#define EVALUATION_H + +#include "expression.h" +#include "eval_op.h" + +class evaluation +{ +public: + evaluation(const std::vector &exprs); + + void add_kwargs_double( + const char *key, + double value); + + void add_kwargs_ndarray( + const char *key, + int dim, + size_t shape[], + double data[]); + + // return 0 for success + int execute(); + + tensor &get_result(); + +private: + std::vector> ops_; // instead of exprs_ + std::map variables_; + std::map kwargs_; +}; // class evaluation + +#endif // EVALUATION_H diff --git a/src/expression.cpp b/src/expression.cpp new file mode 100755 index 0000000..07c5c05 --- /dev/null +++ b/src/expression.cpp @@ -0,0 +1,112 @@ +#include "expression.h" +#include "evaluation.h" + +expression::expression( + int expr_id, + const char *op_name, + const char *op_type, + int *inputs, + int num_inputs) + : expr_id_(expr_id), op_name_(op_name), op_type_(op_type) +{ + // initialize inputs_ with the given inputs from our expression. 
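    // The inputs array arrives from the C API as a raw int buffer of input expression ids;
    // the element-by-element copy below could equivalently be written as
    //   inputs_.assign(inputs, inputs + num_inputs);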
+ for (int i = 0; i < num_inputs; ++i) + { + inputs_.push_back(inputs[i]); + } + +} + +// getter methods +int expression::get_expr_id() const +{ + return expr_id_; +} + +const std::string& expression::get_op_name() const +{ + return op_name_; +} + +const std::string& expression::get_op_type() const +{ + return op_type_; +} + +const std::vector& expression::get_inputs() const +{ + return inputs_; +} + +tensor expression::get_op_param_tensor(const std::string& key) const { + auto it = op_params_tensor_.find(key); + if (it != op_params_tensor_.end()) { + return it->second; + } else { + throw std::out_of_range("No operation parameter tensor found for key: " + key); + } +} + +double expression::get_op_param_double(const std::string& key) const { + auto it = op_params_double_.find(key); + if (it != op_params_double_.end()) { + return it->second; + } else { + return 0.0; + } +} + +int expression::get_op_param_int(const std::string& key) const { + auto it = op_params_int_.find(key); + if (it != op_params_int_.end()) { + return it->second; + } else { + return 0; + } +} + +void expression::add_op_param_double( + const char *key, + double value) +{ + + std::cout << "Adding operation parameter (double) with key: " << key << std::endl; + + const char *w = "weight"; + const char *b = "bias"; + const char *v = "value"; // for backwards compatibility with p3 tests + const char *k = "kernel_size"; + const char *s = "stride"; + const char *i = "in_channels"; + const char *o = "out_channels"; + const char *p = "padding"; + + if((*key) == (*w)|| (*key) == (*b) || (*key) == (*v)){ // handle weight, bias, value tensor values + op_params_tensor_[key] = tensor(value); + } else if ((*key) == (*k) || (*key) == (*s) || (*key) == (*i) || (*key) == (*o)) { // handle kernel_size, stride, in_channels, out_channels int values + op_params_int_[key] = int(value); + } else if ((*key) == (*p)){ // handle padding double value + op_params_double_[key] = value; + }else{ + op_params_tensor_[key] = tensor(value); + } +} + +void expression::add_op_param_ndarray( + const char *key, + int dim, + size_t shape[], + double data[]) +{ + std::cout << "Adding operation parameter (ndarray) with key: " << key << std::endl; + + const char *w = "weight"; + const char *b = "bias"; + + if((*key) == (*w)|| (*key) == (*b) ){ + op_params_tensor_[key] = tensor(dim, shape, data); + }else{ + op_params_tensor_[key] = tensor(dim, shape, data); + } + +} diff --git a/src/expression.h b/src/expression.h new file mode 100755 index 0000000..0a69142 --- /dev/null +++ b/src/expression.h @@ -0,0 +1,51 @@ +#ifndef EXPRESSION_H +#define EXPRESSION_H + +#include +#include +#include +#include "tensor.h" + +class evaluation; + +class expression +{ + friend class evaluation; +public: + expression( + int expr_id, + const char *op_name, + const char *op_type, + int *inputs, + int num_inputs); + + // Getter methods for expression attributes + int get_expr_id() const; + const std::string& get_op_name() const; + const std::string& get_op_type() const; + const std::vector& get_inputs() const; + tensor get_op_param_tensor(const std::string& key) const; + double get_op_param_double(const std::string& key) const; + int get_op_param_int(const std::string& key) const; + + void add_op_param_double( + const char *key, + double value); + + void add_op_param_ndarray( + const char *key, + int dim, + size_t shape[], + double data[]); + +private: + int expr_id_; + std::string op_name_; + std::string op_type_; + std::vector inputs_; + std::map op_params_tensor_; + std::map 
op_params_double_; + std::map op_params_int_; +}; // class expression + +#endif // EXPRESSION_H diff --git a/src/libeasynn.cpp b/src/libeasynn.cpp new file mode 100755 index 0000000..f4c05df --- /dev/null +++ b/src/libeasynn.cpp @@ -0,0 +1,109 @@ +#include + +#include "libeasynn.h" +#include "program.h" +#include "evaluation.h" + +program *create_program() +{ + program *prog = new program; + printf("created_program: program %p\n", prog); + return prog; +} + +void append_expression( + program *prog, + int expr_id, + const char *op_name, + const char *op_type, + int inputs[], + int num_inputs) +{ + printf("append_expression: program %p, expr_id %d, op_name %s, op_type %s, inputs %d (", + prog, expr_id, op_name, op_type, num_inputs); + for (int i = 0; i != num_inputs; ++i) + printf("%d,", inputs[i]); + printf(")\n"); + prog->append_expression(expr_id, op_name, op_type, inputs, num_inputs); +} + +int add_op_param_double( + program *prog, + const char *key, + double value) +{ + printf("program %p, key %s, value %f\n", + prog, key, value); + return prog->add_op_param_double(key, value); +} + +int add_op_param_ndarray( + program *prog, + const char *key, + int dim, + size_t shape[], + double data[]) +{ + printf("program %p, key %s, value %p dim %d (", + prog, key, data, dim); + for (int i = 0; i != dim; ++i) + printf("%zu,", shape[i]); + printf(")\n"); + return prog->add_op_param_ndarray(key, dim, shape, data); +} + +evaluation *build(program *prog) +{ + evaluation *eval = prog->build(); + printf("evaluation %p\n", eval); + return eval; +} + +void add_kwargs_double( + evaluation *eval, + const char *key, + double value) +{ + printf("evaluation %p, key %s, value %f\n", + eval, key, value); + eval->add_kwargs_double(key, value); +} + +void add_kwargs_ndarray( + evaluation *eval, + const char *key, + int dim, + size_t shape[], + double data[]) +{ + printf("evaluation %p, key %s, value %p dim %d (", + eval, key, data, dim); + for (int i = 0; i != dim; ++i) + printf("%zu,", shape[i]); + printf(")\n"); + eval->add_kwargs_ndarray(key, dim, shape, data); +} + +int execute( + evaluation *eval, + int *p_dim, + size_t **p_shape, + double **p_data) +{ + printf("evaluation %p, p_dim %p, p_shape %p, p_data %p\n", + eval, p_dim, p_shape, p_data); + int ret = eval->execute(); + if (ret != 0) { + printf("execution failed with return code %d\n", ret); + return ret; + } + tensor &res = eval->get_result(); + *p_dim = res.get_dim(); + *p_shape = res.get_shape_array(); + *p_data = res.get_data_array(); + //res.print(); // uncomment to view tensor data output + + printf("execution completed succesfully. 
result: dim=%d\n",*p_dim); + fflush(stdout); + return 0; +} diff --git a/src/libeasynn.h b/src/libeasynn.h new file mode 100755 index 0000000..7b31d5c --- /dev/null +++ b/src/libeasynn.h @@ -0,0 +1,63 @@ +#ifndef LIBEASYNN_H +#define LIBEASYNN_H + +class program; +class evaluation; + +// return program handle +extern "C" +program *create_program(); + +extern "C" +void append_expression( + program *prog, + int expr_id, + const char *op_name, + const char *op_type, + int inputs[], + int num_inputs); + +// return 0 for success +extern "C" +int add_op_param_double( + program *prog, + const char *key, + double value); + +// return 0 for success +extern "C" +int add_op_param_ndarray( + program *prog, + const char *key, + int dim, + size_t shape[], + double data[]); + +// return evaluation handle +extern "C" +evaluation *build( + program *prog); + +extern "C" +void add_kwargs_double( + evaluation *eval, + const char *key, + double value); + +extern "C" +void add_kwargs_ndarray( + evaluation *eval, + const char *key, + int dim, + size_t shape[], + double data[]); + +// return 0 for success +extern "C" +int execute( + evaluation *eval, + int *p_dim, + size_t **p_shape, + double **p_data); + +#endif // LIBEASYNN_H diff --git a/src/program.cpp b/src/program.cpp new file mode 100755 index 0000000..b5c9964 --- /dev/null +++ b/src/program.cpp @@ -0,0 +1,47 @@ +#include "program.h" +#include "evaluation.h" + +program::program() +{ +} + +void program::append_expression( + int expr_id, + const char *op_name, + const char *op_type, + int inputs[], + int num_inputs) +{ + // create an expression object with the provided parameters + expression expr(expr_id, op_name, op_type, inputs, num_inputs); + + // append the expression to the expressions vector + expressions_.push_back(expr); +} + +int program::add_op_param_double( + const char *key, + double value) +{ + // get end of expressions_ values and send to our add_op_param_double + expressions_.back().add_op_param_double(key, value); + return 0; +} + +int program::add_op_param_ndarray( + const char *key, + int dim, + size_t shape[], + double data[]) +{ + // get end of expressions_ values and send to our add_op_param_ndarray + expressions_.back().add_op_param_ndarray(key, dim, shape, data); + return 0; +} + + +evaluation *program::build() +{ + evaluation *eval = new evaluation(expressions_); + return eval; +} diff --git a/src/program.h b/src/program.h new file mode 100755 index 0000000..24e0ca0 --- /dev/null +++ b/src/program.h @@ -0,0 +1,38 @@ +#ifndef PROGRAM_H +#define PROGRAM_H + +#include "expression.h" + +class evaluation; + +class program +{ +public: + program(); + + void append_expression( + int expr_id, + const char *op_name, + const char *op_type, + int inputs[], + int num_inputs); + + // return 0 for success + int add_op_param_double( + const char *key, + double value); + + // return 0 for success + int add_op_param_ndarray( + const char *key, + int dim, + size_t shape[], + double data[]); + + evaluation *build(); + +private: + std::vector expressions_; // Store the expressions. 
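    // build() hands this accumulated list of expression objects to the evaluation
    // constructor, which clones one eval_op per expression in program order.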
+}; // class program + +#endif // PROGRAM_H diff --git a/src/tensor.cpp b/src/tensor.cpp new file mode 100755 index 0000000..af63e1e --- /dev/null +++ b/src/tensor.cpp @@ -0,0 +1,430 @@ +#include "tensor.h" +#include +#include +#include +#include +#include + +tensor::tensor() {} + +tensor::tensor(double v) { + shape_.push_back(1); + data_.push_back(v); +} + +tensor::tensor(int dim, std::initializer_list shape, std::initializer_list data) { + shape_.assign(shape.begin(), shape.end()); + data_.assign(data.begin(), data.end()); + dim_ = dim; +} + +size_t tensor::getTotalSize() const { + size_t totalSize = 1; + for (size_t dim : shape_) { + if (totalSize > std::numeric_limits::max() / dim) + throw std::overflow_error("Size overflow detected"); + totalSize *= dim; + } + return totalSize; +} + +tensor::tensor(int dim, size_t shape[], double data[]): + shape_(shape, shape+dim) { + dim_ = dim; + // calc N has shape[0]*shape[1]*...*shape[dim-1] + // calc N as the product of elements in the shape array + size_t N = 1; + for (int i=0; i < dim; ++i) { + N = shape[i] * N; + } + + data_.assign(data, data+N); +} + +int tensor::get_dim() const { + return shape_.size(); +} + +double tensor::item() const { + if (shape_.empty()) { + return data_[0]; + } else { + throw std::runtime_error("item() called on a non-scalar tensor."); + } +} + +double &tensor::item() { + if (shape_.empty()) { + return data_[0]; + } else { + throw std::runtime_error("item() called on a non-scalar tensor."); + } +} + +double tensor::at(size_t i) const { + assert(get_dim() == 1); + assert(i < shape_[0]); + return data_[i]; +} + +double tensor::at(size_t i, size_t j) const { + assert(get_dim() == 2); + if (shape_.size() != 2) + throw std::runtime_error("Tensor is not 2-dimensional"); + + assert((i < shape_[0]) && (j < shape_[1])); + return data_[i * shape_[1] + j]; +} + +double tensor::at(size_t i, size_t j, size_t k, size_t l) const { + assert(get_dim() == 4); // Ensure the tensor is 4D + assert(i < shape_[0] && j < shape_[1] && k < shape_[2] && l < shape_[3]); + + size_t index = i * shape_[1] * shape_[2] * shape_[3] + j * shape_[2] * shape_[3] + k * shape_[3] + l; + return data_[index]; +} + +double& tensor::at(size_t i, size_t j, size_t k, size_t l) { + assert(get_dim() == 4); // Ensure the tensor is 4D + assert(i < shape_[0] && j < shape_[1] && k < shape_[2] && l < shape_[3]); + + size_t index = i * shape_[1] * shape_[2] * shape_[3] + j * shape_[2] * shape_[3] + k * shape_[3] + l; + return data_[index]; +} + +size_t* tensor::get_shape_array() { + return shape_.empty()? nullptr: &shape_[0]; +} + +double* tensor::get_data_array() { + return &data_[0]; +} + +// This can be used for debugging across the entire program. +// prints the values of the tensor. Be careful though because +// it will clutter the screen output. 
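// Note: print() reports the dim_ member, which is only assigned in the (dim, shape, data)
// constructors; for tensors built via tensor() or tensor(double) the printed Dim value is
// whatever dim_ happens to hold, while get_dim() stays reliable because it derives from
// shape_.size().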
+void tensor::print() const { + std::cout << "Tensor - Dim: " << dim_ << ", Shape: {"; + for (const auto& s : shape_) std::cout << s << " "; + std::cout << "}, Data: {"; + for (const auto& d : data_) std::cout << d << " "; + std::cout << "}\n"; +} + +// helper template +template tensor helper(const tensor &lhs, const tensor &rhs) { + tensor result; + result.shape_ = lhs.shape_; + size_t totalSize = lhs.getTotalSize(); + result.data_.reserve(totalSize); + std::transform(lhs.data_.begin(), lhs.data_.begin() + totalSize, + rhs.data_.begin(), std::back_inserter(result.data_), T()); + return result; +} + +// operator overloads +tensor operator+(const tensor &lhs, const tensor &rhs) { + tensor tensor_a = tensor(lhs); + tensor tensor_b = tensor(rhs); + + size_t size_a = lhs.getTotalSize(); + std::vector result; + for (size_t i =0; i< size_a; ++i) { + double sum_array = tensor_a.get_data_array()[i] + tensor_b.get_data_array()[i]; + result.push_back(sum_array); + } + tensor c(tensor_a.get_dim(), tensor_a.get_shape_array(), &result[0]); + return c; +} + +tensor operator-(const tensor &lhs, const tensor &rhs) { + tensor tensor_a = tensor(lhs); + tensor tensor_b = tensor(rhs); + + size_t size_a = lhs.getTotalSize(); + std::vector result; + for (size_t i =0; i< size_a; ++i) { + double sum_array = tensor_a.get_data_array()[i] - tensor_b.get_data_array()[i]; + result.push_back(sum_array); + } + tensor c(tensor_a.get_dim(), tensor_a.get_shape_array(), &result[0]); + return c; +} + +tensor operator*(const tensor &lhs, const tensor &rhs) +{ + try { + + // scalar * scalar multiplication + if (lhs.getTotalSize() == 1 && rhs.getTotalSize() == 1) { + return tensor(lhs.data_[0] * rhs.data_[0]); + } + if (lhs.shape_.empty() || rhs.shape_.empty()) { + return tensor(); // for p3-q6, passing in None + } + + tensor result; + size_t lhsSize = lhs.getTotalSize(); + size_t rhsSize = rhs.getTotalSize(); + + // scalar * tensor multiplication + if (lhsSize == 1 || rhsSize == 1) { + double scalar = (lhsSize == 1 ? lhs : rhs).data_[0]; + const tensor &t = lhsSize == 1 ? 
rhs : lhs; + result.shape_ = t.shape_; + result.data_.reserve(lhsSize * rhsSize); + std::transform(t.data_.begin(), t.data_.end(), + std::back_inserter(result.data_), + [&scalar](double x) { return x * scalar; }); + } else { + // matrix multiplication + size_t I = lhs.shape_[0]; + size_t J = rhs.shape_[1]; + size_t K = lhs.shape_[1]; + result.shape_ = {I, J}; + size_t totalSize = result.getTotalSize(); + result.data_.resize(totalSize); + + for (size_t i = 0; i < I; ++i) { + for (size_t j = 0; j < J; ++j) { + for (size_t k = 0; k < K; ++k) { + result.data_[J * i + j] += + lhs.data_[K * i + k] * rhs.data_[J * k + j]; + } + } + } + } + return result; + } catch (const std::bad_alloc&) { + throw std::runtime_error("Memory allocation failed in tensor multiplication."); + } +} + +tensor ReLU(const tensor &x){ + tensor result; + size_t xSize = x.getTotalSize(); + result.shape_ = x.shape_; + for(size_t i = 0; i < xSize; i++ ){ + result.data_.push_back(x.data_[i]*(x.data_[i]>0)); + } + return result; + } + +tensor Flatten(const tensor &x){ + tensor result; + size_t xSize = x.getTotalSize(); + size_t shape_1 = xSize/x.shape_[0]; + std::vector shape_; + shape_.push_back(x.shape_[0]); + shape_.push_back(shape_1); + result.shape_ = shape_; + result.data_ = x.data_; + return result; + } + +tensor Input2d(const tensor &x){ + tensor result; + std::vector shape_; + shape_.push_back(x.shape_[0]); + shape_.push_back(x.shape_[3]); + shape_.push_back(x.shape_[1]); + shape_.push_back(x.shape_[2]); + result.shape_ = shape_; + for(size_t i = 0; i < x.shape_[0]; i++ ){ + for(size_t j =0; j < x.shape_[3];j++){ + for(size_t k = 0; k shape_0; + shape_0.push_back(x.shape_[1]); + shape_0.push_back(w.shape_[0]); + size_t totalSize_w = w.getTotalSize(); + result_wT.data_.reserve(totalSize_w); + result_wT.shape_ = shape_0; + for(size_t i = 0; i < w.shape_[1]; i++){ + for(size_t j = 0; j < w.shape_[0]; j++){ + result_wT.data_.push_back(w.data_[i + j*w.shape_[1]]); + } + } + tensor result_xwT; + std::vector shape_xwT; + shape_xwT.push_back(x.shape_[0]); + shape_xwT.push_back(result_wT.shape_[1]); + size_t totalSize_xwT = x.shape_[0]*result_wT.shape_[1]; + result_xwT.data_.resize(totalSize_xwT); + result_xwT.shape_ = shape_xwT; + result_xwT = x * result_wT; + + tensor result_bBrodcast; + std::vector shape_1; + shape_1.push_back(b.shape_[0]); + shape_1.push_back(1*x.shape_[0]); + result_bBrodcast.shape_ = shape_1; + for(size_t i = 0; i < result_bBrodcast.shape_[0]; i++){ + for(size_t j = 0; j < result_bBrodcast.shape_[1]; j++){ + result_bBrodcast.data_.push_back(b.data_[i]); + } + } + + tensor result_bBroadcastT; + std::vector shape_2; + size_t totalSize_b = result_bBrodcast.getTotalSize(); + result_bBrodcast.data_.resize(totalSize_b); + shape_2.push_back(result_bBrodcast.shape_[1]); + shape_2.push_back(result_bBrodcast.shape_[0]); + result_bBroadcastT.shape_ = shape_2; + for(size_t i = 0; i < result_bBrodcast.shape_[1]; i++){ + for(size_t j = 0; j < result_bBrodcast.shape_[0]; j++){ + result_bBroadcastT.data_.push_back(result_bBrodcast.data_[i + j*result_bBrodcast.shape_[1]]); + } + } + tensor result; + result.data_.resize(result_xwT.shape_[0]*result_xwT.shape_[1]); + result = result_xwT + result_bBroadcastT; + return result; +} + + +std::vector maxpooling(std::vector > map, int k_w, int k_h, int s_w, int s_h) +{ + int row = map.size(); int col = map[0].size(); + int out_row = 1+(row-k_h)/s_h, out_col = 1+(col-k_w)/s_w; + int mod_row = row%s_h, mod_col = col%s_w; + if (mod_row != 0) out_row++; + if (mod_col != 0) out_col++; + 
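    // out_row/out_col count the pooling windows along each axis: 1 + (size - kernel)/stride
    // full windows, plus one extra (partial) window whenever the size is not an exact
    // multiple of the stride.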
+ std::vector > res(out_row, std::vector(out_col, 0)); + std::vector > pad_map(map); + + for (int i=0;i temp; + for(int ii=0;ii res_; + for (auto l : res) + { + for (auto ll : l) + { + res_.push_back(ll); + } + } + return res_; +} + +tensor MaxPool2d(const tensor &x, const int kernel_size, const int stride){ + std::cout <<"======1================="< shape_; + shape_.push_back(x.shape_[0]); + shape_.push_back(x.shape_[1]); + shape_.push_back(floor(x.shape_[2]/kernel_size)); + shape_.push_back(floor(x.shape_[3]/kernel_size)); + result.shape_ = shape_; + for(size_t i = 0; i < x.shape_[0]; i++ ){ + for(size_t j =0; j < x.shape_[1];j++){ + std::vector > tmp; + for(size_t k =0; k < x.shape_[2];k++){ + std::vector tmp_; + for(size_t l =0; l < x.shape_[3];l++){ + double int_data = x.data_[l + k*x.shape_[3] + j*x.shape_[3]*x.shape_[2] + i*x.shape_[3]*x.shape_[2]*x.shape_[1] ]; + tmp_.push_back(int_data); + } + tmp.push_back(tmp_); + } + auto res = maxpooling(tmp,kernel_size,kernel_size,stride,stride); + for(size_t i = 0; i< res.size(); i++){ + result.data_.push_back(res[i]); + } + } + } + return result; + } + +tensor Conv2D(const tensor &input_tensor, + const tensor &kernel_tensor, + const tensor &bias_tensor, + int in_channels, + int out_channels, + int kernel_size) + +{ + std::cout << "In Channels: " << in_channels << ", Out Channels: " << out_channels << std::endl; + std::cout << "Kernel Size: " << kernel_size << std::endl; + + // Calculate output dimensions + int out_height = input_tensor.shape_[2] - kernel_size + 1; + int out_width = input_tensor.shape_[3] - kernel_size + 1; + + // Ensure out_height and out_width are non-negative + out_height = std::max(out_height, 0); + out_width = std::max(out_width, 0); + + // Initialize shape array for output tensor + size_t output_shape[] = {input_tensor.shape_[0], static_cast(out_channels), static_cast(out_height), static_cast(out_width)}; + + // Initialize data array for output tensor (fill with zeros) + std::vector output_data(output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3], 0); + + // Create output tensor + tensor output_tensor(4, output_shape, output_data.data()); + + // Convolution operation + for (size_t n = 0; n < input_tensor.shape_[0]; ++n) { // for each batch + for (int m = 0; m < out_channels; ++m) { // for each filter + for (int y = 0; y < out_height; ++y) { + for (int x = 0; x < out_width; ++x) { + double sum = 0.0; + for (int c = 0; c < in_channels; ++c) { // for each channel + for (int ky = 0; ky < kernel_size; ++ky) { + for (int kx = 0; kx < kernel_size; ++kx) { + size_t ix = x + kx; + size_t iy = y + ky; + if (ix < input_tensor.shape_[3] && iy < input_tensor.shape_[2]) { + sum += input_tensor.at(n, c, iy, ix) * kernel_tensor.at(m, c, ky, kx); + } + } + } + } + sum += bias_tensor.at(m); // add bias + output_tensor.at(n, m, y, x) = sum; + } + } + } + } + + return output_tensor; +} \ No newline at end of file diff --git a/src/tensor.h b/src/tensor.h new file mode 100755 index 0000000..6c604b4 --- /dev/null +++ b/src/tensor.h @@ -0,0 +1,59 @@ +#ifndef TENSOR_H +#define TENSOR_H + +#include +#include +#include + +class tensor { +public: + tensor(); // scalar 0 + explicit tensor(double v); // scalar v + tensor(int dim, size_t shape[], double data[]); // From C + tensor(int dim, std::initializer_list shape, std::initializer_list data); // Conv2d + + int get_dim() const; + + // scalar only + double item() const; + double &item(); + + // more accessors + double at(size_t i) const; + double at(size_t i, size_t j) const; + 
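    // All accessors index the flat row-major data_ buffer:
    //   2-D: data_[i * shape_[1] + j]
    //   4-D: data_[((i * shape_[1] + j) * shape_[2] + k) * shape_[3] + l]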
double at(size_t i, size_t j, size_t k, size_t l) const; + double& at(size_t i, size_t j, size_t k, size_t l); + + // passing tensor back to C code + size_t *get_shape_array(); + double *get_data_array(); + + // easy way to print out the tensor for logging + void print() const; + + size_t getTotalSize() const; + + // friend is better than member, because it allows + // double + tensor (implicit conversion) + // operator overloads + friend tensor operator+(const tensor &lhs, const tensor &rhs); + friend tensor operator-(const tensor &lhs, const tensor &rhs); + friend tensor operator*(const tensor &lhs, const tensor &rhs); + + template <typename T> + friend tensor helper(const tensor &lhs, const tensor &rhs); + + friend tensor ReLU(const tensor &x); + friend tensor Flatten(const tensor &x); + friend tensor Input2d(const tensor &x); + friend tensor Linear(const tensor &w, const tensor &x, const tensor &b); + friend tensor MaxPool2d(const tensor &x, const int kernel_size, const int stride); + friend tensor Conv2D(const tensor &input_tensor, const tensor &kernel_tensor, const tensor &bias_tensor, int in_channels, int out_channels, int kernel_size); + +private: + int dim_; + std::vector<size_t> shape_; + std::vector<double> data_; +}; // class tensor + +#endif // TENSOR_H \ No newline at end of file
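Taken together, the files above expose a small C API (libeasynn.h) around program and evaluation. A minimal driver sketch, using only the declarations shown in libeasynn.h; the a + b program itself is an illustrative assumption, not part of this commit:

#include <cstddef>
#include <cstdio>
#include "libeasynn.h"

int main() {
    program *prog = create_program();

    // t0 = Input("a"), t1 = Input("b"), t2 = Add(t0, t1)
    append_expression(prog, 0, "a", "Input", nullptr, 0);
    append_expression(prog, 1, "b", "Input", nullptr, 0);
    int add_inputs[2] = {0, 1};
    append_expression(prog, 2, "", "Add", add_inputs, 2);

    evaluation *eval = build(prog);
    add_kwargs_double(eval, "a", 2.0);
    add_kwargs_double(eval, "b", 5.0);

    int dim;
    size_t *shape;
    double *data;
    if (execute(eval, &dim, &shape, &data) == 0)
        printf("result: %f (dim=%d)\n", data[0], dim);  // expect 7.000000
    return 0;
}

This mirrors how easynn_test is linked against libeasynn.so in the Makefile: the caller only ever sees opaque program/evaluation handles and plain C arrays, while expression construction, prototype lookup, and tensor storage stay inside the library.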