Merge branch 'master' into 'main'

Project 3 complete. 10/10 tests passing; p1 and p2 tests still pass. See merge request jtpmcdevitt/oop_and_ml!1

Commit 893918163a
57 changed files with 3535 additions and 0 deletions
9
Makefile
Executable file
@@ -0,0 +1,9 @@
.PHONY: all libeasynn easynn_test

all: libeasynn easynn_test

libeasynn:
	g++ -Wall src/*.cpp -fPIC -O -g -shared -o libeasynn.so

easynn_test: libeasynn
	g++ -Wall easynn_test.cpp -g -lm -L. -Wl,-rpath=. -leasynn -o easynn_test
145
easynn.py
Executable file
@@ -0,0 +1,145 @@
class Expr:
    next_id = 0

    def __init__(self, op, inputs):
        self.op = op
        self.inputs = inputs
        self.id = Expr.next_id
        Expr.next_id += 1

        if not isinstance(op, Op):
            raise Exception("Not an operator: %s" % op)

    def __dfs_post(self, ids, visitor):
        ids[self.id] = True
        for expr in self.inputs:
            if expr.id in ids:
                continue
            expr.__dfs_post(ids, visitor)
        visitor(self)

    def statements(self):
        lines = []
        self.__dfs_post({}, lambda that: lines.append("%s" % that))
        return "\n".join(lines)

    def __str__(self):
        args = ",".join(["t%d" % expr.id for expr in self.inputs])
        return "t%d = %s(%s)" % (self.id, self.op, args)

    def __promote(r):
        if isinstance(r, Expr):
            return r
        else:
            return Const(r)

    def __add__(self, r):
        return Op("", "Add", 2, {})(self, Expr.__promote(r))

    def __sub__(self, r):
        return Op("", "Sub", 2, {})(self, Expr.__promote(r))

    def __mul__(self, r):
        return Op("", "Mul", 2, {})(self, Expr.__promote(r))

    def __neg__(self):
        return Op("", "Neg", 1, {})(self)

    def compile(self, builder):
        self.__dfs_post({}, lambda that: builder.append(that))
        return builder.build()

    def resolve(self, parameters):
        self.__dfs_post({}, lambda that: that.op.resolve(parameters))
        return self


class Op:
    def __init__(self, name, op_type, num_args, parameters):
        self.name = name
        self.op_type = op_type
        self.num_args = num_args
        self.parameters = parameters

    def __call__(self, *inputs):
        if self.num_args >= 0 and self.num_args != len(inputs):
            raise Exception("%s: need %d arguments but found %d" % (self, self.num_args, len(inputs)))
        for i, expr in enumerate(inputs):
            if not isinstance(expr, Expr):
                raise Exception("%s: arg %d is not an expression: %s" % (self, i, expr))
        return Expr(self, inputs)

    def __str__(self):
        name = "%s.%s" % (self.name, self.op_type)
        if len(self.parameters) == 0:
            return name
        params = ",".join(["%s=%s" % (k, v.shape if hasattr(v, "shape") else v) for k, v in self.parameters.items()])
        return "%s[%s]" % (name, params)

    def resolve(self, parameters):
        if self.name == "":
            return
        for k, v in parameters.items():
            if k.startswith(self.name+"."):
                self.parameters[k[len(self.name)+1:]] = v


def Const(c):
    if isinstance(c, (int, float)):
        c = float(c)
    elif hasattr(c, "shape"):
        c = c.astype(float)
    else:
        raise Exception("Const must be float or int or ndarray: %s" % c)

    return Expr(Op("", "Const", 0, {
        "value": c
    }), [])


def Input(n):
    return Expr(Op(n, "Input", 0, {}), [])


def Input2d(n, h, w, ic):
    return Expr(Op(n, "Input2d", 0, {
        "height": h,
        "width": w,
        "in_channels": ic
    }), [])


def MaxPool2d(k, s):
    return Op("", "MaxPool2d", 1, {
        "kernel_size": k,
        "stride": s
    })


def ReLU():
    return Op("", "ReLU", 1, {})


def Flatten():
    return Op("", "Flatten", 1, {})


def Conv2d(n, ic, oc, k, p = 0):
    return Op(n, "Conv2d", 1, {
        "in_channels": ic,
        "out_channels": oc,
        "kernel_size": k,
        "padding": p
    })


def Linear(n, i, o):
    return Op(n, "Linear", 1, {
        "in_features": i,
        "out_features": o
    })


def Show():
    return Op("", "Show", 1, {})
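Aside (not part of the commit): a minimal sketch of how these pieces compose. Input/Const and the operator overloads build an Expr DAG, compile() walks it in post order into a builder, and the builder's build() returns a callable; golden.Builder refers to easynn_golden.py below.

# sketch: build (a + b) * a and evaluate it with the numpy reference backend
import numpy as np
import easynn as nn
import easynn_golden as golden

a = nn.Input("a")
b = nn.Input("b")
expr = (a + b) * a                         # operator overloads build the DAG
print(expr.statements())                   # post-order listing of the expression
run = expr.compile(golden.Builder())
print(run(a = np.eye(2), b = np.eye(2)))   # matmul of 2*I with I, i.e. 2*I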
130
easynn_cpp.py
Executable file
@@ -0,0 +1,130 @@
import numpy as np
import ctypes
import time

_libeasynn = ctypes.CDLL("./libeasynn.so")

_libeasynn.create_program.restype = ctypes.c_void_p

_libeasynn.append_expression.argtypes = [
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_char_p,
    ctypes.c_char_p,
    ctypes.c_void_p,
    ctypes.c_int
]

_libeasynn.add_op_param_double.argtypes = [
    ctypes.c_void_p,
    ctypes.c_char_p,
    ctypes.c_double
]

_libeasynn.add_op_param_ndarray.argtypes = [
    ctypes.c_void_p,
    ctypes.c_char_p,
    ctypes.c_int,
    ctypes.c_void_p,
    ctypes.c_void_p
]

_libeasynn.build.restype = ctypes.c_void_p
_libeasynn.build.argtypes = [
    ctypes.c_void_p
]

_libeasynn.add_kwargs_double.argtypes = [
    ctypes.c_void_p,
    ctypes.c_char_p,
    ctypes.c_double
]

_libeasynn.add_kwargs_ndarray.argtypes = [
    ctypes.c_void_p,
    ctypes.c_char_p,
    ctypes.c_int,
    ctypes.c_void_p,
    ctypes.c_void_p
]

_libeasynn.execute.argtypes = [
    ctypes.c_void_p,
    ctypes.c_void_p,
    ctypes.c_void_p,
    ctypes.c_void_p
]


def check_and_raise(ret):
    if ret != 0:
        raise Exception("libeasynn error: code %d" % ret)


def to_float_or_ndarray(c_dim, c_shape, c_data):
    if c_dim.value == 0:
        return c_data[0]
    shape = [c_shape[k] for k in range(c_dim.value)]
    N = 1
    for s in shape:
        N *= s
    print("shape =", shape, "N =", N)
    flat = np.array([c_data[i] for i in range(N)])
    return flat.reshape(shape)


class Eval:
    def __init__(self, evaluation):
        self.evaluation = evaluation

    def __call__(self, **kwargs):
        start = time.time()
        for k, v in kwargs.items():
            if isinstance(v, (int, float)):
                _libeasynn.add_kwargs_double(
                    self.evaluation, k.encode(), ctypes.c_double(v))
            elif hasattr(v, "shape"):
                v = np.require(v, dtype=np.double, requirements=['C', 'O', 'W', 'A'])
                _libeasynn.add_kwargs_ndarray(
                    self.evaluation, k.encode(), v.ndim, v.ctypes.shape,
                    v.ctypes.data)
            else:
                raise Exception("%s: kwargs must be float or int or ndarray" % k)
        c_dim = ctypes.c_int()
        c_shape = ctypes.POINTER(ctypes.c_size_t)()
        c_data = ctypes.POINTER(ctypes.c_double)()
        check_and_raise(_libeasynn.execute(self.evaluation,
            ctypes.byref(c_dim), ctypes.byref(c_shape), ctypes.byref(c_data)))
        res = to_float_or_ndarray(c_dim, c_shape, c_data)
        t = time.time()-start
        if t > 0.1:
            print("c++ time %.2f" % t)
        return res


class Builder:
    def __init__(self):
        self.program = _libeasynn.create_program()

    def append(self, expr):
        inputs = [ex.id for ex in expr.inputs]
        num_inputs = len(inputs)
        op = expr.op
        _libeasynn.append_expression(
            self.program, expr.id,
            op.name.encode(), op.op_type.encode(),
            (ctypes.c_int*num_inputs)(*inputs), num_inputs)
        for k, v in op.parameters.items():
            if isinstance(v, (int, float)):
                check_and_raise(_libeasynn.add_op_param_double(
                    self.program, k.encode(), ctypes.c_double(v)))
            elif hasattr(v, "shape"):
                v = np.require(v, dtype=np.double, requirements=['C', 'O', 'W', 'A'])
                check_and_raise(_libeasynn.add_op_param_ndarray(
                    self.program, k.encode(), v.ndim, v.ctypes.shape,
                    v.ctypes.data))
            else:
                raise Exception("%s: op params must be float or int or ndarray: %s" % (expr, k))

    def build(self):
        return Eval(_libeasynn.build(self.program))
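A note on the np.require calls above (sketch, not part of the commit): before a pointer crosses into C, the array must be C-contiguous, aligned, writeable double data that the array itself owns, so that v.ctypes.shape and v.ctypes.data remain valid for the call.

# sketch: what np.require normalizes before handing pointers to ctypes
import numpy as np

v = np.arange(6).reshape(2, 3).T     # a non-contiguous view
v = np.require(v, dtype=np.double, requirements=['C', 'O', 'W', 'A'])
print(v.flags['C_CONTIGUOUS'], v.flags['OWNDATA'])        # True True
print(list(v.ctypes.shape[:v.ndim]), hex(v.ctypes.data))  # size_t shape, raw pointer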
189
easynn_golden.py
Executable file
@@ -0,0 +1,189 @@
import numpy as np
import time


def Input(expr, op, args, **kwargs):
    if op.name in kwargs:
        c = kwargs[op.name]
        if isinstance(c, (int, float)):
            return float(c)
        elif hasattr(c, "shape"):
            return c.astype(float)
        else:
            raise Exception("%s: Input must be float or int or ndarray: %s" % (expr, c))
    else:
        raise Exception("%s: missing input" % expr)


def Input2d(expr, op, args, **kwargs):
    if not op.name in kwargs:
        raise Exception("%s: missing input" % expr)
    imgs = kwargs[op.name]
    if not hasattr(imgs, "shape"):
        raise Exception("%s: Input must be ndarray: %s" % (expr, imgs))
    if any([
        len(imgs.shape) != 4,
        imgs.shape[1] != op.parameters["height"],
        imgs.shape[2] != op.parameters["width"],
        imgs.shape[3] != op.parameters["in_channels"]]):
        raise Exception("%s: Invalid input size: %s" % (expr, imgs.shape))
    # NHWC => NCHW
    return imgs.astype(float).transpose(0,3,1,2)


def Const(expr, op, args, **kwargs):
    return op.parameters["value"]


def Neg(expr, op, args, **kwargs):
    return -args[0]


def Add(expr, op, args, **kwargs):
    a = args[0]
    b = args[1]
    if not hasattr(a, "shape") and not hasattr(b, "shape"):
        return a+b
    elif hasattr(a, "shape") and hasattr(b, "shape"):
        if a.shape != b.shape:
            raise Exception("%s: size mismatch: %s+%s" % (expr, a.shape, b.shape))
        return a+b
    else:
        raise Exception("%s: cannot mix scalar and ndarray" % expr)


def Sub(expr, op, args, **kwargs):
    a = args[0]
    b = args[1]
    if not hasattr(a, "shape") and not hasattr(b, "shape"):
        return a-b
    elif hasattr(a, "shape") and hasattr(b, "shape"):
        if a.shape != b.shape:
            raise Exception("%s: size mismatch: %s-%s" % (expr, a.shape, b.shape))
        return a-b
    else:
        raise Exception("%s: cannot mix scalar and ndarray" % expr)


def Mul(expr, op, args, **kwargs):
    a = args[0]
    b = args[1]
    if not hasattr(a, "shape") or not hasattr(b, "shape"):
        return a*b
    else:
        if len(a.shape) != 2 or len(b.shape) != 2:
            raise Exception("%s: matmul only: %s*%s" % (expr, a.shape, b.shape))
        if a.shape[1] != b.shape[0]:
            raise Exception("%s: size mismatch: %s*%s" % (expr, a.shape, b.shape))
        return np.matmul(a, b)


def Flatten(expr, op, args, **kwargs):
    x = args[0]
    if not hasattr(x, "shape"):
        raise Exception("%s: ndarray only: %s" % (expr, x))
    return x.reshape((x.shape[0], -1))


def ReLU(expr, op, args, **kwargs):
    x = args[0]
    return x*(x > 0)


def Linear(expr, op, args, **kwargs):
    x = args[0]
    if not hasattr(x, "shape"):
        raise Exception("%s: ndarray only: %s" % (expr, x))
    if "weight" not in op.parameters or "bias" not in op.parameters:
        raise Exception("%s: missing weight or bias" % expr)
    weight = op.parameters["weight"]
    bias = op.parameters["bias"]
    if not hasattr(weight, "shape") or not hasattr(bias, "shape"):
        raise Exception("%s: ndarray only for weight or bias" % expr)
    in_features = op.parameters["in_features"]
    out_features = op.parameters["out_features"]
    if any([
        len(x.shape) != 2,
        x.shape[1] != in_features,
        weight.shape != (out_features, in_features),
        bias.shape != (out_features,)]):
        raise Exception("%s: size mismatch: %s*%s+%s" % (expr, weight.shape, x.shape, bias.shape))
    return np.einsum("ni,oi->no", x, weight,
        optimize = "optimal")+bias.reshape((1, out_features))


def MaxPool2d(expr, op, args, **kwargs):
    x = args[0]
    if not hasattr(x, "shape"):
        raise Exception("%s: ndarray only: %s" % (expr, x))
    kernel_size = op.parameters["kernel_size"]
    stride = op.parameters["stride"]
    if kernel_size != stride:
        raise Exception("%s: kernel_size != stride" % expr)
    if any([
        len(x.shape) != 4,
        x.shape[2]%stride != 0,
        x.shape[3]%stride != 0]):
        raise Exception("%s: size mismatch: %s" % (expr, x.shape))
    new_shape = (x.shape[0], x.shape[1], x.shape[2]//stride, stride, x.shape[3]//stride, stride)
    return np.nanmax(x.reshape(new_shape), axis = (3,5))


def Conv2d(expr, op, args, **kwargs):
    x = args[0]
    if not hasattr(x, "shape"):
        raise Exception("%s: ndarray only: %s" % (expr, x))
    if "weight" not in op.parameters or "bias" not in op.parameters:
        raise Exception("%s: missing weight or bias" % expr)
    weight = op.parameters["weight"]
    bias = op.parameters["bias"]
    in_channels = op.parameters["in_channels"]
    out_channels = op.parameters["out_channels"]
    kernel_size = op.parameters["kernel_size"]
    padding = op.parameters["padding"]
    if any([
        len(x.shape) != 4,
        x.shape[1] != in_channels,
        weight.shape != (out_channels, in_channels, kernel_size, kernel_size),
        bias.shape != (out_channels,)]):
        raise Exception("%s: size mismatch: %s" % (expr, x.shape))
    if padding != 0:
        tmp = np.zeros((x.shape[0], x.shape[1], x.shape[2]+2*padding, x.shape[3]+2*padding))
        # center x inside the zero-padded tensor
        tmp[:, :, padding:-padding, padding:-padding] = x
        x = tmp
    conv_shape = x.shape[:2]+(x.shape[2]+1-kernel_size, x.shape[3]+1-kernel_size, kernel_size, kernel_size)
    conv_strides = x.strides+x.strides[2:]
    conv = np.lib.stride_tricks.as_strided(x, shape = conv_shape, strides = conv_strides, writeable = False)
    return np.einsum("nihwyx,oiyx->nohw", conv, weight,
        optimize = "optimal")+bias.reshape((1, out_channels, 1, 1))


class Eval:
    def __init__(self, program):
        self.program = program

    def __call__(self, **kwargs):
        start = time.time()
        values = {}
        for expr in self.program:
            args = [values[ex.id] for ex in expr.inputs]
            if expr.op.op_type not in globals():
                raise Exception("%s: not implemented" % expr)
            values[expr.id] = globals()[expr.op.op_type](expr, expr.op, args, **kwargs)
            #print("numpy op", expr.op.op_type, "time %.2f" % (time.time()-start))
        res = values[self.program[-1].id]
        t = time.time()-start
        if t > 0.1:
            print("numpy time %.2f" % t)
        return res


class Builder:
    def __init__(self):
        self.program = []

    def append(self, expr):
        self.program.append(expr)

    def build(self):
        return Eval(self.program)
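Aside (not part of the commit): the reshape trick behind MaxPool2d above, on a tiny array. Each stride-by-stride window becomes its own pair of axes, and the max is taken over those axes.

import numpy as np
x = np.arange(16, dtype=float).reshape(1, 1, 4, 4)   # N=1, C=1, H=W=4
s = 2
new_shape = (1, 1, 4//s, s, 4//s, s)
print(np.nanmax(x.reshape(new_shape), axis = (3, 5))[0, 0])
# [[ 5.  7.]
#  [13. 15.]]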
47
easynn_test.cpp
Executable file
@@ -0,0 +1,47 @@
/**
 * A simple test program that helps you debug your easynn implementation.
 */

#include <stdio.h>
#include "src/libeasynn.h"

int main()
{
    program *prog = create_program();

    int inputs0[] = {};
    append_expression(prog, 0, "a", "Input", inputs0, 0);

    //int inputs1[] = {0, 0};
    //append_expression(prog, 1, "", "Add", inputs1, 2);

    //int inputs2[] = {2, 2};
    //append_expression(prog, 2, "", "Mul", inputs2, 2);

    //int inputs3[] = {10, 5};
    //append_expression(prog, 3, "", "Sub", inputs3, 2);


    evaluation *eval = build(prog);
    add_kwargs_double(eval, "a", 5);


    // tensor tests: it is easier to just compile the library and run the
    // python grading tests than to extend this program
    int dim = 0;
    size_t *shape = nullptr;
    double *data = nullptr;
    if (execute(eval, &dim, &shape, &data) != 0)
    {
        printf("evaluation fails\n");
        return -1;
    }

    if (dim == 0)
        printf("res = %f\n", data[0]);
    else
        printf("result as tensor is not supported yet\n");

    return 0;
}
12
ece449.code-workspace
Executable file
@@ -0,0 +1,12 @@
{
    "folders": [
        {
            "path": "."
        }
    ],
    "settings": {
        "files.associations": {
            "array": "cpp"
        }
    }
}
30
grade.py
Executable file
@@ -0,0 +1,30 @@
import traceback
import multiprocessing

def grade_all(name, begin, end, funcs):
    grade = 0
    for q in range(begin, end):
        def func():
            exit(0 if funcs["grade_Q%d" % q]() else 1)
        try:
            p = multiprocessing.Process(target = func)
            p.start()
            p.join(300)
            if p.is_alive():
                p.terminate()
                print("============Q%d timeout!============\n" % q, flush = True)
            elif p.exitcode == 0:
                print("============Q%d passed!============\n" % q, flush = True)
                grade += 1
            else:
                print("============Q%d failed!============\n" % q, flush = True)
        except Exception as e:
            print("============Q%d failed!============" % q, flush = True)
            print(traceback.format_exc())
    print("Grading result: %d functions passed" % grade)

    print("*************************************************************")
    print("* You may receive 0 points unless your code tests correctly *")
    print("* Please commit and push your code at least daily.          *")
    print("* DO NOT CHANGE any grade_px.py for grading                 *")
    print("*************************************************************")
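For context (not part of the commit): each grade_pX.py below calls grade_all with its module globals, and every grade_QN runs in a separate process, so a crash or a 300-second hang in one question cannot take down the whole run. A hypothetical minimal driver, assuming a fork-capable platform such as Linux:

import grade

def grade_Q1():
    return 1 + 1 == 2     # any callable returning True counts as a pass

grade.grade_all("demo", 1, 2, globals())   # range(1, 2) runs grade_Q1 only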
104
grade_p1.py
Executable file
@@ -0,0 +1,104 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import grade
import prj01


# create
def grade_Q1():
    a = prj01.Q1()
    if not hasattr(a, "shape"):
        return False
    if a.shape != (10, 5):
        return False
    return all([a[i, j] == i+j for i in range(10) for j in range(5)])


# add
def grade_Q2():
    a = np.random.rand(100, 10)
    b = np.random.rand(100, 10)
    c = prj01.Q2(a, b)
    if not hasattr(c, "shape"):
        return False
    if c.shape != (100, 10):
        return False
    return all([abs(c[i, j]-a[i, j]-b[i, j]) < 1e-9 for i in range(100) for j in range(10)])


# mul
def grade_Q3():
    a = np.array([[i for i in range(10)]])
    b = a.transpose()
    c = prj01.Q3(a, b)
    d = prj01.Q3(b, a)
    if not hasattr(c, "shape") or not hasattr(d, "shape"):
        return False
    if c.shape != (1, 1) or d.shape != (10, 10):
        return False
    return c[0, 0] == 285 and all([d[i, j] == i*j for i in range(10) for j in range(10)])


# max
def grade_Q4():
    a = np.random.rand(100, 10)
    b = prj01.Q4(a)
    if len(b) != 100:
        return False
    return all([a[i, b[i]] >= a[i, j] for i in range(100) for j in range(10)])


# solve
def grade_Q5():
    A = -np.random.rand(100, 100)+np.diag([100]*100)
    b = np.random.rand(100, 1)
    x = prj01.Q5(A, b)
    Ax = prj01.Q3(A, x)
    return np.allclose(Ax, b)


# a+b
def grade_Q6():
    a = np.random.rand(100, 10)
    b = np.random.rand(100, 10)
    c = prj01.Q6().compile(golden.Builder())(a = a, b = b)
    return np.allclose(c, prj01.Q2(a, b))


# a+b*c
def grade_Q7():
    a = np.random.rand(100, 50)
    b = np.random.rand(100, 10)
    c = np.random.rand(10, 50)
    d = prj01.Q7().compile(golden.Builder())(a = a, b = b, c = c)
    return np.allclose(d, prj01.Q2(a, prj01.Q3(b, c)))


# Ax+b
def grade_Q8():
    A = np.random.rand(100, 100)
    x = np.random.rand(100, 1)
    b = np.random.rand(100, 1)
    y = prj01.Q8(A, b).compile(golden.Builder())(x = x)
    return np.allclose(y, prj01.Q2(prj01.Q3(A, x), b))


# x**n
def grade_Q9():
    x = np.random.rand(100, 100)
    y = prj01.Q9(8).compile(golden.Builder())(x = x)
    x2 = prj01.Q3(x, x)
    x4 = prj01.Q3(x2, x2)
    x8 = prj01.Q3(x4, x4)
    return np.allclose(y, x8)


# |x|
def grade_Q10():
    x = np.random.randn(100, 100)
    y = prj01.Q10().compile(golden.Builder())(x = x)
    return np.allclose(y, np.abs(x))


grade.grade_all("p1", 1, 11, globals())
89
grade_p2.py
Executable file
@@ -0,0 +1,89 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import easynn_cpp as cpp
import grade


def random_kwargs(kwargs):
    return {k: np.random.random(shape) if shape is not None else np.random.random() for k, shape in kwargs.items()}


def is_same(p, n, **kwargs):
    e0 = p.compile(golden.Builder())
    e1 = p.compile(cpp.Builder())
    nkwargs = [random_kwargs(kwargs) for i in range(n)]
    return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)])


def grade_Q1():
    x = nn.Input("x")
    return is_same(x, 1, x = None)


def grade_Q2():
    c = nn.Const(np.random.random())
    return is_same(c, 1)


def grade_Q3():
    x = nn.Input("x")
    c = nn.Const(np.random.random())
    y = x+c
    return is_same(y, 1, x = None)


def grade_Q4():
    x = nn.Input("x")
    c = nn.Const(np.random.random())
    y = x*c
    return is_same(y, 1, x = None)


def grade_Q5():
    x = nn.Input("x")
    y = nn.Input("y")
    z = x+y
    return is_same(z, 1, x = None, y = None)


def grade_Q6():
    x = nn.Input("x")
    y = nn.Input("y")
    z = x*y
    return is_same(z, 1, x = None, y = None)


def grade_Q7():
    x = nn.Input("x")
    y = nn.Input("y")
    z = nn.Input("z")
    r = x+y*z
    return is_same(r, 1, x = None, y = None, z = None)


def grade_Q8():
    x = nn.Input("x")
    y = nn.Input("y")
    z = nn.Input("z")
    r = (x-y)*z
    return is_same(r, 1, x = None, y = None, z = None)


def grade_Q9():
    x = nn.Input("x")
    y = nn.Input("y")
    c = nn.Const(np.random.random())
    z = x*y-x*c-y*c+c*c
    return is_same(z, 10, x = None, y = None)


def grade_Q10():
    x = nn.Input("x")
    y = nn.Input("y")
    c = nn.Const(np.random.random())
    z = x*y-x*c-y*c+c*c
    return is_same(z, 10, x = None, y = None)


grade.grade_all("p2", 1, 11, globals())
99
grade_p3.py
Executable file
@@ -0,0 +1,99 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import easynn_cpp as cpp
import grade


def random_kwargs(kwargs):
    return {k: np.random.random(shape) if shape is not None else np.random.random() for k, shape in kwargs.items()}


def is_same(p, n, **kwargs):
    e0 = p.compile(golden.Builder())
    e1 = p.compile(cpp.Builder())
    nkwargs = [random_kwargs(kwargs) for i in range(n)]
    return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)])


def grade_Q1():
    x = nn.Input("x")
    return is_same(x, 1, x = (9,)) and is_same(x, 1, x = (9, 9))


def grade_Q2():
    c1 = nn.Const(np.random.random((10,)))
    c2 = nn.Const(np.random.random((10, 10)))
    return is_same(c1, 1) and is_same(c2, 1)


def grade_Q3():
    x = nn.Input("x")
    y = nn.Input("y")
    z = x+y
    return all([
        is_same(z, 1, x = (11,), y = (11,)),
        is_same(z, 1, x = (11, 12), y = (11, 12)),
        is_same(z, 1, x = (11, 12, 13), y = (11, 12, 13)),
        is_same(z, 1, x = (11, 12, 13, 14), y = (11, 12, 13, 14))])


def grade_Q4():
    x = nn.Input("x")
    y = nn.Input("y")
    z = x-y
    return all([
        is_same(z, 1, x = (11,), y = (11,)),
        is_same(z, 1, x = (11, 12), y = (11, 12)),
        is_same(z, 1, x = (11, 12, 13), y = (11, 12, 13)),
        is_same(z, 1, x = (11, 12, 13, 14), y = (11, 12, 13, 14))])


def grade_Q5():
    x = nn.Input("x")
    y = nn.Input("y")
    z = x*y
    return is_same(z, 1, x = (11, 12), y = (12, 13))


def grade_Q6():
    x = nn.Input("x")
    y = nn.Input("y")
    z = x*y
    return is_same(z, 1, x = None, y = (12, 13)) and is_same(z, 1, x = (11, 12), y = None)


def grade_Q7():
    x = nn.Input("x")
    y = nn.Input("y")
    z = nn.Input("z")
    r = x+y*z
    return is_same(r, 1, x = (11, 13), y = (11, 12), z = (12, 13))


def grade_Q8():
    x = nn.Input("x")
    y = nn.Input("y")
    z = nn.Input("z")
    r = (x-y)*z
    return is_same(r, 1, x = (11, 12), y = (11, 12), z = (12, 13))


def grade_Q9():
    x = nn.Input("x")
    y = nn.Input("y")
    z = nn.Input("z")
    r = x*y*z
    return is_same(r, 1, x = (11, 12), y = (12, 13), z = (13, 14))


def grade_Q10():
    x = nn.Input("x")
    y = nn.Input("y")
    c1 = nn.Const(np.random.random((11, 12)))
    c2 = nn.Const(np.random.random((12, 13)))
    z = x*y-x*c2-c1*y+c1*c2
    return is_same(z, 1, x = (11, 12), y = (12, 13))


grade.grade_all("p3", 1, 11, globals())
151
grade_p4.py
Executable file
@@ -0,0 +1,151 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import easynn_cpp as cpp
import grade


def random_kwargs(kwargs):
    return {k: np.random.random(shape) if shape is not None else np.random.random() for k, shape in kwargs.items()}


def is_same(p, n, **kwargs):
    e0 = p.compile(golden.Builder())
    e1 = p.compile(cpp.Builder())
    nkwargs = [random_kwargs(kwargs) for i in range(n)]
    return all([np.allclose(e0(**nkwargs[i]), e1(**nkwargs[i])) for i in range(n)])


def grade_Q1():
    relu = nn.ReLU()
    x = relu(nn.Input("x"))
    return is_same(x, 1, x = (10, 11, 12, 13))


def grade_Q2():
    flatten = nn.Flatten()
    x = flatten(nn.Input("x"))
    return is_same(x, 1, x = (10, 11, 12, 13))


def grade_Q3():
    x = nn.Input2d("images", 10, 11, 3)
    return is_same(x, 1, images = (50, 10, 11, 3))


def grade_Q4():
    f = nn.Linear("f", 100, 10)
    x = f(nn.Input("x"))
    x.resolve({
        "f.weight": np.random.random((10, 100)),
        "f.bias": np.random.random((10,))})
    return is_same(x, 1, x = (50, 100))


def grade_Q5():
    pool = nn.MaxPool2d(3, 3)
    x = pool(nn.Input2d("x", 12, 15, 3))
    return is_same(x, 1, x = (10, 12, 15, 3))


def grade_Q6():
    c = nn.Conv2d("c", 3, 16, 5)
    x = c(nn.Input2d("x", 15, 20, 3))
    x.resolve({
        "c.weight": np.random.random((16, 3, 5, 5)),
        "c.bias": np.random.random((16,))
    })
    return is_same(x, 1, x = (10, 15, 20, 3))


def grade_Q7():
    relu = nn.ReLU()
    flatten = nn.Flatten()
    f1 = nn.Linear("f1", 28*28, 100)
    f2 = nn.Linear("f2", 100, 10)
    x = nn.Input2d("images", 28, 28, 1)
    x = flatten(x)
    x = f2(relu(f1(x)))
    x.resolve(np.load("msimple_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"][:20]

    infer0 = x.compile(golden.Builder())
    infer1 = x.compile(cpp.Builder())
    logit0 = infer0(images = images)
    logit1 = infer1(images = images)
    return np.allclose(logit0, logit1)


def grade_Q8():
    relu = nn.ReLU()
    flatten = nn.Flatten()
    f1 = nn.Linear("f1", 28*28, 100)
    f2 = nn.Linear("f2", 100, 10)
    x = nn.Input2d("images", 28, 28, 1)
    x = flatten(x)
    x = f2(relu(f1(x)))
    x.resolve(np.load("msimple_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"][:1000]

    infer0 = x.compile(golden.Builder())
    infer1 = x.compile(cpp.Builder())
    label0 = infer0(images = images).argmax(axis = 1)
    label1 = infer1(images = images).argmax(axis = 1)
    return np.allclose(label0, label1)


def grade_Q9():
    pool = nn.MaxPool2d(2, 2)
    relu = nn.ReLU()
    flatten = nn.Flatten()

    x = nn.Input2d("images", 28, 28, 1)
    c1 = nn.Conv2d("c1", 1, 8, 3) # 28->26
    c2 = nn.Conv2d("c2", 8, 8, 3) # 26->24
    x = pool(relu(c2(relu(c1(x))))) # 24->12
    c3 = nn.Conv2d("c3", 8, 16, 3) # 12->10
    c4 = nn.Conv2d("c4", 16, 16, 3) # 10->8
    x = pool(relu(c4(relu(c3(x))))) # 8->4
    f = nn.Linear("f", 16*4*4, 10)
    x = f(flatten(x))

    x.resolve(np.load("mnist_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"][:20]

    infer0 = x.compile(golden.Builder())
    infer1 = x.compile(cpp.Builder())
    logit0 = infer0(images = images)
    logit1 = infer1(images = images)
    return np.allclose(logit0, logit1)


def grade_Q10():
    pool = nn.MaxPool2d(2, 2)
    relu = nn.ReLU()
    flatten = nn.Flatten()

    x = nn.Input2d("images", 28, 28, 1)
    c1 = nn.Conv2d("c1", 1, 8, 3) # 28->26
    c2 = nn.Conv2d("c2", 8, 8, 3) # 26->24
    x = pool(relu(c2(relu(c1(x))))) # 24->12
    c3 = nn.Conv2d("c3", 8, 16, 3) # 12->10
    c4 = nn.Conv2d("c4", 16, 16, 3) # 10->8
    x = pool(relu(c4(relu(c3(x))))) # 8->4
    f = nn.Linear("f", 16*4*4, 10)
    x = f(flatten(x))

    x.resolve(np.load("mnist_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"][:1000]

    infer0 = x.compile(golden.Builder())
    infer1 = x.compile(cpp.Builder())
    label0 = infer0(images = images).argmax(axis = 1)
    label1 = infer1(images = images).argmax(axis = 1)
    return np.allclose(label0, label1)


grade.grade_all("p4", 1, 11, globals())
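The '28->26' style comments in Q9/Q10 track spatial size through the network (sketch, not part of the commit): a k-by-k conv with no padding shrinks each side by k-1, and 2x2 pooling halves it, which is where the 16*4*4 input size of the final Linear layer comes from.

h = 28
h = h - 3 + 1   # c1: 26
h = h - 3 + 1   # c2: 24
h = h // 2      # pool: 12
h = h - 3 + 1   # c3: 10
h = h - 3 + 1   # c4: 8
h = h // 2      # pool: 4
print(16 * h * h)   # 256 inputs to nn.Linear("f", 16*4*4, 10)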
79
grade_p5.py
Executable file
@@ -0,0 +1,79 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import grade

def grade_Q1():
    relu = nn.ReLU()
    flatten = nn.Flatten()
    f1 = nn.Linear("f1", 28*28, 32)
    f2 = nn.Linear("f2", 32, 10)
    x = nn.Input2d("images", 28, 28, 1)
    x = flatten(x)
    x = f2(relu(f1(x)))
    x.resolve(np.load("p5_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"]
    labels = mnist_test["labels"]

    infer = x.compile(golden.Builder())
    pred_labels = infer(images = images).argmax(axis = 1)
    count = sum(labels == pred_labels)
    print(count)
    return count > 8500

def grade_Q2():
    relu = nn.ReLU()
    flatten = nn.Flatten()
    f1 = nn.Linear("f1", 28*28, 32)
    f2 = nn.Linear("f2", 32, 10)
    x = nn.Input2d("images", 28, 28, 1)
    x = flatten(x)
    x = f2(relu(f1(x)))
    x.resolve(np.load("p5_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"]
    labels = mnist_test["labels"]

    infer = x.compile(golden.Builder())
    pred_labels = infer(images = images).argmax(axis = 1)
    count = sum(labels == pred_labels)
    return count > 9000

def grade_Q3():
    relu = nn.ReLU()
    flatten = nn.Flatten()
    f1 = nn.Linear("f1", 28*28, 32)
    f2 = nn.Linear("f2", 32, 10)
    x = nn.Input2d("images", 28, 28, 1)
    x = flatten(x)
    x = f2(relu(f1(x)))
    x.resolve(np.load("p5_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"]
    labels = mnist_test["labels"]

    infer = x.compile(golden.Builder())
    pred_labels = infer(images = images).argmax(axis = 1)
    count = sum(labels == pred_labels)
    return count > 9300

def grade_Q4():
    relu = nn.ReLU()
    flatten = nn.Flatten()
    f1 = nn.Linear("f1", 28*28, 32)
    f2 = nn.Linear("f2", 32, 10)
    x = nn.Input2d("images", 28, 28, 1)
    x = flatten(x)
    x = f2(relu(f1(x)))
    x.resolve(np.load("p5_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"]
    labels = mnist_test["labels"]

    infer = x.compile(golden.Builder())
    pred_labels = infer(images = images).argmax(axis = 1)
    count = sum(labels == pred_labels)
    return count > 9500

grade.grade_all("p5", 1, 5, globals())
91
grade_p6.py
Executable file
@@ -0,0 +1,91 @@
import numpy as np
import easynn as nn
import easynn_golden as golden
import grade

def grade_Q1():
    pool = nn.MaxPool2d(2, 2)
    relu = nn.ReLU()
    flatten = nn.Flatten()
    x = nn.Input2d("images", 28, 28, 1)
    c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24
    x = pool(relu(c1(x))) # 24->12
    c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8
    x = pool(relu(c2(x))) # 8->4
    f = nn.Linear("f", 8*4*4, 10)
    x = f(flatten(x))
    x.resolve(np.load("p6_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"]
    labels = mnist_test["labels"]

    infer = x.compile(golden.Builder())
    pred_labels = infer(images = images).argmax(axis = 1)
    count = sum(labels == pred_labels)
    print(count)
    return count > 8500

def grade_Q2():
    pool = nn.MaxPool2d(2, 2)
    relu = nn.ReLU()
    flatten = nn.Flatten()
    x = nn.Input2d("images", 28, 28, 1)
    c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24
    x = pool(relu(c1(x))) # 24->12
    c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8
    x = pool(relu(c2(x))) # 8->4
    f = nn.Linear("f", 8*4*4, 10)
    x = f(flatten(x))
    x.resolve(np.load("p6_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"]
    labels = mnist_test["labels"]

    infer = x.compile(golden.Builder())
    pred_labels = infer(images = images).argmax(axis = 1)
    count = sum(labels == pred_labels)
    return count > 9000

def grade_Q3():
    pool = nn.MaxPool2d(2, 2)
    relu = nn.ReLU()
    flatten = nn.Flatten()
    x = nn.Input2d("images", 28, 28, 1)
    c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24
    x = pool(relu(c1(x))) # 24->12
    c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8
    x = pool(relu(c2(x))) # 8->4
    f = nn.Linear("f", 8*4*4, 10)
    x = f(flatten(x))
    x.resolve(np.load("p6_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"]
    labels = mnist_test["labels"]

    infer = x.compile(golden.Builder())
    pred_labels = infer(images = images).argmax(axis = 1)
    count = sum(labels == pred_labels)
    return count > 9300

def grade_Q4():
    pool = nn.MaxPool2d(2, 2)
    relu = nn.ReLU()
    flatten = nn.Flatten()
    x = nn.Input2d("images", 28, 28, 1)
    c1 = nn.Conv2d("c1", 1, 8, 5) # 28->24
    x = pool(relu(c1(x))) # 24->12
    c2 = nn.Conv2d("c2", 8, 8, 5) # 12->8
    x = pool(relu(c2(x))) # 8->4
    f = nn.Linear("f", 8*4*4, 10)
    x = f(flatten(x))
    x.resolve(np.load("p6_params.npz"))
    mnist_test = np.load("mnist_test.npz")
    images = mnist_test["images"]
    labels = mnist_test["labels"]

    infer = x.compile(golden.Builder())
    pred_labels = infer(images = images).argmax(axis = 1)
    count = sum(labels == pred_labels)
    return count > 9500

grade.grade_all("p6", 1, 5, globals())
BIN
mnist_params.npz
Executable file
Binary file not shown.
BIN
mnist_test.npz
Executable file
Binary file not shown.
BIN
mnist_train.npz
Executable file
Binary file not shown.
BIN
msimple_params.npz
Executable file
Binary file not shown.
BIN
p5_params.npz
Executable file
Binary file not shown.
76
prj01.py
Executable file
@@ -0,0 +1,76 @@
import numpy as np
import easynn as nn

# Create a numpy array of 10 rows and 5 columns.
# Set the element at row i and column j to be i+j.
def Q1():
    array = np.zeros((10, 5))

    for i in range(10):
        for j in range(5):
            array[i, j] = i+j

    return array

# Add two numpy arrays together.
def Q2(a, b):
    return a + b

# Multiply two 2D numpy arrays using matrix multiplication.
def Q3(a, b):
    return np.matmul(a, b)

# For each row of a 2D numpy array, find the column index
# with the maximum element. Return all these column indices.
def Q4(a):
    return [np.argmax(row) for row in a]

# Solve Ax = b.
def Q5(A, b):
    return np.linalg.solve(A, b)

# Return an EasyNN expression for a+b.
def Q6():
    a = nn.Input('a')
    b = nn.Input('b')
    c = a+b
    return c

# Return an EasyNN expression for a+b*c.
def Q7():
    a = nn.Input('a')
    b = nn.Input('b')
    c = nn.Input('c')
    d = a+b*c
    return d

# Given A and b, return an EasyNN expression for Ax+b.
def Q8(A, b):
    A_const = nn.Const(A)
    b_const = nn.Const(b)
    x_input = nn.Input("x")

    expr = (A_const*x_input) + b_const

    return expr

# Given n, return an EasyNN expression for x**n.
def Q9(n):
    x = nn.Input("x")

    # multiply x by itself n-1 times to build x**n
    expr = x
    for _ in range(n - 1):
        expr = expr * x

    return expr

# Return an EasyNN expression for the element-wise absolute value |x|.
def Q10():
    x = nn.Input("x")
    # ReLU(x) = max(x, 0) and x - ReLU(x) = min(x, 0),
    # so their difference is |x|
    max = nn.ReLU()(x)
    min = x - max
    abs = max - min
    return abs
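A quick sanity check of the Q10 identity (not part of the commit): since ReLU(x) = max(x, 0) and x - ReLU(x) = min(x, 0), the difference ReLU(x) - (x - ReLU(x)) is |x| elementwise.

import numpy as np
relu = lambda v: v*(v > 0)     # same elementwise ReLU the golden backend uses
x = np.random.randn(4, 4)
assert np.allclose(relu(x) - (x - relu(x)), np.abs(x))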
274
prj05.py
Executable file
@@ -0,0 +1,274 @@
import numpy as np
import time
import pdb

from matplotlib import pyplot

# save theta to p5_params.npz that can be used by easynn
def save_theta(theta):
    f1_W, f1_b, f2_W, f2_b = theta

    np.savez_compressed("p5_params.npz", **{
        "f1.weight": f1_W,
        "f1.bias": f1_b,
        "f2.weight": f2_W,
        "f2.bias": f2_b
    })


# initialize theta using uniform distribution [-bound, bound]
# return theta as (f1_W, f1_b, f2_W, f2_b)
def initialize_theta(bound):
    f1_W = np.random.uniform(-bound, bound, (32, 784))
    f1_b = np.random.uniform(-bound, bound, 32)
    f2_W = np.random.uniform(-bound, bound, (10, 32))
    f2_b = np.random.uniform(-bound, bound, 10)
    return (f1_W, f1_b, f2_W, f2_b)


# forward:
#   x = Flatten(images)
#   g = Linear_f1(x)
#   h = ReLU(g)
#   z = Linear_f2(h)
# return (z, h, g, x)
def forward(images, theta):
    # number of samples
    N = images.shape[0]

    # unpack theta into f1 and f2
    f1_W, f1_b, f2_W, f2_b = theta

    # x = Flatten(images)
    x = images.astype(float).transpose(0,3,1,2).reshape((N, -1))

    # g = Linear_f1(x)
    g = np.zeros((N, f1_b.shape[0]))
    for i in range(N):
        g[i, :] = np.matmul(f1_W, x[i])+f1_b

    # h = ReLU(g)
    h = g*(g > 0)

    # z = Linear_f2(h)
    z = np.zeros((N, f2_b.shape[0]))
    for i in range(N):
        z[i, :] = np.matmul(f2_W, h[i])+f2_b

    return (z, h, g, x)


# backprop:
#   J = cross entropy between labels and softmax(z)
# return nabla_J
def backprop(labels, theta, z, h, g, x):
    # number of samples
    N = labels.shape[0]

    # unpack theta into f1 and f2
    f1_W, f1_b, f2_W, f2_b = theta

    # nabla_J consists of partial J to partial f1_W, f1_b, f2_W, f2_b
    p_f1_W = np.zeros(f1_W.shape)
    p_f1_b = np.zeros(f1_b.shape)
    p_f2_W = np.zeros(f2_W.shape)
    p_f2_b = np.zeros(f2_b.shape)

    for i in range(N):
        # compute the contribution to nabla_J for sample i

        # cross entropy and softmax
        # compute partial J to partial z[i]
        # scale by 1/N for averaging
        expz = np.exp(z[i]-max(z[i]))
        p_z = expz/sum(expz)/N
        p_z[labels[i]] -= 1/N

        # z = Linear_f2(h)
        # compute partial J to partial h[i]
        # accumulate partial J to partial f2_W, f2_b
        p_h = np.dot(f2_W.T, p_z)
        p_f2_W += np.outer(p_z, h[i])
        p_f2_b += p_z

        # h = ReLU(g)
        # compute partial J to partial g[i]
        p_g = p_h * (g[i] > 0)

        # g = Linear_f1(x)
        # accumulate partial J to partial f1_W, f1_b
        p_f1_W += np.outer(p_g, x[i])
        p_f1_b += p_g

    return (p_f1_W, p_f1_b, p_f2_W, p_f2_b)


# apply SGD to update theta by nabla_J and the learning rate epsilon
# return updated theta
def update_theta(theta, nabla_J, epsilon):
    f1_W, f1_b, f2_W, f2_b = theta
    p_f1_W, p_f1_b, p_f2_W, p_f2_b = nabla_J

    # update the weights and biases for the first layer (f1)
    f1_W_updated = f1_W - epsilon * p_f1_W
    f1_b_updated = f1_b - epsilon * p_f1_b

    # update the weights and biases for the second layer (f2)
    f2_W_updated = f2_W - epsilon * p_f2_W
    f2_b_updated = f2_b - epsilon * p_f2_b

    return (f1_W_updated, f1_b_updated, f2_W_updated, f2_b_updated)


def print_training_hyperparams_for_session(epsilon, batch_size, bound):
    print("Starting training session with 10 epochs:")
    print("")
    print("Hyperparameters:")
    print(f"epsilon: {epsilon}")
    print(f"bound: {bound}")
    print(f"batch_size: {batch_size}")
    print("")
    print("Results:")


def plot_epoch(epochs, accuracies, epsilon, batch_size, bound):
    pyplot.figure(figsize=(10, 6))
    pyplot.plot(epochs, accuracies, label=f"Epsilon: {epsilon}, Batch Size: {batch_size}, Bound: {bound}")
    pyplot.xlabel('Epoch')
    pyplot.ylabel('Accuracy')
    pyplot.title('Training Accuracy over Epochs')
    pyplot.legend()
    pyplot.grid(True)
    pyplot.show()


def plot_all_epochs(training_results):
    pyplot.figure(figsize=(12, 8))

    for epochs, accuracies, epsilon, batch_size, bound in training_results:
        label = f"Epsilon: {epsilon}, Batch Size: {batch_size}, Bound: {bound}"
        pyplot.plot(epochs, accuracies, label=label)

    pyplot.xlabel('Epoch')
    pyplot.ylabel('Accuracy')
    pyplot.title('Training Accuracy over Epochs for Different Hyperparameters')
    pyplot.legend()
    pyplot.grid(True)
    pyplot.show()


def plot_table(training_results):
    # Setting up the data for the table
    cell_text = []
    columns = ['Epoch', 'Accuracy', 'Epsilon', 'Batch Size', 'Bound']
    for result in training_results:
        epochs, accuracies, epsilon, batch_size, bound = result
        for epoch, accuracy in zip(epochs, accuracies):
            cell_text.append([epoch, f"{accuracy:.3f}", epsilon, batch_size, bound])

    # Determine the figure size needed for the table
    figsize = (10, len(cell_text) * 0.2)
    fig, ax = pyplot.subplots(figsize=figsize)
    ax.axis('tight')
    ax.axis('off')

    # Create the table
    table = ax.table(cellText=cell_text, colLabels=columns, loc='center', cellLoc='center')

    # Adjust table scale
    table.auto_set_font_size(False)
    table.set_fontsize(8)
    table.auto_set_column_width(col=list(range(len(columns))))

    pyplot.show()


def start_training(epsilon, batch_size, bound, mnist_train):

    # numpy random seed set to the last 8 digits of my CWID
    np.random.seed(20497299)

    validation_images = mnist_train["images"][:1000]
    validation_labels = mnist_train["labels"][:1000]
    training_images = mnist_train["images"][1000:]
    training_labels = mnist_train["labels"][1000:]

    # hyperparameters: we can experiment with these values to see whether
    # increasing or decreasing them influences accuracy; the defaults were
    # bound = 1 (initial weight range) and epsilon = 0.00001 (learning rate)

    #print_training_hyperparams_for_session(epsilon, batch_size, bound)

    # start training
    accuracies = []
    epochs = []
    start = time.time()
    theta = initialize_theta(bound)
    batches = training_images.shape[0]//batch_size
    for epoch in range(10):
        indices = np.arange(training_images.shape[0])
        np.random.shuffle(indices)
        for i in range(batches):
            batch_images = training_images[indices[i*batch_size:(i+1)*batch_size]]
            batch_labels = training_labels[indices[i*batch_size:(i+1)*batch_size]]

            z, h, g, x = forward(batch_images, theta)
            nabla_J = backprop(batch_labels, theta, z, h, g, x)
            theta = update_theta(theta, nabla_J, epsilon)

        # check accuracy using validation examples
        z, _, _, _ = forward(validation_images, theta)
        pred_labels = z.argmax(axis = 1)
        accuracy = sum(pred_labels == validation_labels) / validation_images.shape[0]
        accuracies.append(accuracy)
        epochs.append(epoch)
        print("epoch %d, accuracy %.3f, time %.2f" % (
            epoch, accuracy, time.time()-start))

    #plot_epoch(epochs, accuracies, epsilon, batch_size, bound)

    # save the weights to be submitted
    save_theta(theta)

    # return this data so we can plot it with matplotlib
    return epochs, accuracies


def main():
    training_results = []

    # load training data once
    mnist_train = np.load("mnist_train.npz")

    # we can add to this list if we want to test combinations of
    # hyperparameters; each tuple is (epsilon, bound, batch_size)
    hyperparams = [
        (0.00001, 1, 4), # default params
        (0.00001, 0.1, 4),
        (0.00001, 0.5, 4),
        (0.00001, 0.7, 4),
        (0.00001, 0.01, 4),
        (0.00001, 0.01, 3),
        (0.00001, 0.01, 2),
        (0.000013, 0.012, 1),
        (0.000013, 0.012002899999999983, 1),
        (0.000013, 0.01200591999999996, 1),
    ]

    for epsilon, bound, batch_size in hyperparams:
        epochs, accuracies = start_training(epsilon, batch_size, bound, mnist_train)
        training_results.append((epochs, accuracies, epsilon, batch_size, bound))

    # uncomment if you would like to see plotted results
    #plot_all_epochs(training_results)

    # plot table
    #plot_table(training_results)

if __name__ == '__main__':
    main()
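Aside (not part of the commit): backprop() above can be spot-checked against a numerical gradient of the same averaged cross-entropy loss. The sketch below assumes forward, backprop, and initialize_theta from prj05.py are in scope.

import numpy as np

def mean_ce_loss(images, labels, theta):
    z, _, _, _ = forward(images, theta)
    z = z - z.max(axis = 1, keepdims = True)          # stable log-softmax
    logp = z - np.log(np.exp(z).sum(axis = 1, keepdims = True))
    return -logp[np.arange(labels.shape[0]), labels].mean()

theta = initialize_theta(0.01)
images = np.random.rand(5, 28, 28, 1)                 # tiny NHWC batch
labels = np.random.randint(0, 10, 5)
z, h, g, x = forward(images, theta)
p_f1_W = backprop(labels, theta, z, h, g, x)[0]

eps = 1e-6                                            # central difference on one weight
theta[0][0, 0] += eps; lp = mean_ce_loss(images, labels, theta)
theta[0][0, 0] -= 2*eps; lm = mean_ce_loss(images, labels, theta)
print((lp - lm)/(2*eps), p_f1_W[0, 0])                # should agree to several digits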
374
prj06.py
Executable file
374
prj06.py
Executable file
|
@ -0,0 +1,374 @@
|
||||||
|
import numpy as np
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
# save theta to p6_params.npz that can be used by easynn
|
||||||
|
def save_theta(theta):
|
||||||
|
c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta
|
||||||
|
|
||||||
|
np.savez_compressed("p6_params.npz", **{
|
||||||
|
"c1.weight": c1_W,
|
||||||
|
"c1.bias": c1_b,
|
||||||
|
"c2.weight": c2_W,
|
||||||
|
"c2.bias": c2_b,
|
||||||
|
"f.weight": f_W,
|
||||||
|
"f.bias": f_b
|
||||||
|
})
|


# initialize theta using uniform distribution [-bound, bound]
# return theta as (c1_W, c1_b, c2_W, c2_b, f_W, f_b)
def initialize_theta(bound):
    c1_W = np.random.uniform(-bound, bound, (8, 1, 5, 5))
    c1_b = np.random.uniform(-bound, bound, 8)
    c2_W = np.random.uniform(-bound, bound, (8, 8, 5, 5))
    c2_b = np.random.uniform(-bound, bound, 8)
    f_W = np.random.uniform(-bound, bound, (10, 128))
    f_b = np.random.uniform(-bound, bound, 10)
    return (c1_W, c1_b, c2_W, c2_b, f_W, f_b)


# return out_nchw
def Conv2d(in_nchw, kernel_W, kernel_b):
    OC, IC, KH, KW = kernel_W.shape
    N, C, H, W = in_nchw.shape
    if C != IC or kernel_b.shape != (OC,):
        raise Exception("Conv2d size mismatch: %s @ (%s, %s)" % (
            in_nchw.shape, kernel_W.shape, kernel_b.shape))

    # view in_nchw as a 6D tensor
    shape = (N, IC, H-KH+1, W-KW+1, KH, KW)
    strides = in_nchw.strides+in_nchw.strides[2:]
    data = np.lib.stride_tricks.as_strided(in_nchw,
        shape = shape, strides = strides, writeable = False)
    # np.einsum("nihwyx,oiyx->nohw", data, kernel_W)
    nhwo = np.tensordot(data, kernel_W, ((1,4,5), (1,2,3)))
    return nhwo.transpose(0,3,1,2)+kernel_b.reshape((1, OC, 1, 1))
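

# Cross-check sketch for Conv2d above: the strided-view-plus-tensordot version
# should agree with a direct quadruple loop. The helper below is hypothetical
# and illustrative only, written against the same NCHW/OIHW conventions.
def _conv2d_naive(in_nchw, kernel_W, kernel_b):
    OC, IC, KH, KW = kernel_W.shape
    N, C, H, W = in_nchw.shape
    out = np.zeros((N, OC, H-KH+1, W-KW+1))
    for n in range(N):
        for o in range(OC):
            for h in range(H-KH+1):
                for w in range(W-KW+1):
                    out[n, o, h, w] = np.sum(
                        in_nchw[n, :, h:h+KH, w:w+KW]*kernel_W[o]) + kernel_b[o]
    return out
# e.g. np.allclose(Conv2d(x, W, b), _conv2d_naive(x, W, b)) should hold.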


# return p_b for the whole batch
def Conv2d_backprop_b(p_out, in_nchw, kernel_W, kernel_b):
    OC, IC, KH, KW = kernel_W.shape
    N, C, H, W = in_nchw.shape
    if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1):
        raise Exception("Conv2d_backprop_b size mismatch: %s = %s @ (%s, %s)" % (
            p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape))

    return np.einsum("nohw->o", p_out, optimize = "optimal")/N


# return p_W for the whole batch
def Conv2d_backprop_W(p_out, in_nchw, kernel_W, kernel_b):
    OC, IC, KH, KW = kernel_W.shape
    N, C, H, W = in_nchw.shape
    if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1):
        raise Exception("Conv2d_backprop_W size mismatch: %s = %s @ (%s, %s)" % (
            p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape))

    # view in_nchw as a 6D tensor
    shape = (N, IC, KH, KW, H-KH+1, W-KW+1)
    strides = in_nchw.strides+in_nchw.strides[2:]
    data = np.lib.stride_tricks.as_strided(in_nchw,
        shape = shape, strides = strides, writeable = False)
    # np.einsum("nohw,niyxhw->oiyx", p_out, data)
    return np.tensordot(p_out, data, ((0,2,3), (0,4,5)))/N


# return p_in for the whole batch
def Conv2d_backprop_in(p_out, in_nchw, kernel_W, kernel_b):
    OC, IC, KH, KW = kernel_W.shape
    N, C, H, W = in_nchw.shape
    if C != IC or kernel_b.shape != (OC,) or p_out.shape != (N, OC, H-KH+1, W-KW+1):
        raise Exception("Conv2d_backprop_in size mismatch: %s = %s @ (%s, %s)" % (
            p_out.shape, in_nchw.shape, kernel_W.shape, kernel_b.shape))

    # view p_out as a padded 6D tensor (note the channel axis here is OC,
    # the channel count of p_out)
    padded = np.zeros((N, OC, H+KH-1, W+KW-1))
    padded[:, :, KH-1:-KH+1, KW-1:-KW+1] = p_out
    shape = (N, OC, H, W, KH, KW)
    strides = padded.strides+padded.strides[2:]
    data = np.lib.stride_tricks.as_strided(padded,
        shape = shape, strides = strides, writeable = False)
    # the forward pass is a cross-correlation, so backprop to the input
    # convolves with the kernel rotated by 180 degrees:
    # np.einsum("nohwyx,oiyx->nihw", data, kernel_W[:, :, ::-1, ::-1])
    nhwi = np.tensordot(data, kernel_W[:, :, ::-1, ::-1], ((1,4,5), (0,2,3)))
    return nhwi.transpose((0,3,1,2))


# return out_nchw
def MaxPool_2by2(in_nchw):
    N, C, H, W = in_nchw.shape
    shape = (N, C, H//2, 2, W//2, 2)
    return np.nanmax(in_nchw.reshape(shape), axis = (3,5))


# return p_in for the whole batch
def MaxPool_2by2_backprop(p_out, out_nchw, in_nchw):
    p_in = np.zeros(in_nchw.shape)
    p_in[:, :, 0::2, 0::2] = p_out*(out_nchw == in_nchw[:, :, 0::2, 0::2])
    p_in[:, :, 0::2, 1::2] = p_out*(out_nchw == in_nchw[:, :, 0::2, 1::2])
    p_in[:, :, 1::2, 0::2] = p_out*(out_nchw == in_nchw[:, :, 1::2, 0::2])
    p_in[:, :, 1::2, 1::2] = p_out*(out_nchw == in_nchw[:, :, 1::2, 1::2])
    return p_in
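

# Note on MaxPool_2by2_backprop above: when two cells of a 2x2 tile tie for
# the maximum, the gradient is routed to every tying cell, not to a single
# argmax. A tiny illustrative check (hypothetical helper, not assignment code):
def _maxpool_tie_demo():
    x = np.array([[[[1.0, 1.0], [0.0, 0.0]]]])   # N=C=1, one 2x2 tile, tied max
    y = MaxPool_2by2(x)                          # -> 1.0
    p = MaxPool_2by2_backprop(np.ones_like(y), y, x)
    return p                                     # both tied cells receive 1.0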


# forward:
#   x = NCHW(images)
#   cx = Conv2d_c1(x)
#   rx = ReLU(cx)
#   y = MaxPool(rx)
#   cy = Conv2d_c2(y)
#   ry = ReLU(cy)
#   g = MaxPool(ry)
#   h = Flatten(g)
#   z = Linear_f(h)
# return (z, h, g, ry, cy, y, rx, cx, x)
def forward(images, theta):
    # number of samples
    N = images.shape[0]

    # unpack theta into c1, c2, and f
    c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta

    # x = NCHW(images)
    x = images.astype(float).transpose(0,3,1,2)

    # cx = Conv2d_c1(x)
    cx = Conv2d(x, c1_W, c1_b)

    # rx = ReLU(cx)
    rx = cx*(cx > 0)

    # y = MaxPool(rx)
    y = MaxPool_2by2(rx)

    # cy = Conv2d_c2(y)
    cy = Conv2d(y, c2_W, c2_b)

    # ry = ReLU(cy)
    ry = cy*(cy > 0)

    # g = MaxPool(ry)
    g = MaxPool_2by2(ry)

    # h = Flatten(g)
    h = g.reshape((N, -1))

    # z = Linear_f(h)
    z = np.zeros((N, f_b.shape[0]))
    for i in range(N):
        z[i, :] = np.matmul(f_W, h[i])+f_b

    return (z, h, g, ry, cy, y, rx, cx, x)
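

# backprop below relies on the closed-form gradient of softmax cross-entropy
# with respect to the logits, softmax(z) - onehot(label). Illustrative
# numerical check (hypothetical helper, not assignment code):
def _softmax_ce_grad_check():
    z0, label, eps = np.array([1.0, 2.0, 0.5]), 1, 1e-6
    loss = lambda z: -np.log(np.exp(z - z.max())[label]/np.exp(z - z.max()).sum())
    num = np.array([(loss(z0 + eps*np.eye(3)[k]) - loss(z0 - eps*np.eye(3)[k]))/(2*eps)
                    for k in range(3)])
    ana = np.exp(z0 - z0.max())/np.exp(z0 - z0.max()).sum()
    ana[label] -= 1
    assert np.allclose(num, ana, atol=1e-5)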


def backprop(labels, theta, z, h, g, ry, cy, y, rx, cx, x):
    # Number of samples
    N = labels.shape[0]

    # Unpack theta into c1, c2, and f
    c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta

    # Initialize gradients for the linear layer and its input
    p_f_W, p_f_b = np.zeros_like(f_W), np.zeros_like(f_b)
    p_h = np.zeros(h.shape)

    # Step 1: Backprop through the linear layer, sample by sample
    for i in range(N):
        # Softmax and cross-entropy gradient for sample i:
        # dJ_i/dz = softmax(z) - onehot(label)
        expz = np.exp(z[i] - np.max(z[i]))
        p_z = expz / np.sum(expz)
        p_z[labels[i]] -= 1

        # Backprop through the linear layer to accumulate gradients;
        # dJ_i/df_W is the outer product of p_z and the layer input h[i]
        p_h[i, :] = np.dot(f_W.T, p_z)
        p_f_W += np.outer(p_z, h[i, :])
        p_f_b += p_z

    # Average the linear-layer gradients over the batch; p_h stays per-sample
    # because the Conv2d_backprop_* helpers below divide by N themselves, so
    # each parameter gradient is averaged exactly once
    p_f_W /= N
    p_f_b /= N

    # Step 2: Backprop through flatten operation
    p_g = p_h.reshape(g.shape)

    # Step 3: Backprop through MaxPool layer
    p_ry = MaxPool_2by2_backprop(p_g, g, ry)

    # Step 4: Backprop through ReLU layer
    p_cy = (ry > 0) * p_ry

    # Step 5: Backprop through second convolutional layer
    p_c2_W = Conv2d_backprop_W(p_cy, y, c2_W, c2_b)
    p_c2_b = Conv2d_backprop_b(p_cy, y, c2_W, c2_b)
    p_y = Conv2d_backprop_in(p_cy, y, c2_W, c2_b)

    # Step 6: Backprop through first MaxPool layer
    p_rx = MaxPool_2by2_backprop(p_y, y, rx)

    # Step 7: Backprop through first ReLU layer
    p_cx = (rx > 0) * p_rx

    # Step 8: Backprop through first convolutional layer
    p_c1_W = Conv2d_backprop_W(p_cx, x, c1_W, c1_b)
    p_c1_b = Conv2d_backprop_b(p_cx, x, c1_W, c1_b)

    return (p_c1_W, p_c1_b, p_c2_W, p_c2_b, p_f_W, p_f_b)


# apply SGD to update theta by nabla_J and the learning rate epsilon
# return updated theta
def update_theta(theta, nabla_J, epsilon):
    # Unpack the current parameters and their corresponding gradients
    c1_W, c1_b, c2_W, c2_b, f_W, f_b = theta
    p_c1_W, p_c1_b, p_c2_W, p_c2_b, p_f_W, p_f_b = nabla_J

    # Update each parameter by subtracting the gradient times the learning rate
    c1_W_updated = c1_W - epsilon * p_c1_W
    c1_b_updated = c1_b - epsilon * p_c1_b
    c2_W_updated = c2_W - epsilon * p_c2_W
    c2_b_updated = c2_b - epsilon * p_c2_b
    f_W_updated = f_W - epsilon * p_f_W
    f_b_updated = f_b - epsilon * p_f_b

    # Return the updated parameters as a tuple
    updated_theta = (c1_W_updated, c1_b_updated, c2_W_updated, c2_b_updated, f_W_updated, f_b_updated)
    return updated_theta
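

# update_theta above is plain SGD: each parameter moves against its gradient,
# theta <- theta - epsilon * dJ/dtheta. Tiny illustrative check (hypothetical
# helper): one step on J(w) = w**2 from w=1 with epsilon=0.1 gives w=0.8.
def _sgd_step_demo(w=1.0, epsilon=0.1):
    grad = 2.0*w             # dJ/dw for J(w) = w**2
    return w - epsilon*grad  # -> 0.8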


def sample_attempts(accuracies, num_samples=10):
    total_attempts = len(accuracies)
    if total_attempts <= num_samples:
        return accuracies

    # Always include first and last attempts
    sampled_indices = [0, total_attempts - 1]

    # Sample additional attempts, excluding the first and last
    additional_samples = random.sample(range(1, total_attempts - 1), num_samples - 2)
    sampled_indices.extend(additional_samples)

    # Sort the indices to maintain chronological order
    sampled_indices.sort()

    # Get the sampled attempts
    sampled_accuracies = [accuracies[i] for i in sampled_indices]
    return sampled_accuracies


# hyperparameters
# default params
#bound = 1          # initial weight range
#epsilon = 0.00001  # learning rate
#batch_size = 4

# start training
def start_training(mnist_train, bound, epsilon, batch_size):
    print(f"bound: {bound}, epsilon: {epsilon}, batch_size: {batch_size}")
    validation_images = mnist_train["images"][:1000]
    validation_labels = mnist_train["labels"][:1000]
    training_images = mnist_train["images"][1000:21000]
    training_labels = mnist_train["labels"][1000:21000]
    #training_images = mnist_train["images"][1000:11000]
    #training_labels = mnist_train["labels"][1000:11000]

    start = time.time()
    theta = initialize_theta(bound)
    batches = training_images.shape[0]//batch_size

    accuracies = []

    for epoch in range(5):
        indices = np.arange(training_images.shape[0])
        np.random.shuffle(indices)
        for i in range(batches):
            batch_images = training_images[indices[i*batch_size:(i+1)*batch_size]]
            batch_labels = training_labels[indices[i*batch_size:(i+1)*batch_size]]

            z, h, g, ry, cy, y, rx, cx, x = forward(batch_images, theta)
            nabla_J = backprop(batch_labels, theta, z, h, g, ry, cy, y, rx, cx, x)
            theta = update_theta(theta, nabla_J, epsilon)

        # check accuracy using validation examples
        z, _, _, _, _, _, _, _, _ = forward(validation_images, theta)
        pred_labels = z.argmax(axis = 1)
        count = sum(pred_labels == validation_labels)
        accuracy = count/validation_images.shape[0]
        accuracies.append(accuracy)
        print("epoch %d, accuracy %.3f, time %.2f" % (
            epoch, accuracy, time.time()-start))

    # save the weights to be submitted
    save_theta(theta)
    return accuracies


def plot_accuracies(accuracies):
    plt.figure(figsize=(12, 6))

    for attempt, acc_list in accuracies:
        plt.plot(acc_list, label=f'Attempt {attempt+1}')

    plt.title('Model Accuracy over Epochs for Sampled Attempts')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    plt.show()


def main():
    # list of (attempt, accuracies_for_run)
    accuracies = []

    # ToDo: set numpy random seed to the last 8 digits of your CWID
    np.random.seed(20497299)

    # load training data once
    mnist_train = np.load("mnist_train.npz")

    max_accuracy = 0.93    # target accuracy
    max_attempts = 999999  # maximum number of attempts to reach the target accuracy

    # initial starting values; notes from experimentation:
    # reached 90 percent accuracy on attempt 36 when increasing
    # bound/epsilon by *= 1.01 per attempt
    bound = 0.01
    epsilon = 0.0001
    batch_size = 1

    # note: starting directly at the values below does not reach 90 percent;
    # it only works when starting at 0.01/0.0001 and multiplying by 1.01
    #bound = 0.014166027560312683
    #epsilon = 0.0001416602756031268
    #batch_size = 1

    attempt = 0
    while attempt < max_attempts:
        print(f"Attempt: {attempt} out of: {max_attempts}")
        accuracies_for_run = start_training(mnist_train, bound, epsilon, batch_size)
        accuracies.append((attempt, accuracies_for_run))

        if any(i >= max_accuracy for i in accuracies_for_run):
            print(f"Reached Max Accuracy: {max_accuracy}")
            print(f"bound: {bound}")
            print(f"epsilon: {epsilon}")
            print(f"batch_size: {batch_size}")
            sampled_accuracies = sample_attempts(accuracies)
            plot_accuracies(sampled_accuracies)
            break

        bound *= 1.0005    # increase by 0.05%
        epsilon *= 1.0005  # increase by 0.05%

        #bound *= 1.01    # increase by 1%
        #epsilon *= 1.01  # increase by 1%

        attempt += 1


if __name__ == "__main__":
    main()
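
# The commented "90 percent" values in main() follow directly from the
# geometric schedule: starting at bound=0.01 and multiplying by 1.01 per
# attempt, 35 steps give 0.01 * 1.01**35, which matches the recorded value
# (and likewise for epsilon). Illustrative check:
#   assert abs(0.01 * 1.01**35 - 0.014166027560312683) < 1e-12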
17
src/eval_add.cpp
Executable file
@ -0,0 +1,17 @@
#include "eval_add.h"

tensor eval_add::compute(const tensor &a, const tensor &b) {
    tensor c = a + b;
    return c;
}

void eval_add::eval(vars_type &variables, const kwargs_type &kwargs) {
    assert(inputs_.size() == 2);
    auto ita = variables.find(inputs_[0]);
    auto itb = variables.find(inputs_[1]);
    variables[expr_id_] = compute(ita->second, itb->second);
}

std::shared_ptr<eval_op> eval_add::clone(const expression &expr) {
    return std::make_shared<eval_add>(expr);
}
21
src/eval_add.h
Executable file
@ -0,0 +1,21 @@
#ifndef EVAL_ADD_H
#define EVAL_ADD_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_add: public eval_op {
    tensor compute(const tensor &a, const tensor &b);
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_add() {}
    eval_add(const expression &expr): eval_op(expr) {}
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Add") == proto_map.end());
        proto_map["Add"] = std::make_shared<eval_add>(); // where is expr?
    }
}; // class eval_add

#endif
17
src/eval_const.cpp
Executable file
@ -0,0 +1,17 @@
#include "eval_const.h"

eval_const::eval_const(const expression &expr): eval_op(expr) {
    auto params = expr.get_op_param_tensor("value");
    value_ = params;
    // value_.print(); // uncomment for viewing logging details of tensor object
}

void eval_const::eval(vars_type &variables, const kwargs_type &kwargs) {
    variables[expr_id_] = value_;
    // value_.print(); // uncomment for viewing logging details of tensor object
}

std::shared_ptr<eval_op> eval_const::clone(const expression &expr) {
    return std::make_shared<eval_const>(expr);
}
21
src/eval_const.h
Executable file
@ -0,0 +1,21 @@
#ifndef EVAL_CONST_H
#define EVAL_CONST_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_const: public eval_op {
    tensor value_;
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_const() {}
    eval_const(const expression &expr);
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Const") == proto_map.end());
        proto_map["Const"] = std::make_shared<eval_const>(); // where is expr?
    }
}; // class eval_const

#endif
36
src/eval_conv2d.cpp
Executable file
@ -0,0 +1,36 @@
#include "eval_conv2d.h"

eval_conv2d::eval_conv2d(const expression &expr): eval_op(expr),
    in_channels(expr.get_op_param_int("in_channels")),
    kernel_size(expr.get_op_param_int("kernel_size")),
    out_channels(expr.get_op_param_int("out_channels")),
    padding(expr.get_op_param_double("padding")),
    weight(expr.get_op_param_tensor("weight")),
    bias(expr.get_op_param_tensor("bias")),
    stride(expr.get_op_param_int("stride"))
{
}

void eval_conv2d::eval(vars_type &variables, const kwargs_type &kwargs) {
    if (inputs_.empty()) {
        throw std::runtime_error("No input tensor for eval_conv2d.");
    }

    // Get the input tensor from variables using the expression ID of the input
    int inputExprId = inputs_[0];
    if (variables.find(inputExprId) == variables.end()) {
        throw std::runtime_error("Input tensor not found in variables map.");
    }
    tensor input_tensor = variables.at(inputExprId);

    // Perform the Conv2D operation
    tensor conv2d_tensor = Conv2D(input_tensor, weight, bias, in_channels, out_channels, kernel_size);

    // Store the result in the variables map using the current expression ID
    variables[expr_id_] = conv2d_tensor;
}

std::shared_ptr<eval_op> eval_conv2d::clone(const expression &expr) {
    return std::make_shared<eval_conv2d>(expr);
}
38
src/eval_conv2d.h
Executable file
@ -0,0 +1,38 @@
#ifndef EVAL_CONV2D_H
#define EVAL_CONV2D_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_conv2d: public eval_op {
    tensor value_;
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_conv2d() {}
    eval_conv2d(const expression &expr);
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Conv2d") == proto_map.end());
        proto_map["Conv2d"] = std::make_shared<eval_conv2d>(); // where is expr?
    }
private:
    int in_channels;
    int kernel_size;
    int out_channels;
    double padding;
    tensor weight;
    tensor bias;
    int stride;
}; // class eval_conv2d

#endif
27
src/eval_flatten.cpp
Executable file
@ -0,0 +1,27 @@
#include "eval_flatten.h"

eval_flatten::eval_flatten(const expression &expr): eval_op(expr) {
}

void eval_flatten::eval(vars_type &variables, const kwargs_type &kwargs) {
    // Check that inputs_ is not empty
    assert(!inputs_.empty());

    // Retrieve the tensor from variables using the first input expression ID
    const int inputExprId = inputs_[0];
    tensor t = variables.at(inputExprId);

    // For debugging: print the input tensor
    std::cout << "Input Tensor for Flatten (expr_id " << inputExprId << "):" << std::endl;
    //t.print();

    // Perform the Flatten operation
    tensor flattenResult = Flatten(t);

    // Store the result in variables under the key expr_id_
    variables[expr_id_] = flattenResult;
}

std::shared_ptr<eval_op> eval_flatten::clone(const expression &expr) {
    return std::make_shared<eval_flatten>(expr);
}
21
src/eval_flatten.h
Executable file
@ -0,0 +1,21 @@
#ifndef EVAL_FLATTEN_H
#define EVAL_FLATTEN_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_flatten: public eval_op {
    tensor value_;
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_flatten() {}
    eval_flatten(const expression &expr);
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Flatten") == proto_map.end());
        proto_map["Flatten"] = std::make_shared<eval_flatten>(); // where is expr?
    }
}; // class eval_flatten

#endif
13
src/eval_input.cpp
Executable file
@ -0,0 +1,13 @@
#include "eval_input.h"

eval_input::eval_input(const expression &expr): eval_op(expr) {
}

void eval_input::eval(vars_type &variables, const kwargs_type &kwargs) {
    variables[expr_id_] = kwargs.at(op_name_);
}

std::shared_ptr<eval_op> eval_input::clone(const expression &expr) {
    return std::make_shared<eval_input>(expr);
}
21
src/eval_input.h
Executable file
@ -0,0 +1,21 @@
#ifndef EVAL_INPUT_H
#define EVAL_INPUT_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_input: public eval_op {
    tensor value_;
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_input() {}
    eval_input(const expression &expr);
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Input") == proto_map.end());
        proto_map["Input"] = std::make_shared<eval_input>(); // where is expr?
    }
}; // class eval_input

#endif
23
src/eval_input2d.cpp
Executable file
@ -0,0 +1,23 @@
#include "eval_input2d.h"

eval_input2d::eval_input2d(const expression &expr): eval_op(expr),
    height(expr.get_op_param_int("height")),
    width(expr.get_op_param_int("width")) {
}

void eval_input2d::eval(vars_type &variables, const kwargs_type &kwargs) {
    // Retrieve the tensor directly from kwargs using op_name_
    tensor t = kwargs.at(op_name_);

    // For debugging: print the input tensor
    std::cout << "Input Tensor for Input2d (op_name " << op_name_ << "):" << std::endl;
    //t.print();

    // Perform the Input2d operation
    tensor input2dResult = Input2d(t);

    // Store the result in variables under the key expr_id_
    variables[expr_id_] = input2dResult;
}

std::shared_ptr<eval_op> eval_input2d::clone(const expression &expr) {
    return std::make_shared<eval_input2d>(expr);
}
24
src/eval_input2d.h
Executable file
@ -0,0 +1,24 @@
#ifndef EVAL_INPUT2D_H
#define EVAL_INPUT2D_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_input2d: public eval_op {
    tensor value_;
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_input2d() {}
    eval_input2d(const expression &expr);
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Input2d") == proto_map.end());
        proto_map["Input2d"] = std::make_shared<eval_input2d>(); // where is expr?
    }
private:
    int height;
    int width;
}; // class eval_input2d

#endif
27
src/eval_linear.cpp
Executable file
@ -0,0 +1,27 @@
#include "eval_linear.h"

eval_linear::eval_linear(const expression &expr): eval_op(expr),
    weight(expr.get_op_param_tensor("weight")),
    bias(expr.get_op_param_tensor("bias")) {
}

void eval_linear::eval(vars_type &variables, const kwargs_type &kwargs) {
    if (inputs_.empty()) {
        throw std::runtime_error("No input tensor for eval_linear.");
    }

    int inputExprId = inputs_[0];
    if (variables.find(inputExprId) == variables.end()) {
        throw std::runtime_error("Input tensor not found in variables map.");
    }

    tensor x = variables.at(inputExprId);

    // Now we have x, weight, and bias available to perform the Linear operation
    tensor linearResult = Linear(weight, x, bias);

    variables[expr_id_] = linearResult;
}

std::shared_ptr<eval_op> eval_linear::clone(const expression &expr) {
    return std::make_shared<eval_linear>(expr);
}
24
src/eval_linear.h
Executable file
@ -0,0 +1,24 @@
#ifndef EVAL_LINEAR_H
#define EVAL_LINEAR_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_linear: public eval_op {
    tensor value_;
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_linear() {}
    eval_linear(const expression &expr);
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Linear") == proto_map.end());
        proto_map["Linear"] = std::make_shared<eval_linear>(); // where is expr?
    }
private:
    tensor weight; // weight tensor
    tensor bias;   // bias tensor
}; // class eval_linear

#endif
32
src/eval_maxpool2d.cpp
Executable file
@ -0,0 +1,32 @@
#include "eval_maxpool2d.h"

eval_maxpool2d::eval_maxpool2d(const expression &expr): eval_op(expr),
    kernel_size(expr.get_op_param_int("kernel_size")),
    stride(expr.get_op_param_int("stride")) {
    // Retrieve kernel size and stride from the expression
    //int kernel_size = expr.get_ker_and_str_value("kernel_size");
    //int stride = expr.get_ker_and_str_value("stride");
}

void eval_maxpool2d::eval(vars_type &variables, const kwargs_type &kwargs) {
    // Ensure that there is at least one input (the input tensor)
    if (inputs_.empty()) {
        throw std::runtime_error("No input tensor for eval_maxpool2d.");
    }

    int inputExprId = inputs_[0];
    if (variables.find(inputExprId) == variables.end()) {
        throw std::runtime_error("Input tensor not found in variables map.");
    }

    // Get the input tensor
    tensor input_tensor = variables.at(inputExprId);

    // Perform the max pooling operation
    tensor pooled_tensor = MaxPool2d(input_tensor, kernel_size, stride);

    // Store the result in the variables map using the expression ID
    variables[expr_id_] = pooled_tensor;
}

std::shared_ptr<eval_op> eval_maxpool2d::clone(const expression &expr) {
    return std::make_shared<eval_maxpool2d>(expr);
}
24
src/eval_maxpool2d.h
Executable file
@ -0,0 +1,24 @@
#ifndef EVAL_MAXPOOL2D_H
#define EVAL_MAXPOOL2D_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_maxpool2d: public eval_op {
    tensor value_;
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_maxpool2d() {}
    eval_maxpool2d(const expression &expr);
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("MaxPool2d") == proto_map.end());
        proto_map["MaxPool2d"] = std::make_shared<eval_maxpool2d>(); // where is expr?
    }
private:
    int kernel_size;
    int stride;
}; // class eval_maxpool2d

#endif
20
src/eval_mul.cpp
Executable file
@ -0,0 +1,20 @@
#include "eval_mul.h"

tensor eval_mul::compute(const tensor &a, const tensor &b) {
    // these use our tensor operator* overload
    tensor c = a * b;
    return c;
}

void eval_mul::eval(vars_type &variables, const kwargs_type &kwargs) {
    assert(inputs_.size() == 2);
    auto ita = variables.find(inputs_[0]);
    auto itb = variables.find(inputs_[1]);
    // errors for ita and itb are handled in the operator* override instead of here
    variables[expr_id_] = compute(ita->second, itb->second);
}

std::shared_ptr<eval_op> eval_mul::clone(const expression &expr) {
    return std::make_shared<eval_mul>(expr);
}
22
src/eval_mul.h
Executable file
@ -0,0 +1,22 @@
#ifndef EVAL_MUL_H
#define EVAL_MUL_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_mul: public eval_op {
    tensor compute(const tensor &a, const tensor &b);
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_mul() {}
    eval_mul(const expression &expr): eval_op(expr) {}
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Mul") == proto_map.end());
        proto_map["Mul"] = std::make_shared<eval_mul>(); // where is expr?
    }
}; // class eval_mul

#endif
20
src/eval_op.cpp
Executable file
@ -0,0 +1,20 @@
#include "eval_op.h"

eval_op::eval_op(const expression &expr)
{
    // set all of our member vars one time using expr getters
    expr_id_ = expr.get_expr_id();
    op_name_ = expr.get_op_name();
    op_type_ = expr.get_op_type();
    inputs_ = expr.get_inputs();
}

void eval_op::eval(vars_type &variables, const kwargs_type &kwargs)
{
    assert(false); // should be provided by derived classes
}

eval_op::~eval_op() {
}
27
src/eval_op.h
Executable file
@ -0,0 +1,27 @@
#ifndef EVAL_OP_H
#define EVAL_OP_H

#include <map>
#include "expression.h"
#include "assert.h"
#include <memory>

// typedef introduces an alias for each type.
typedef std::map<int, tensor> vars_type;
typedef std::map<std::string, tensor> kwargs_type;

class eval_op {
protected:
    friend class evaluation;
    eval_op(): expr_id_(-1) {}
    eval_op(const expression &expr);
    int expr_id_;
    std::string op_name_, op_type_;
    std::vector<int> inputs_;
public:
    virtual ~eval_op();
    virtual void eval(vars_type &variables, const kwargs_type &kwargs) = 0;
    virtual std::shared_ptr<eval_op> clone(const expression &expr) = 0;
}; // class eval_op

#endif
46
src/eval_op_prototypes.cpp
Executable file
@ -0,0 +1,46 @@
#include "eval_op_prototypes.h"

eval_op_prototypes::eval_op_prototypes() {
    eval_const::store_prototype(proto_map_);
    eval_input::store_prototype(proto_map_);
    eval_add::store_prototype(proto_map_);
    eval_sub::store_prototype(proto_map_);
    eval_mul::store_prototype(proto_map_);
    eval_relu::store_prototype(proto_map_);
    eval_flatten::store_prototype(proto_map_);
    eval_input2d::store_prototype(proto_map_);
    eval_linear::store_prototype(proto_map_);
    eval_maxpool2d::store_prototype(proto_map_);
    eval_conv2d::store_prototype(proto_map_);
    //log_proto_types(); // used when troubleshooting; uncomment to enable log
}

void eval_op_prototypes::log_proto_types() {
    std::cout << "eval_op_prototypes: registering prototypes begin:" << std::endl;
    std::cout << "----------------------" << std::endl;
    for (const auto& pair : proto_map_) {
        std::cout << "eval_op_prototypes: registered prototype: " << pair.first << std::endl;
    }
    std::cout << "----------------------" << std::endl;
    std::cout << "eval_op_prototypes: registering prototypes complete" << std::endl;
}

std::shared_ptr<eval_op> eval_op_prototypes::locate(std::string name)
{
    auto found = proto_map_.find(name);

    if (found != proto_map_.end()) {
        return found->second;
    } else {
        std::string error_message = "eval_op_prototypes: prototype for operator '" + name + "' not found.";
        std::cerr << error_message << std::endl;
        throw std::runtime_error(error_message);
    }
}

eval_op_prototypes &eval_op_prototypes::instance()
{
    static eval_op_prototypes instance; // the only instance
    return instance;
}
25
src/eval_op_prototypes.h
Executable file
@ -0,0 +1,25 @@
#ifndef EVAL_OP_PROTOTYPES_H
#define EVAL_OP_PROTOTYPES_H

#include "eval_op.h"
#include "eval_input.h"
#include "eval_input2d.h"
#include "eval_add.h"
#include "eval_mul.h"
#include "eval_sub.h"
#include "eval_const.h"
#include "eval_flatten.h"
#include "eval_relu.h"
#include "eval_linear.h"
#include "eval_maxpool2d.h"
#include "eval_conv2d.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_op_prototypes {
    // prevent creation of additional instances
    eval_op_prototypes(const eval_op_prototypes &) = delete;
    eval_op_prototypes();
    eval_op_proto_map proto_map_;
    void log_proto_types();
public:
    std::shared_ptr<eval_op> locate(std::string name);
    static eval_op_prototypes &instance(); // access the only instance
}; // class eval_op_prototypes

#endif // EVAL_OP_PROTOTYPES_H
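
// Design note for eval_op_prototypes (see evaluation.cpp): each concrete
// eval_* class registers a default-constructed prototype under its op-type
// name; evaluation looks the prototype up with locate(op_type) and calls
// clone(expr) on it to build a fully-initialized operator. This answers the
// recurring "where is expr?" comment: the prototype has no expression yet --
// the expression only arrives later, at clone time.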
27
src/eval_relu.cpp
Executable file
@ -0,0 +1,27 @@
#include "eval_relu.h"

eval_relu::eval_relu(const expression &expr): eval_op(expr) {
}

void eval_relu::eval(vars_type &variables, const kwargs_type &kwargs) {
    // Check that inputs_ is not empty
    assert(!inputs_.empty());

    // Retrieve the tensor from variables using the first input expression ID
    const int inputExprId = inputs_[0];
    tensor t = variables.at(inputExprId);

    // For debugging: print the input tensor
    std::cout << "Input Tensor for ReLU (expr_id " << inputExprId << "):" << std::endl;
    //t.print();

    // Perform the ReLU operation
    tensor reluResult = ReLU(t);

    // Store the result in variables under the key expr_id_
    variables[expr_id_] = reluResult;
}

std::shared_ptr<eval_op> eval_relu::clone(const expression &expr) {
    return std::make_shared<eval_relu>(expr);
}
21
src/eval_relu.h
Executable file
@ -0,0 +1,21 @@
#ifndef EVAL_RELU_H
#define EVAL_RELU_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_relu: public eval_op {
    tensor value_;
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_relu() {}
    eval_relu(const expression &expr);
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("ReLU") == proto_map.end());
        proto_map["ReLU"] = std::make_shared<eval_relu>(); // where is expr?
    }
}; // class eval_relu

#endif
18
src/eval_sub.cpp
Executable file
@ -0,0 +1,18 @@
#include "eval_sub.h"

tensor eval_sub::compute(const tensor &a, const tensor &b) {
    // these use our tensor operator- overload
    tensor c = a - b;
    return c;
}

void eval_sub::eval(vars_type &variables, const kwargs_type &kwargs) {
    assert(inputs_.size() == 2);
    auto ita = variables.find(inputs_[0]);
    auto itb = variables.find(inputs_[1]);
    variables[expr_id_] = compute(ita->second, itb->second);
}

std::shared_ptr<eval_op> eval_sub::clone(const expression &expr) {
    return std::make_shared<eval_sub>(expr);
}
22
src/eval_sub.h
Executable file
@ -0,0 +1,22 @@
#ifndef EVAL_SUB_H
#define EVAL_SUB_H

#include "eval_op.h"

typedef std::map<std::string, std::shared_ptr<eval_op>> eval_op_proto_map;

class eval_sub: public eval_op {
    tensor compute(const tensor &a, const tensor &b);
    std::shared_ptr<eval_op> clone(const expression &expr) override;
public:
    eval_sub() {}
    eval_sub(const expression &expr): eval_op(expr) {}
    void eval(vars_type &variables, const kwargs_type &kwargs) override;
    static void store_prototype(eval_op_proto_map &proto_map) {
        assert(proto_map.find("Sub") == proto_map.end());
        proto_map["Sub"] = std::make_shared<eval_sub>(); // where is expr?
    }
}; // class eval_sub

#endif
60
src/evaluation.cpp
Executable file
@ -0,0 +1,60 @@
#include "evaluation.h"
#include "eval_op_prototypes.h"

evaluation::evaluation(const std::vector<expression> &exprs) {
    try {
        std::cout << "Starting evaluation construction with " << exprs.size() << " expressions." << std::endl;
        for (auto &expr: exprs) {
            std::cout << "Processing expression with ID: " << expr.get_expr_id() << std::endl;
            std::shared_ptr<eval_op> p = eval_op_prototypes::instance().locate(expr.get_op_type());
            ops_.push_back(p->clone(expr));
            std::cout << "Expression with ID: " << expr.get_expr_id() << " processed successfully." << std::endl;
        }
        std::cout << "Evaluation construction completed successfully." << std::endl;
    } catch (const std::runtime_error& e) {
        std::cerr << "Failed to construct evaluation: " << e.what() << std::endl;
    }
}

void evaluation::add_kwargs_double(
    const char *key,
    double value)
{
    tensor scalar(value);
    kwargs_[key] = scalar;
}

void evaluation::add_kwargs_ndarray(
    const char *key,
    int dim,
    size_t shape[],
    double data[])
{
    tensor ndarray(dim, shape, data);
    kwargs_[key] = ndarray;
}

tensor &evaluation::get_result()
{
    if (!variables_.empty()) {
        int last_expr_id = std::prev(variables_.end())->first;
        std::cout << "Returning result from the last expression with ID: " << last_expr_id << std::endl;
        return variables_[last_expr_id];
    }
    throw std::runtime_error("No result available.");
}

int evaluation::execute()
{
    std::cout << "Executing evaluation with " << ops_.size() << " operations.\n";
    variables_.clear();

    for (auto &op : ops_) {
        std::cout << "Evaluating operation of type: " << typeid(*op).name() << "\n";
        op->eval(variables_, kwargs_);
        std::cout << "Operation evaluation complete. Variables size: " << variables_.size() << "\n";
    }

    std::cout << "Evaluation complete. Total variables evaluated: " << variables_.size() << "\n";
    return 0;
}
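
// Dataflow note for evaluation above: execute() runs the cloned operators in
// the order the expressions were appended; each eval() reads its operands from
// variables_ (keyed by expression ID) and writes its own result back under
// expr_id_. get_result() then returns the entry with the highest ID -- in a
// std::map that is what std::prev(variables_.end()) points at -- i.e. the
// result of the last expression evaluated.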
33
src/evaluation.h
Executable file
@ -0,0 +1,33 @@
#ifndef EVALUATION_H
#define EVALUATION_H

#include "expression.h"
#include "eval_op.h"

class evaluation
{
public:
    evaluation(const std::vector<expression> &exprs);

    void add_kwargs_double(
        const char *key,
        double value);

    void add_kwargs_ndarray(
        const char *key,
        int dim,
        size_t shape[],
        double data[]);

    // return 0 for success
    int execute();

    tensor &get_result();

private:
    std::vector<std::shared_ptr<eval_op>> ops_; // instead of exprs_
    std::map<int, tensor> variables_;
    std::map<std::string, tensor> kwargs_;
}; // class evaluation

#endif // EVALUATION_H
112
src/expression.cpp
Executable file
@ -0,0 +1,112 @@
#include "expression.h"
#include "evaluation.h"

expression::expression(
    int expr_id,
    const char *op_name,
    const char *op_type,
    int *inputs,
    int num_inputs)
    : expr_id_(expr_id), op_name_(op_name), op_type_(op_type)
{
    // initialize inputs_ with the given inputs from our expression.
    for (int i = 0; i < num_inputs; ++i)
    {
        inputs_.push_back(inputs[i]);
    }
}

// getter methods
int expression::get_expr_id() const
{
    return expr_id_;
}

const std::string& expression::get_op_name() const
{
    return op_name_;
}

const std::string& expression::get_op_type() const
{
    return op_type_;
}

const std::vector<int>& expression::get_inputs() const
{
    return inputs_;
}

tensor expression::get_op_param_tensor(const std::string& key) const {
    auto it = op_params_tensor_.find(key);
    if (it != op_params_tensor_.end()) {
        return it->second;
    } else {
        throw std::out_of_range("No operation parameter tensor found for key: " + key);
    }
}

double expression::get_op_param_double(const std::string& key) const {
    auto it = op_params_double_.find(key);
    if (it != op_params_double_.end()) {
        return it->second;
    } else {
        return 0.0;
    }
}

int expression::get_op_param_int(const std::string& key) const {
    auto it = op_params_int_.find(key);
    if (it != op_params_int_.end()) {
        return it->second;
    } else {
        return 0;
    }
}

void expression::add_op_param_double(
    const char *key,
    double value)
{
    std::cout << "Adding operation parameter (double) with key: " << key << std::endl;

    const std::string k(key);

    // route each parameter to the map matching its type; compare full key
    // strings (the earlier first-character comparison could confuse keys
    // sharing an initial letter, e.g. "weight" and "width")
    if (k == "weight" || k == "bias" || k == "value") {
        // weight, bias, and value carry tensor values
        // ("value" kept for backwards compatibility with p3 tests)
        op_params_tensor_[k] = tensor(value);
    } else if (k == "kernel_size" || k == "stride" || k == "in_channels"
               || k == "out_channels" || k == "height" || k == "width") {
        // integer-valued parameters (height/width are read back through
        // get_op_param_int by eval_input2d)
        op_params_int_[k] = int(value);
    } else if (k == "padding") {
        // padding is a double value
        op_params_double_[k] = value;
    } else {
        op_params_tensor_[k] = tensor(value);
    }
}

void expression::add_op_param_ndarray(
    const char *key,
    int dim,
    size_t shape[],
    double data[])
{
    std::cout << "Adding operation parameter (ndarray) with key: " << key << std::endl;

    // every ndarray parameter (weight, bias, ...) is stored as a tensor
    op_params_tensor_[key] = tensor(dim, shape, data);
}
51
src/expression.h
Executable file
@ -0,0 +1,51 @@
#ifndef EXPRESSION_H
#define EXPRESSION_H

#include <vector>
#include <string>
#include <map>
#include "tensor.h"

class evaluation;

class expression
{
    friend class evaluation;
public:
    expression(
        int expr_id,
        const char *op_name,
        const char *op_type,
        int *inputs,
        int num_inputs);

    // Getter methods for expression attributes
    int get_expr_id() const;
    const std::string& get_op_name() const;
    const std::string& get_op_type() const;
    const std::vector<int>& get_inputs() const;
    tensor get_op_param_tensor(const std::string& key) const;
    double get_op_param_double(const std::string& key) const;
    int get_op_param_int(const std::string& key) const;

    void add_op_param_double(
        const char *key,
        double value);

    void add_op_param_ndarray(
        const char *key,
        int dim,
        size_t shape[],
        double data[]);

private:
    int expr_id_;
    std::string op_name_;
    std::string op_type_;
    std::vector<int> inputs_;
    std::map<std::string, tensor> op_params_tensor_;
    std::map<std::string, double> op_params_double_;
    std::map<std::string, int> op_params_int_;
}; // class expression

#endif // EXPRESSION_H
109
src/libeasynn.cpp
Executable file
@ -0,0 +1,109 @@
#include <stdio.h>

#include "libeasynn.h"
#include "program.h"
#include "evaluation.h"

program *create_program()
{
    program *prog = new program;
    printf("create_program: program %p\n", prog);
    return prog;
}

void append_expression(
    program *prog,
    int expr_id,
    const char *op_name,
    const char *op_type,
    int inputs[],
    int num_inputs)
{
    printf("append_expression: program %p, expr_id %d, op_name %s, op_type %s, inputs %d (",
        prog, expr_id, op_name, op_type, num_inputs);
    for (int i = 0; i != num_inputs; ++i)
        printf("%d,", inputs[i]);
    printf(")\n");
    prog->append_expression(expr_id, op_name, op_type, inputs, num_inputs);
}

int add_op_param_double(
    program *prog,
    const char *key,
    double value)
{
    printf("program %p, key %s, value %f\n",
        prog, key, value);
    return prog->add_op_param_double(key, value);
}

int add_op_param_ndarray(
    program *prog,
    const char *key,
    int dim,
    size_t shape[],
    double data[])
{
    printf("program %p, key %s, value %p dim %d (",
        prog, key, data, dim);
    for (int i = 0; i != dim; ++i)
        printf("%zu,", shape[i]);
    printf(")\n");
    return prog->add_op_param_ndarray(key, dim, shape, data);
}

evaluation *build(program *prog)
{
    evaluation *eval = prog->build();
    printf("evaluation %p\n", eval);
    return eval;
}

void add_kwargs_double(
    evaluation *eval,
    const char *key,
    double value)
{
    printf("evaluation %p, key %s, value %f\n",
        eval, key, value);
    eval->add_kwargs_double(key, value);
}

void add_kwargs_ndarray(
    evaluation *eval,
    const char *key,
    int dim,
    size_t shape[],
    double data[])
{
    printf("evaluation %p, key %s, value %p dim %d (",
        eval, key, data, dim);
    for (int i = 0; i != dim; ++i)
        printf("%zu,", shape[i]);
    printf(")\n");
    eval->add_kwargs_ndarray(key, dim, shape, data);
}

int execute(
    evaluation *eval,
    int *p_dim,
    size_t **p_shape,
    double **p_data)
{
    printf("evaluation %p, p_dim %p, p_shape %p, p_data %p\n",
        eval, p_dim, p_shape, p_data);
    int ret = eval->execute();
    if (ret != 0) {
        printf("execution failed with return code %d\n", ret);
        return ret;
    }
    tensor &res = eval->get_result();
    *p_dim = res.get_dim();
    *p_shape = res.get_shape_array();
    *p_data = res.get_data_array();
    //res.print(); // uncomment to view tensor data output

    printf("execution completed successfully. result: dim=%d\n", *p_dim);
    fflush(stdout);
    return 0;
}
63
src/libeasynn.h
Executable file
@ -0,0 +1,63 @@
#ifndef LIBEASYNN_H
#define LIBEASYNN_H

#include <stddef.h> // for size_t in the array-parameter signatures

class program;
class evaluation;

// return program handle
extern "C"
program *create_program();

extern "C"
void append_expression(
    program *prog,
    int expr_id,
    const char *op_name,
    const char *op_type,
    int inputs[],
    int num_inputs);

// return 0 for success
extern "C"
int add_op_param_double(
    program *prog,
    const char *key,
    double value);

// return 0 for success
extern "C"
int add_op_param_ndarray(
    program *prog,
    const char *key,
    int dim,
    size_t shape[],
    double data[]);

// return evaluation handle
extern "C"
evaluation *build(
    program *prog);

extern "C"
void add_kwargs_double(
    evaluation *eval,
    const char *key,
    double value);

extern "C"
void add_kwargs_ndarray(
    evaluation *eval,
    const char *key,
    int dim,
    size_t shape[],
    double data[]);

// return 0 for success
extern "C"
int execute(
    evaluation *eval,
    int *p_dim,
    size_t **p_shape,
    double **p_data);

#endif // LIBEASYNN_H
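
// Usage note: the extern "C" functions above form the ABI that the Python
// front end binds against. A minimal ctypes sketch (an assumption about the
// binding, shown for illustration -- only the names come from this header):
//
//   import ctypes
//   lib = ctypes.CDLL("./libeasynn.so")
//   lib.create_program.restype = ctypes.c_void_p
//   prog = lib.create_program()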
47
src/program.cpp
Executable file
@ -0,0 +1,47 @@
#include "program.h"
#include "evaluation.h"

program::program()
{
}

void program::append_expression(
    int expr_id,
    const char *op_name,
    const char *op_type,
    int inputs[],
    int num_inputs)
{
    // create an expression object with the provided parameters
    expression expr(expr_id, op_name, op_type, inputs, num_inputs);

    // append the expression to the expressions vector
    expressions_.push_back(expr);
}

int program::add_op_param_double(
    const char *key,
    double value)
{
    // get end of expressions_ values and send to our add_op_param_double
    expressions_.back().add_op_param_double(key, value);
    return 0;
}

int program::add_op_param_ndarray(
    const char *key,
    int dim,
    size_t shape[],
    double data[])
{
    // get end of expressions_ values and send to our add_op_param_ndarray
    expressions_.back().add_op_param_ndarray(key, dim, shape, data);
    return 0;
}

evaluation *program::build()
{
    evaluation *eval = new evaluation(expressions_);
    return eval;
}

38
src/program.h
Executable file
@ -0,0 +1,38 @@
#ifndef PROGRAM_H
#define PROGRAM_H

#include <vector>

#include "expression.h"

class evaluation;

class program
{
public:
    program();

    void append_expression(
        int expr_id,
        const char *op_name,
        const char *op_type,
        int inputs[],
        int num_inputs);

    // return 0 for success
    int add_op_param_double(
        const char *key,
        double value);

    // return 0 for success
    int add_op_param_ndarray(
        const char *key,
        int dim,
        size_t shape[],
        double data[]);

    evaluation *build();

private:
    std::vector<expression> expressions_; // store the expressions
}; // class program

#endif // PROGRAM_H

430
src/tensor.cpp
Executable file
@ -0,0 +1,430 @@
#include "tensor.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <numeric>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
tensor::tensor() {}
|
||||||
|
|
||||||
|
tensor::tensor(double v) {
|
||||||
|
shape_.push_back(1);
|
||||||
|
data_.push_back(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
tensor::tensor(int dim, std::initializer_list<size_t> shape, std::initializer_list<double> data) {
|
||||||
|
shape_.assign(shape.begin(), shape.end());
|
||||||
|
data_.assign(data.begin(), data.end());
|
||||||
|
dim_ = dim;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t tensor::getTotalSize() const {
|
||||||
|
size_t totalSize = 1;
|
||||||
|
for (size_t dim : shape_) {
|
||||||
|
if (totalSize > std::numeric_limits<size_t>::max() / dim)
|
||||||
|
throw std::overflow_error("Size overflow detected");
|
||||||
|
totalSize *= dim;
|
||||||
|
}
|
||||||
|
return totalSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
tensor::tensor(int dim, size_t shape[], double data[]):
|
||||||
|
shape_(shape, shape+dim) {
|
||||||
|
dim_ = dim;
|
||||||
|
// calc N has shape[0]*shape[1]*...*shape[dim-1]
|
||||||
|
// calc N as the product of elements in the shape array
|
||||||
|
size_t N = 1;
|
||||||
|
for (int i=0; i < dim; ++i) {
|
||||||
|
N = shape[i] * N;
|
||||||
|
}
|
||||||
|
|
||||||
|
data_.assign(data, data+N);
|
||||||
|
}
|
||||||
|
|
||||||
|
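// A quick usage sketch of the from-C constructor (illustration only,
// not part of the library source):
//
//     size_t shape[] = {2, 3};
//     double data[] = {1, 2, 3, 4, 5, 6};
//     tensor t(2, shape, data);
//     // t.get_dim() == 2, t.at(1, 2) == 6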

int tensor::get_dim() const {
    return shape_.size();
}

double tensor::item() const {
    // a scalar tensor holds exactly one element (shape {} or {1})
    if (data_.size() == 1) {
        return data_[0];
    } else {
        throw std::runtime_error("item() called on a non-scalar tensor.");
    }
}

double &tensor::item() {
    if (data_.size() == 1) {
        return data_[0];
    } else {
        throw std::runtime_error("item() called on a non-scalar tensor.");
    }
}

double tensor::at(size_t i) const {
    assert(get_dim() == 1);
    assert(i < shape_[0]);
    return data_[i];
}

double tensor::at(size_t i, size_t j) const {
    if (shape_.size() != 2)
        throw std::runtime_error("Tensor is not 2-dimensional");

    assert((i < shape_[0]) && (j < shape_[1]));
    return data_[i * shape_[1] + j];
}

double tensor::at(size_t i, size_t j, size_t k, size_t l) const {
    assert(get_dim() == 4); // ensure the tensor is 4D
    assert(i < shape_[0] && j < shape_[1] && k < shape_[2] && l < shape_[3]);

    size_t index = i * shape_[1] * shape_[2] * shape_[3] + j * shape_[2] * shape_[3] + k * shape_[3] + l;
    return data_[index];
}

double& tensor::at(size_t i, size_t j, size_t k, size_t l) {
    assert(get_dim() == 4); // ensure the tensor is 4D
    assert(i < shape_[0] && j < shape_[1] && k < shape_[2] && l < shape_[3]);

    size_t index = i * shape_[1] * shape_[2] * shape_[3] + j * shape_[2] * shape_[3] + k * shape_[3] + l;
    return data_[index];
}

size_t* tensor::get_shape_array() {
    return shape_.empty() ? nullptr : &shape_[0];
}

double* tensor::get_data_array() {
    return data_.empty() ? nullptr : &data_[0];
}
// This can be used for debugging across the entire program.
// Prints the values of the tensor. Be careful, though, because
// it will clutter the screen output.
void tensor::print() const {
    std::cout << "Tensor - Dim: " << get_dim() << ", Shape: {";
    for (const auto& s : shape_) std::cout << s << " ";
    std::cout << "}, Data: {";
    for (const auto& d : data_) std::cout << d << " ";
    std::cout << "}\n";
}

// helper template: element-wise combination of two equally-shaped
// tensors using the binary functor T (e.g. std::plus<double>)
template <class T> tensor helper(const tensor &lhs, const tensor &rhs) {
    tensor result;
    result.shape_ = lhs.shape_;
    size_t totalSize = lhs.getTotalSize();
    result.data_.reserve(totalSize);
    std::transform(lhs.data_.begin(), lhs.data_.begin() + totalSize,
                   rhs.data_.begin(), std::back_inserter(result.data_), T());
    return result;
}

// operator overloads (element-wise; both operands are assumed to have
// the same shape)
tensor operator+(const tensor &lhs, const tensor &rhs) {
    tensor result;
    result.shape_ = lhs.shape_;
    size_t n = lhs.getTotalSize();
    result.data_.reserve(n);
    for (size_t i = 0; i < n; ++i) {
        result.data_.push_back(lhs.data_[i] + rhs.data_[i]);
    }
    return result;
}

tensor operator-(const tensor &lhs, const tensor &rhs) {
    tensor result;
    result.shape_ = lhs.shape_;
    size_t n = lhs.getTotalSize();
    result.data_.reserve(n);
    for (size_t i = 0; i < n; ++i) {
        result.data_.push_back(lhs.data_[i] - rhs.data_[i]);
    }
    return result;
}

tensor operator*(const tensor &lhs, const tensor &rhs)
{
    try {
        // scalar * scalar multiplication
        if (lhs.getTotalSize() == 1 && rhs.getTotalSize() == 1) {
            return tensor(lhs.data_[0] * rhs.data_[0]);
        }
        if (lhs.shape_.empty() || rhs.shape_.empty()) {
            return tensor(); // for p3-q6, passing in None
        }

        tensor result;
        size_t lhsSize = lhs.getTotalSize();
        size_t rhsSize = rhs.getTotalSize();

        // scalar * tensor multiplication
        if (lhsSize == 1 || rhsSize == 1) {
            double scalar = (lhsSize == 1 ? lhs : rhs).data_[0];
            const tensor &t = lhsSize == 1 ? rhs : lhs;
            result.shape_ = t.shape_;
            result.data_.reserve(lhsSize * rhsSize);
            std::transform(t.data_.begin(), t.data_.end(),
                           std::back_inserter(result.data_),
                           [&scalar](double x) { return x * scalar; });
        } else {
            // matrix multiplication: lhs is [I, K], rhs is [K, J]
            size_t I = lhs.shape_[0];
            size_t J = rhs.shape_[1];
            size_t K = lhs.shape_[1];
            result.shape_ = {I, J};
            size_t totalSize = result.getTotalSize();
            result.data_.resize(totalSize);

            for (size_t i = 0; i < I; ++i) {
                for (size_t j = 0; j < J; ++j) {
                    for (size_t k = 0; k < K; ++k) {
                        result.data_[J * i + j] +=
                            lhs.data_[K * i + k] * rhs.data_[J * k + j];
                    }
                }
            }
        }
        return result;
    } catch (const std::bad_alloc&) {
        throw std::runtime_error("Memory allocation failed in tensor multiplication.");
    }
}
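// Usage sketch for the matrix branch (shapes must already agree;
// illustration only):
//
//     size_t sa[] = {2, 3}, sb[] = {3, 2};
//     double da[] = {1, 2, 3, 4, 5, 6};
//     double db[] = {7, 8, 9, 10, 11, 12};
//     tensor a(2, sa, da), b(2, sb, db);
//     tensor c = a * b;  // [2x3] * [3x2] -> [2x2]
//     // c.at(0, 0) == 58, c.at(1, 1) == 154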

tensor ReLU(const tensor &x) {
    tensor result;
    size_t xSize = x.getTotalSize();
    result.shape_ = x.shape_;
    for (size_t i = 0; i < xSize; i++) {
        // max(x, 0), written branch-free
        result.data_.push_back(x.data_[i] * (x.data_[i] > 0));
    }
    return result;
}

tensor Flatten(const tensor &x) {
    // collapse everything after the batch dimension: [N, ...] -> [N, rest]
    tensor result;
    size_t xSize = x.getTotalSize();
    size_t shape_1 = xSize / x.shape_[0];
    result.shape_ = {x.shape_[0], shape_1};
    result.data_ = x.data_;
    return result;
}

tensor Input2d(const tensor &x) {
    // permute the input from NHWC to NCHW layout
    tensor result;
    result.shape_ = {x.shape_[0], x.shape_[3], x.shape_[1], x.shape_[2]};
    for (size_t i = 0; i < x.shape_[0]; i++) {                       // batch
        for (size_t j = 0; j < x.shape_[3]; j++) {                   // channel
            for (size_t k = 0; k < x.shape_[1] * x.shape_[2]; k++) { // pixel
                result.data_.push_back(x.data_[j + k * x.shape_[3] + i * x.shape_[3] * x.shape_[1] * x.shape_[2]]);
            }
        }
    }
    return result;
}

tensor Linear(const tensor &w, const tensor &x, const tensor &b) {
    // computes x * w^T + b, with b broadcast across the batch

    // transpose w: [out, in] -> [in, out]
    tensor result_wT;
    result_wT.shape_ = {x.shape_[1], w.shape_[0]};
    result_wT.data_.reserve(w.getTotalSize());
    for (size_t i = 0; i < w.shape_[1]; i++) {
        for (size_t j = 0; j < w.shape_[0]; j++) {
            result_wT.data_.push_back(w.data_[i + j * w.shape_[1]]);
        }
    }

    // x [batch, in] * w^T [in, out] -> [batch, out]
    tensor result_xwT = x * result_wT;

    // broadcast b across the batch: [out] -> [out, batch]
    tensor result_bBroadcast;
    result_bBroadcast.shape_ = {b.shape_[0], x.shape_[0]};
    for (size_t i = 0; i < result_bBroadcast.shape_[0]; i++) {
        for (size_t j = 0; j < result_bBroadcast.shape_[1]; j++) {
            result_bBroadcast.data_.push_back(b.data_[i]);
        }
    }

    // transpose the broadcast bias: [out, batch] -> [batch, out]
    tensor result_bBroadcastT;
    result_bBroadcastT.shape_ = {result_bBroadcast.shape_[1], result_bBroadcast.shape_[0]};
    for (size_t i = 0; i < result_bBroadcast.shape_[1]; i++) {
        for (size_t j = 0; j < result_bBroadcast.shape_[0]; j++) {
            result_bBroadcastT.data_.push_back(result_bBroadcast.data_[i + j * result_bBroadcast.shape_[1]]);
        }
    }

    return result_xwT + result_bBroadcastT;
}
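// Shape walk-through for a hypothetical batch=2, in=3, out=4 call:
//
//     w: [4,3], x: [2,3]
//     w^T: [3,4], x*w^T: [2,4]
//     b: [4] -> broadcast [4,2] -> transposed [2,4]
//     Linear(w, x, b): [2,4]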

std::vector<double> maxpooling(std::vector<std::vector<double> > map, int k_w, int k_h, int s_w, int s_h)
{
    int row = map.size(); int col = map[0].size();
    int out_row = 1 + (row - k_h) / s_h, out_col = 1 + (col - k_w) / s_w;
    int mod_row = row % s_h, mod_col = col % s_w;
    if (mod_row != 0) out_row++;
    if (mod_col != 0) out_col++;

    std::vector<std::vector<double> > res(out_row, std::vector<double>(out_col, 0));
    std::vector<std::vector<double> > pad_map(map);

    // pad on the right by repeating the last column
    for (int i = 0; i < row; i++)
        for (int j = 0; j < k_w - mod_col; j++)
        {
            pad_map[i].push_back(map[i][col - 1]);
        }
    // pad at the bottom by repeating the last row
    for (int j = 0; j < k_h - mod_row; j++)
    {
        pad_map.push_back(pad_map[row - 1]);
    }

    for (int i = 0; i < out_row; i++)
        for (int j = 0; j < out_col; j++)
        {
            int start_x = j * s_w;
            int start_y = i * s_h;
            // take the max over the k_w x k_h window
            double best = pad_map[start_y][start_x];
            for (int ii = 0; ii < k_w; ii++)
                for (int jj = 0; jj < k_h; jj++)
                {
                    best = std::max(best, pad_map[start_y + jj][start_x + ii]);
                }
            res[i][j] = best;
        }

    std::vector<double> res_;
    for (auto l : res)
    {
        for (auto ll : l)
        {
            res_.push_back(ll);
        }
    }
    return res_;
}
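// Worked example: a 4x4 map, 2x2 kernel, stride 2 -> 2x2 output,
// taking the max of each window:
//
//     { 1,  2,  3,  4}
//     { 5,  6,  7,  8}      ->    { 6,  8}
//     { 9, 10, 11, 12}            {14, 16}
//     {13, 14, 15, 16}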

tensor MaxPool2d(const tensor &x, const int kernel_size, const int stride) {
    tensor result;
    // output spatial size assumes stride == kernel_size, as used by the tests
    result.shape_ = {x.shape_[0], x.shape_[1],
                     x.shape_[2] / kernel_size, x.shape_[3] / kernel_size};
    for (size_t i = 0; i < x.shape_[0]; i++) {         // batch
        for (size_t j = 0; j < x.shape_[1]; j++) {     // channel
            // gather this channel into a 2D map
            std::vector<std::vector<double> > tmp;
            for (size_t k = 0; k < x.shape_[2]; k++) {
                std::vector<double> tmp_;
                for (size_t l = 0; l < x.shape_[3]; l++) {
                    tmp_.push_back(x.data_[l + k * x.shape_[3] + j * x.shape_[3] * x.shape_[2] + i * x.shape_[3] * x.shape_[2] * x.shape_[1]]);
                }
                tmp.push_back(tmp_);
            }
            auto res = maxpooling(tmp, kernel_size, kernel_size, stride, stride);
            for (size_t m = 0; m < res.size(); m++) {
                result.data_.push_back(res[m]);
            }
        }
    }
    return result;
}

tensor Conv2D(const tensor &input_tensor,
              const tensor &kernel_tensor,
              const tensor &bias_tensor,
              int in_channels,
              int out_channels,
              int kernel_size)
{
    std::cout << "In Channels: " << in_channels << ", Out Channels: " << out_channels << std::endl;
    std::cout << "Kernel Size: " << kernel_size << std::endl;

    // calculate output dimensions (valid convolution, stride 1)
    int out_height = static_cast<int>(input_tensor.shape_[2]) - kernel_size + 1;
    int out_width = static_cast<int>(input_tensor.shape_[3]) - kernel_size + 1;

    // ensure out_height and out_width are non-negative
    out_height = std::max(out_height, 0);
    out_width = std::max(out_width, 0);

    // initialize shape array for output tensor
    size_t output_shape[] = {input_tensor.shape_[0], static_cast<size_t>(out_channels), static_cast<size_t>(out_height), static_cast<size_t>(out_width)};

    // initialize data array for output tensor (fill with zeros)
    std::vector<double> output_data(output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3], 0);

    // create output tensor
    tensor output_tensor(4, output_shape, output_data.data());

    // convolution operation
    for (size_t n = 0; n < input_tensor.shape_[0]; ++n) {   // for each batch
        for (int m = 0; m < out_channels; ++m) {            // for each filter
            for (int y = 0; y < out_height; ++y) {
                for (int x = 0; x < out_width; ++x) {
                    double sum = 0.0;
                    for (int c = 0; c < in_channels; ++c) { // for each channel
                        for (int ky = 0; ky < kernel_size; ++ky) {
                            for (int kx = 0; kx < kernel_size; ++kx) {
                                size_t ix = x + kx;
                                size_t iy = y + ky;
                                if (ix < input_tensor.shape_[3] && iy < input_tensor.shape_[2]) {
                                    sum += input_tensor.at(n, c, iy, ix) * kernel_tensor.at(m, c, ky, kx);
                                }
                            }
                        }
                    }
                    sum += bias_tensor.at(m); // add bias
                    output_tensor.at(n, m, y, x) = sum;
                }
            }
        }
    }

    return output_tensor;
}
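
To make the indexing concrete, here is a small Conv2D sketch (illustration only, not part of the source): one 3x3 single-channel image convolved with one 2x2 diagonal filter and zero bias.

size_t in_shape[] = {1, 1, 3, 3};
double in_data[] = {1, 2, 3,
                    4, 5, 6,
                    7, 8, 9};
size_t k_shape[] = {1, 1, 2, 2};
double k_data[] = {1, 0,
                   0, 1};
size_t b_shape[] = {1};
double b_data[] = {0};

tensor in(4, in_shape, in_data);
tensor k(4, k_shape, k_data);
tensor b(1, b_shape, b_data);
tensor out = Conv2D(in, k, b, 1, 1, 2);
// out has shape 1x1x2x2 with data {6, 8, 12, 14}
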

59
src/tensor.h
Executable file
@ -0,0 +1,59 @@
#ifndef TENSOR_H
#define TENSOR_H

#include <stdio.h>
#include <vector>
#include <iostream>
#include <initializer_list>

class tensor {
public:
    tensor(); // scalar 0
    explicit tensor(double v); // scalar v
    tensor(int dim, size_t shape[], double data[]); // From C
    tensor(int dim, std::initializer_list<size_t> shape, std::initializer_list<double> data); // Conv2d

    int get_dim() const;

    // scalar only
    double item() const;
    double &item();

    // more accessors
    double at(size_t i) const;
    double at(size_t i, size_t j) const;
    double at(size_t i, size_t j, size_t k, size_t l) const;
    double& at(size_t i, size_t j, size_t k, size_t l);

    // passing tensor back to C code
    size_t *get_shape_array();
    double *get_data_array();

    // easy way to print out the tensor for logging
    void print() const;

    size_t getTotalSize() const;

    // friend is better than member, because it allows
    // double + tensor (implicit conversion)
    // operator overloads
    friend tensor operator+(const tensor &lhs, const tensor &rhs);
    friend tensor operator-(const tensor &lhs, const tensor &rhs);
    friend tensor operator*(const tensor &lhs, const tensor &rhs);

    template <class T>
    friend tensor helper(const tensor &lhs, const tensor &rhs);

    friend tensor ReLU(const tensor &x);
    friend tensor Flatten(const tensor &x);
    friend tensor Input2d(const tensor &x);
    friend tensor Linear(const tensor &w, const tensor &x, const tensor &b);
    friend tensor MaxPool2d(const tensor &x, const int kernel_size, const int stride);
    friend tensor Conv2D(const tensor &input_tensor, const tensor &kernel_tensor, const tensor &bias_tensor, int in_channels, int out_channels, int kernel_size);

private:
    int dim_ = 0;
    std::vector<size_t> shape_;
    std::vector<double> data_;
}; // class tensor

#endif // TENSOR_H