return y
-def python_mul_algorithm2(a, b):
+def python_mul_remap_algorithm(a, b):
# version 2 of the MUL_256_X_256_TO_512_ASM algorithm using base 100 rather
# than 2^64, since that's easier to read.
- # the idea here is that it will "morph" into something more akin to
- # using REMAP bigmul (first using REMAP Indexed)
-
- # create a schedule for use below. the "end of inner loop" marker is 0b01
- iyl = []
- il = []
- for iy in range(4):
- for i in range(4):
- iyl.append((iy+i, i == 3))
- il.append(i)
- for i in range(5):
- iyl.append((iy+i, i == 4))
- il.append(i)
-
- y = [0] * 8 # result y and temp t of same size
- t = [0] * 8 # no need after this to set t[4] to zero
- for iy in range(4):
- for i in range(4): # use t[iy+4] as a 64-bit carry
- t[iy+i], t[iy+4] = maddedu(a[iy], b[i], t[iy+4])
- ca = 0
- for i in range(5): # add vec t to y with 1-bit carry
- idx = iy + i
- y[idx], ca = adde(y[idx], t[idx], ca)
+ # index lists below flatten the ai/bi loop nest; use a debugger to see intermediate values.
+ a_sz = len(a)
+ b_sz = len(b)
+ a_idx = []
+ b_idx = []
+ a_plus_b_idx = []
+ a_plus_b_plus_1_idx = []
+ for ai in range(a_sz):
+ for bi in range(b_sz):
+ a_idx.append(ai)
+ b_idx.append(bi)
+ a_plus_b_idx.append(ai + bi)
+ a_plus_b_plus_1_idx.append(ai + bi + 1)
+
+ y = [0] * (a_sz + b_sz)
+ ca = 0
+ for i in range(a_sz * b_sz):
+ # t is maddedu's second result; adde adds it into the next y slot with ca.
+ # no need to clear ca between ai outer loops: the partial products can't
+ # get big enough to have a carry out, so ca is zero when (i % b_sz == 0).
+ # That said, hardware will probably want to pattern-match this to
+ # remove the unnecessary dependency through ca.
+ y[a_plus_b_idx[i]], t = maddedu(
+ a[a_idx[i]], b[b_idx[i]], y[a_plus_b_idx[i]])
+ y[a_plus_b_plus_1_idx[i]], ca = adde(
+ y[a_plus_b_plus_1_idx[i]], t, ca)
return y
a = b = (99, 99, 99, 99)
expected = [1, 0, 0, 0, 98, 99, 99, 99]
assert python_mul_algorithm(a, b) == expected
+ # check python_mul_remap_algorithm against the same known answer
+ assert python_mul_remap_algorithm(a, b) == expected
- # now test python_mul_algorithm2 *against* python_mul_algorithm
+ # now test python_mul_remap_algorithm *against* python_mul_algorithm (random inputs)
import random
random.seed(0) # reproducible values
- for i in range(10000):
+
+ def fmt_l(l):
+ return "[" + ", ".join("%2i" % (i,) for i in l) + "]"
+
+ for i in range(100000):
a = []
b = []
for j in range(4):
a.append(random.randint(0, 99))
b.append(random.randint(0, 99))
expected = python_mul_algorithm(a, b)
- testing = python_mul_algorithm2(a, b)
- report = "%+17s * %-17s = %s\n" % (repr(a), repr(b), repr(expected))
- report += " (%s)" % repr(testing)
+ testing = python_mul_remap_algorithm(a, b)
+ report = "%s * %s = " % (fmt_l(a), fmt_l(b))
+ indent = " " * len(report)
+ report += "%s\n%s%s" % (fmt_l(expected), indent, fmt_l(testing))
print(report)
assert expected == testing