return y
-def python_mul_remap_algorithm(a, b):
- # version 2 of the MUL_256_X_256_TO_512_ASM algorithm using base 100 rather
- # than 2^64, since that's easier to read.
- a_sz, b_sz = len(a), = len(b)
- ai, bi, apbi, apbp1 = [], [], [], [] # REMAP indices
+def python_mul_remap_yielder(a_sz, b_sz):
for ai in range(a_sz):
for bi in range(b_sz):
- ai.append(ai)
- bi.append(bi)
- apbi.append(ai + bi)
- apbp1.append(ai + bi + 1)
+ yield ai, bi, ai + bi, ai + bi + 1
+def python_mul_remap_algorithm(a, b):
+ # version 2 of the MUL_256_X_256_TO_512_ASM algorithm using base 100 rather
+ # than 2^64, since that's easier to read.
+ a_sz, b_sz = len(a), len(b)
y = [0] * (a_sz + b_sz)
ca = 0
- for i in range(a_sz * b_sz):
+ for ai, bi, apbi, apbp1 in python_mul_remap_yielder(a_sz, b_sz):
# no need to clear ca between ai outer loops, since the partial
# products can't get big enough to have a carry out, so ca will
- # always be zero when (i % b_sz == 0).
+ # always be zero when (bi == 0), i.e. at the start of each ai outer loop.
# That said, hardware will probably want to pattern-match this to
# remove the unnecessary dependency through ca.
- y[apbi[i]], t = maddedu(a[ai[i]], b[bi[i]], y[apbi[i]])
- y[apbp1[i]], ca = adde(y[apbp1[i]], t, ca)
+ y[apbi], t = maddedu(a[ai], b[bi], y[apbi])
+ y[apbp1], ca = adde(y[apbp1], t, ca)
return y