def mul_remap_256_x_256_to_512_asm():
# manually create SVSHAPE until we have svshape[34] instructions working
- # FIXME: fill in correct SVSHAPE[0-2] values
# x for y in range(4) for x in range(4)
SVSHAPE0 = SVSHAPE(0)
+ SVSHAPE0.lims = [4, 4, 1]
+ SVSHAPE0.skip = 0b10
# y for y in range(4) for x in range(4)
SVSHAPE1 = SVSHAPE(0)
+ SVSHAPE1.lims = [4, 4, 1]
+ SVSHAPE1.skip = 0b01
# x + y for y in range(4) for x in range(4)
# *must not* be limited mod 4, maximum value is 6 (3 + 3)
SVSHAPE2 = SVSHAPE(0)
+ SVSHAPE0.lims = [4, 4, 1]
+ SVSHAPE0.mode = 0b11
+ SVSHAPE0.submode = 0b01 # rhombus mode
- # 0 for y in range(4) for x in range(4)
+ # -1 for y in range(4) for x in range(4)
# yes, this is a constant
SVSHAPE3 = SVSHAPE(0)
+ SVSHAPE3.order = (0, 2, 1) # so something is non-zero
+ # unsigned -- see https://bugs.libre-soc.org/show_bug.cgi?id=1155#c72
+ SVSHAPE3.offset = -1 # FIXME: offset is unsigned
retval = [
"mul_256_to_512:",
# a is in r4-7, b is in r8-11, output (y) in r4-11
"mtspr %d, 0" % (SPRfull.SVSHAPE1.value,),
*load_u32(0, SVSHAPE2),
"mtspr %d, 0" % (SPRfull.SVSHAPE2.value,),
+ *load_u32(0, SVSHAPE3),
+ "mtspr %d, 0" % (SPRfull.SVSHAPE3.value,),
]
retval += [
"loop:",
# use SVSHAPE0 for RA, SVSHAPE1 for RB,
# SVSHAPE2 for RC/RT, SVSHAPE3 for RS
"svremap 0o37, 0, 1, 2, 2, 3, 0",
- # RS is scalar by using constant remap
- "sv.maddedu *4, *32, *36, *4", # FIXME: need to set EXTRA2_MODE to 0
+ # RS is scalar r3 by using constant remap with offset -1
+ "sv.maddedu *4, *32, *36, *4",
# use SVSHAPE2 for RT/RA, all others are not remapped
"svremap 0o11, 2, 0, 0, 2, 0, 0",
- "sv.adde *5, *5, 20", # FIXME: maddedu's RS is in r20, right?
+ "sv.adde *5, *5, 3", # FIXME: maddedu's RS is in r3, right?
"svstep 0, 0, 1",
"bc 16, 0, loop # bdnz loop",
"bclr 20, 0, 0 # blr",