Pseudo-code:
- # hack: clear out all SVSHAPEs and set them up for multiply
+ # clear out all SVSHAPEs
SVSHAPE0[0:31] <- [0] * 32
SVSHAPE1[0:31] <- [0] * 32
SVSHAPE2[0:31] <- [0] * 32
SVSHAPE3[0:31] <- [0] * 32
- # set up template in SVSHAPE0, then copy to 1-3
- SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
- SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
- SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
- SVSHAPE0[28:29] <- 0b11 # skip z
- # copy
- SVSHAPE1[0:31] <- SVSHAPE0[0:31]
- SVSHAPE2[0:31] <- SVSHAPE0[0:31]
- SVSHAPE3[0:31] <- SVSHAPE0[0:31]
- # set up FRA
- SVSHAPE1[18:20] <- 0b001 # permute x,z,y
- SVSHAPE1[28:29] <- 0b01 # skip z
- # FRC
- SVSHAPE2[18:20] <- 0b001 # permute x,z,y
- SVSHAPE2[28:29] <- 0b11 # skip y
+ # set schedule up for multiply
+ if (SVRM = 0b00000) then
+ # set up template in SVSHAPE0, then copy to 1-3
+ SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
+ SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
+ SVSHAPE0[28:29] <- 0b11 # skip z
+ # copy
+ SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE3[0:31] <- SVSHAPE0[0:31]
+ # set up FRA
+ SVSHAPE1[18:20] <- 0b001 # permute x,z,y
+ SVSHAPE1[28:29] <- 0b01 # skip z - TODO confirm: 0b11 is also labelled skip z for SVSHAPE0
+ # FRC
+ SVSHAPE2[18:20] <- 0b001 # permute x,z,y
+ SVSHAPE2[28:29] <- 0b11 # skip y
+ # set schedule up for butterfly
+ if (SVRM = 0b00001) then
+ # set up template in SVSHAPE0, then copy to 1-2
+ # for FRA and FRT
+ SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[30:31] <- 0b01 # Butterfly mode
+ # copy
+ SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ # set up FRB and FRS
+ SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
+ # FRC (coefficients)
+ SVSHAPE2[28:29] <- 0b10 # k schedule
Special Registers Altered:
print (" mode", shape.mode)
print (" skip", shape.skip)
- remaps = [SVSHAPE0.get_iterator(),
- SVSHAPE1.get_iterator(),
- SVSHAPE2.get_iterator(),
- SVSHAPE3.get_iterator(),
+ remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()),
+ (SVSHAPE1, SVSHAPE1.get_iterator()),
+ (SVSHAPE2, SVSHAPE2.get_iterator()),
+ (SVSHAPE3, SVSHAPE3.get_iterator()),
]
rremaps = []
- for i, remap in enumerate(remaps):
+ for i, (shape, remap) in enumerate(remaps):
# XXX hardcoded! pick dststep for out (i==0) else srcstep
step = dststep if (i == 0) else srcstep
# this is terrible. O(N^2) looking for the match. but hey.
for idx, remap_idx in enumerate(remap):
if idx == step:
break
- if i == 0:
- yield self.dec2.o_step.eq(remap_idx)
- yield self.dec2.o2_step.eq(remap_idx)
- elif i == 1:
- yield self.dec2.in1_step.eq(remap_idx)
- elif i == 2:
- yield self.dec2.in3_step.eq(remap_idx)
- elif i == 3:
- yield self.dec2.in2_step.eq(remap_idx)
- rremaps.append((i, idx, remap_idx))
+ # multiply mode
+ if shape.mode == 0b00:
+ if i == 0:
+ yield self.dec2.o_step.eq(remap_idx) # RT
+ yield self.dec2.o2_step.eq(remap_idx) # EA
+ elif i == 1:
+ yield self.dec2.in1_step.eq(remap_idx) # RA
+ elif i == 2:
+ yield self.dec2.in3_step.eq(remap_idx) # RC
+ elif i == 3:
+ yield self.dec2.in2_step.eq(remap_idx) # RB
+ # FFT butterfly mode
+ if shape.mode == 0b01:
+ if i == 0:
+ yield self.dec2.o_step.eq(remap_idx) # RT
+ yield self.dec2.in1_step.eq(remap_idx) # RA
+ elif i == 1:
+ yield self.dec2.in2_step.eq(remap_idx) # RB
+ yield self.dec2.o2_step.eq(remap_idx) # EA (FRS)
+ elif i == 2:
+ yield self.dec2.in3_step.eq(remap_idx) # RC
+ elif i == 3:
+ pass # no SVSHAPE3
+ rremaps.append((i, idx, remap_idx)) # debug printing
for x in rremaps:
print ("shape remap", x)
# after that, settle down (combinatorial) to let Vector reg numbers