bc_vli = mode[SVP64MODEb.BC_VLI] != 0
bc_snz = mode[SVP64MODEb.BC_SNZ] != 0
bc_vsb = yield self.dec2.rm_dec.bc_vsb
+ bc_ctrtest = yield self.dec2.rm_dec.bc_ctrtest
bc_lru = yield self.dec2.rm_dec.bc_lru
bc_gate = yield self.dec2.rm_dec.bc_gate
sz = yield self.dec2.rm_dec.pred_sz
self.namespace['ALL'] = SelectableInt(bc_gate, 1)
self.namespace['VSb'] = SelectableInt(bc_vsb, 1)
self.namespace['LRu'] = SelectableInt(bc_lru, 1)
+ self.namespace['CTRtest'] = SelectableInt(bc_ctrtest, 1)
self.namespace['VLSET'] = SelectableInt(bc_vlset, 1)
self.namespace['VLI'] = SelectableInt(bc_vli, 1)
self.namespace['sz'] = SelectableInt(sz, 1)
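# rough mapping of the namespace entries above to the sv.bc
# pseudocode fields (an interpretive summary of the RM decode,
# not spec text):
#   ALL     - ALL/ANY gate: require all (rather than any) element
#             tests to succeed before branching
#   VSb     - VLSET branch-on-success/failure selector
#   LRu     - update LR only if the branch is actually taken
#   CTRtest - CTR-test mode: CTR takes part in per-element loop control
#   VLSET   - truncate VL according to the element test results
#   VLI     - VL-inclusive: keep the failing element when truncating VL
#   sz      - predicate source-zeroing bit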
# this is the last check to be made in the loop; combined with
# the ALL/ANY mode it allows an early exit
if self.is_svp64_mode and ins_name.startswith("sv.bc"):
+ bc_ctrtest = yield self.dec2.rm_dec.bc_ctrtest
no_in_vec = yield self.dec2.no_in_vec # BI is scalar
+ no_in_vec &= not bc_ctrtest # allow CTR loop on scalar
end_loop = no_in_vec or srcstep == vl-1 or dststep == vl-1
self.namespace['end_loop'] = SelectableInt(end_loop, 1)
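# a scalar BI would normally end the loop immediately (no_in_vec),
# but in CTR-test mode the loop must still run once per element so
# that CTR is decremented each time: hence the "&= not bc_ctrtest"
# override above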
lst = SVP64Asm(
[
"mtspr 9, 3", # move r3 to CTR
+ "setvl. 0,0,8,0,1,1", # set MVL=VL=8
"sv.addi *8, 0, 0", # initialise r8-r15 to zero
- # chr-copy loop starts here:
- # for (i = 0; i < n && src[i] != '\0'; i++)
- # dest[i] = src[i];
# VL = MIN(CTR,MAXVL=2), Rc=1 (CR0 set if CTR ends)
- "setvl. 3,0,8,0,1,1",
+ "setvl. 3,0,2,0,1,1",
# load VL bytes (update r4 addr) but compressed (dw=8)
"sv.lbzu/pi/dw=8 *16, 1(4)", # should be /lf here as well
# branch back if still CTR
- "sv.bc 16, 0, -0x10", # test CTR, stop if cmpi failed
+ "sv.bc/ctr/all 16, *0, -0x18", # CTR mode, reduce VL by CTR
]
)
lst = list(lst)
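# roughly what the loop above computes (a sketch of the intent,
# not the exact simulator semantics):
#   CTR = r3                    # element count, set via mtspr 9,3
#   while CTR > 0:
#       VL = min(CTR, 2)        # setvl. with MAXVL=2
#       load VL bytes from 1(r4), post-incrementing r4
#       CTR -= VL               # sv.bc/ctr consumes CTR per element
# note the full data set below is kept for reference but is
# overridden by the shorter debug array that follows it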
tst_array = [23,19,25,189,76,255,32,191,67,205,0,39,107]
- tst_array = [1,2,3,4,5,6,7,8,9] #8,9,10,11,12,13]
+ tst_array = [1,2,3,] #4,5,6,7,8,9] #8,9,10,11,12,13]
initial_regs = [0] * 64
initial_regs[3] = len(tst_array)
initial_regs[4] = 16 # load address
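# r3 carries the element count (moved into CTR by the first
# instruction in the listing); r4 holds the source load address (16)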