Sub-vector elements are not to be considered "Vertical". The vec2/3/4
is to be considered as if it were a "single element". Caveats exist for
-[[sv/mv.swizzle]] and [[sv/mv.vec]] when Pack/Unpack is enabled.
-
-# Pseudocode
-
- // instruction fields:
- rd = get_rt_field(); // bits 6..10
- ra = get_ra_field(); // bits 11..15
- vf = get_vf_field(); // bit 23
- vs = get_vs_field(); // bit 24
- ms = get_ms_field(); // bit 25
- Rc = get_Rc_field(); // bit 31
-
- if vf and not vs and not ms {
- // increment src/dest step mode
- // NOTE! this is in no way complete! predication is not included
- // and neither is SUBVL mode
- srcstep = SPR[SV].srcstep
- dststep = SPR[SV].dststep
- VL = SPR[SV].VL
- srcstep++
- dststep++
- rollover = (srcstep == VL or dststep == VL)
- if rollover:
- // Reset srcstep, dststep, and also exit "Vertical First" mode
- srcstep = 0
- dststep = 0
- MSR[6] = 0
- SPR[SV].srcstep = srcstep
- SPR[SV].dststep = dststep
-
- // write CR? helps for doing Vertical loops, detects end
- // of Vector Elements
- if Rc = 1 {
- // update CR to indicate that srcstep/dststep "rolled over"
- CR0.eq = rollover
- }
- } else {
- // add one. MVL/VL=1..64 not 0..63
- vlimmed = get_immed_field()+1; // 16..22
-
- // set VL (or not).
- // 4 options: from SPR, from immed, from ra, from CTR
- if vs {
- // VL to be sourced from fields/regs
- if ra != 0 {
- VL = GPR[ra]
- } else {
- VL = vlimmed
- }
- } else {
- // VL not to change (except if MVL is reduced)
- // read from SPRs
- VL = SPR[SV_VL]
- }
-
- // set MVL (or not).
- // 2 options: from SPR, from immed
- if ms {
- MVL = vlimmed
- } else {
- // MVL not to change, read from SPRs
- MVL = SPR[SV_MVL]
- }
-
- // calculate (limit) VL
- VL = min(VL, MVL)
-
- // store VL, MVL
- SVSTATE.VL = VL
- SVSTATE.MVL = MVL
-
- // write rd
- if rt != 0 {
- // rt is not zero
- regs[rt] = VL;
- }
- // write CR?
- if Rc = 1 {
- // update CR from VL (not rt)
- CR0.eq = (VL == 0)
- ...
- ...
- }
- // write Vertical-First mode
- SVSTATE.vf = vf
- }
+[[sv/mv.swizzle]] and [[sv/mv.vec]] when Pack/Unpack is enabled,
+because the order in which the VL and SUBVL loops are applied is
+swapped (outer-inner becomes inner-outer).
# Examples