From 8c6d66775330ea57f4a8a05f7b92db75a247cfb8 Mon Sep 17 00:00:00 2001 From: lkcl Date: Thu, 23 Jun 2022 12:48:33 +0100 Subject: [PATCH] --- openpower/sv/svp64/appendix.mdwn | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/openpower/sv/svp64/appendix.mdwn b/openpower/sv/svp64/appendix.mdwn index 3ab14e84f..1ed354fda 100644 --- a/openpower/sv/svp64/appendix.mdwn +++ b/openpower/sv/svp64/appendix.mdwn @@ -293,6 +293,57 @@ The following schedule for srcstep and dststep will occur: Here, both srcstep and dststep remain in lockstep because sz=dz=1 +# EXTRA Pack/Unpack Modes + +The pack/unpack concept of VSX `vpack` is abstracted out as a Sub-Vector +reordering Schedule, named `RM-2P-1S1D-PU`. +The usual RM-2P-1S1D is reduced from EXTRA3 to EXTRA2, making +room for 2 extra bits that enable either "packing" or "unpacking" +on the subvectors vec2/3/4. + +Illustrating a +"normal" SVP64 operation with `SUBVL!=1:` (assuming no elwidth overrides): + + def index(): + for i in range(VL): + for j in range(SUBVL): + yield i*SUBVL+j + + for idx in index(): + operation_on(RA+idx) + +For pack/unpack (again, no elwidth overrides): + + # yield an outer-SUBVL or inner VL loop with SUBVL + def index_p(outer): + if outer: + for j in range(SUBVL): + for i in range(VL): + yield i+VL*j + else: + for i in range(VL): + for j in range(SUBVL): + yield i*SUBVL+j + + # walk through both source and dest indices simultaneously + for src_idx, dst_idx in zip(index_p(PACK), index_p(UNPACK)): + move_operation(RT+dst_idx, RA+src_idx) + +"yield" from python is used here for simplicity and clarity. +The two Finite State Machines for the generation of the source +and destination element offsets progress incrementally in +lock-step. + +Setting of both `PACK_en` and `UNPACK_en` is neither prohibited nor +`UNDEFINED` because the reordering is fully deterministic, and +additional REMAP reordering may be applied. 
For Matrix this would +give potentially up to 4 Dimensions of reordering. + +Pack/Unpack applies to mv operations and some other single-source +single-destination operations such as Indexed LD/ST and extsw. +[[sv/mv.swizzle]] has a slightly different pseudocode algorithm +for Vertical-First Mode. + # Twin Predication This is a novel concept that allows predication to be applied to a single -- 2.30.2