From 0879c186340c65b1b0e6391698ba5aa7551bf040 Mon Sep 17 00:00:00 2001 From: lkcl Date: Mon, 13 Jun 2022 15:33:51 +0100 Subject: [PATCH] --- openpower/sv/mv.vec.mdwn | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/openpower/sv/mv.vec.mdwn b/openpower/sv/mv.vec.mdwn index d0b1e1263..40a31c954 100644 --- a/openpower/sv/mv.vec.mdwn +++ b/openpower/sv/mv.vec.mdwn @@ -100,31 +100,29 @@ Illustrating a For a separate source/dest SUBVL (again, no elwidth overrides): + # only one of these will be >1 at any given time + subvl = MAX(SUBVL,SRC_SUBVL) # yield an outer-SUBVL, inner VL loop with SRC SUBVL def index_src(outer): if outer: - # outer on *dest* subvl, to match inner dest - for j in range(SUBVL): + for j in range(subvl): for i in range(VL): yield i+VL*j else: - # inner on *src* subvl, to match *outer* src for i in range(VL): - for j in range(SRC_SUBVL): - yield i*SRC_SUBVL+j + for j in range(subvl): + yield i*subvl+j # yield an outer-SUBVL, inner VL loop with DEST SUBVL def index_dest(outer): if outer: - # outer on *src* subvl, to match inner src - for j in range(SRC_SUBVL): + for j in range(subvl): for i in range(VL): yield i+VL*j else: - # inner on *dest* subvl, to match *outer* dest for i in range(VL): - for j in range(SUBVL): - yield i*SUBVL+j + for j in range(subvl): + yield i*subvl+j # inner looping when SUBVLs are equal if SRC_SUBVL == SUBVL: @@ -132,7 +130,8 @@ For a separate source/dest SUBVL (again, no elwidth overrides): move_operation(RT+idx, RA+idx) else: # walk through both source and dest indices simultaneously - for src_idx, dst_idx in zip(index_src(), index_dst()): + so, do = SRC_SUBVL>SUBVL, SUBVL>SRC_SUBVL + for src_idx, dst_idx in zip(index_src(so), index_dest(do)): move_operation(RT+dst_idx, RA+src_idx) "yield" from python is used here for simplicity and clarity. 
@@ -140,9 +139,8 @@ The two Finite State Machines for the generation of the source and destination element offsets progress incrementally in lock-step. -Normal uaage, `SRC_SUBVL=1, SUBVL=2/3/4` gives -a "pack" effect, and `SUBVL=1, SRC_SUBVL=2/3/4` gives an -"unpack". Setting both SUBVL and SRC_SUBVL to greater than -1 will, unlike [[sv/mv.swizzle]], produce defined deterministic results, -even if a little hard to understand. Loops run through -`MIN(SUBVL, SRC_SUBVL) * VL` elements. +* Normal usage, `SUBVL=SRC_SUBVL`, gives a straight subvector copy. +* `SRC_SUBVL=1, SUBVL=2/3/4` gives a "pack" effect. +* `SUBVL=1, SRC_SUBVL=2/3/4` gives an "unpack". +* Setting both SUBVL and SRC_SUBVL to unequal values greater than + 1 will, like [[sv/mv.swizzle]], produce `UNDEFINED` results. -- 2.30.2