From 3e04fe8f31972b048a90270a973b192fa74a3a1b Mon Sep 17 00:00:00 2001 From: lkcl Date: Sat, 20 Aug 2022 12:46:11 +0100 Subject: [PATCH] --- openpower/sv/ldst.mdwn | 55 ++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/openpower/sv/ldst.mdwn b/openpower/sv/ldst.mdwn index 02e6358a9..ad03600fd 100644 --- a/openpower/sv/ldst.mdwn +++ b/openpower/sv/ldst.mdwn @@ -438,12 +438,13 @@ Below is the pseudocode for Unit-Strided LD (which includes Vector capability). both Immediate and Indexed LD/ST, does not have element-width overriding applied to it. -Note that twin predication, predication-zeroing, saturation -and other modes have all been removed, for clarity and simplicity: +Note that predication, predication-zeroing, +and other modes except saturation have all been removed, +for clarity and simplicity: - # LD not VLD! (ldbrx if brev=True) + # LD not VLD! # this covers unit stride mode and a type of vector offset - function op_ld(RT, RA, brev, op_width, imm_offs, svctx) + function op_ld(RT, RA, op_width, imm_offs, svctx) for (int i = 0, int j = 0; i < svctx.VL && j < svctx.VL): if not svctx.unit/el-strided: # strange vector mode, compute 64 bit address which is @@ -455,16 +456,9 @@ and other modes have all been removed, for clarity and simplicity: # adjust for unit/el-stride srcbase += .... - # takes care of (merges) processor LE/BE and ld/ldbrx - bytereverse = brev XNOR MSR.LE - # read the underlying memory memread <= MEM(srcbase + imm_offs, op_width) - # optionally performs byteswap at op width - if (bytereverse): - memread = byteswap(memread, op_width) - # check saturation. if svpctx.saturation_mode: # ... saturation adjustment... @@ -483,11 +477,14 @@ and other modes have all been removed, for clarity and simplicity: i++; j++; -For LD/Indexed, the key here is that in the calculation of the Effective Address, -RA has no elwidth override but RB does. 
+Note above that the source elwidth is *not used at all* in LD-immediate. - # LD not VLD! - function op_ld(RT, RA, RB, op_width, svctx) +For LD/Indexed, the key is that in the calculation of the Effective Address, +RA has no elwidth override but RB does. Pseudocode below is simplified +for clarity: predication and all modes except saturation are removed: + + # LD not VLD! ld*rx if brev else ld* + function op_ld(RT, RA, RB, op_width, svctx, brev) for (int i = 0, int j = 0; i < svctx.VL && j < svctx.VL): if not svctx.el-strided: # RA not polymorphic! elwidth hardcoded to 64 here @@ -499,11 +496,33 @@ RA has no elwidth override but RB does. offs = get_polymorphed_reg(RB, svctx.src_elwidth, i) # sign-extend if svctx.SEA: offs = sext(offs, svctx.src_elwidth, 64) + + # takes care of (merges) processor LE/BE and ld/ldbrx + bytereverse = brev XNOR MSR.LE + # read the underlying memory memread <= MEM(srcbase + offs, op_width) - # proceed to check saturation - ... - ... + + # optionally performs byteswap at op width + if (bytereverse): + memread = byteswap(memread, op_width) + + if svctx.saturation_mode: + # ... saturation adjustment... + memread = clamp(memread, op_width, svctx.dest_elwidth) + else: + # truncate/extend to over-ridden dest width. + memread = adjust_wid(memread, op_width, svctx.dest_elwidth) + + # takes care of inserting memory-read (now correctly byteswapped) + # into regfile underlying LE-defined order, into the right place + # within the NEON-like register, respecting destination element + # bitwidth, and the element index (j) + set_polymorphed_reg(RT, svctx.dest_elwidth, j, memread) + + # increments both src and dest element indices (no predication here) + i++; + j++; # Remapped LD/ST -- 2.30.2