From b73bd117d8840b1257b7c75f9d2e89f233085ee0 Mon Sep 17 00:00:00 2001 From: lkcl Date: Sat, 12 Mar 2022 11:55:42 +0000 Subject: [PATCH] --- openpower/sv/bitmanip.mdwn | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/openpower/sv/bitmanip.mdwn b/openpower/sv/bitmanip.mdwn index 8bfeab0cb..2467873f4 100644 --- a/openpower/sv/bitmanip.mdwn +++ b/openpower/sv/bitmanip.mdwn @@ -187,24 +187,26 @@ mode (3 bit) may be used to do inversion of ordering, similar to carryless mul, ## ternlogv -also, another possible variant involving swizzle and vec4: +also, another possible variant involving swizzle-like selection +and masking; this requires only two 64-bit registers (RA, RT) and +at most 16 LUT3s | 0.5|6.10|11.15| 16.23 |24.27 | 28.30 |31| | -- | -- | --- | ----- | ---- | ----- |--| | NN | RT | RA | idx0-3| mask | sz 01 |0 | - SZ = sz * 8 - raoff = idx0 * SZ - rboff = idx0 * SZ - rcoff = idx0 * SZ - imoff = idx0 * SZ + SZ = (1+sz) * 8 # 8 or 16 + raoff = MIN(XLEN, idx0 * SZ) + rboff = MIN(XLEN, idx1 * SZ) + rcoff = MIN(XLEN, idx2 * SZ) + imoff = MIN(XLEN, idx3 * SZ) imm = RA[imoff:imoff+SZ] - for i in range(SZ): + for i in range(MIN(XLEN, SZ)): ra = RA[raoff:+i] rb = RA[rboff+i] rc = RA[rcoff+i] res = lut3(imm, ra, rb, rc) - for j in range(3): + for j in range(MIN(XLEN//8, 4)): if mask[j]: RT[i+j*SZ] = res ## ternlogcr -- 2.30.2