From 2f48956d9a54177e129cbaffd83122df339eb120 Mon Sep 17 00:00:00 2001 From: lkcl Date: Sat, 1 Apr 2023 18:34:14 +0100 Subject: [PATCH] --- openpower/sv/rfc/ls010.mdwn | 50 +++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/openpower/sv/rfc/ls010.mdwn b/openpower/sv/rfc/ls010.mdwn index bacec089a..8e32a11fd 100644 --- a/openpower/sv/rfc/ls010.mdwn +++ b/openpower/sv/rfc/ls010.mdwn @@ -180,17 +180,17 @@ Note the deliberate similarity to how VSX register elements are defined: Example Vector-looped add operation implementation when elwidths are 64-bit: ``` - # vector-add RT, RA,RB using the "uint64_t" union member, "l" + # vector-add RT, RA,RB using the "uint64_t" union member, "dwords" for i in range(VL): - int_regfile[RT].l[i] = int_regfile[RA].l[i] + int_regfile[RB].l[i] + int_regfile[RT].dwords[i] = int_regfile[RA].dwords[i] + int_regfile[RB].dwords[i] ``` However if elwidth overrides are set to 16 for both source and destination: ``` - # vector-add RT, RA, RB using the "uint64_t" union member "s" + # vector-add RT, RA, RB using the "uint16_t" union member "halfs" for i in range(VL): - int_regfile[RT].s[i] = int_regfile[RA].s[i] + int_regfile[RB].s[i] + int_regfile[RT].halfs[i] = int_regfile[RA].halfs[i] + int_regfile[RB].halfs[i] ``` Hardware Architectural note: to avoid a Read-Modify-Write at the register @@ -248,7 +248,7 @@ the numerically-lowest element at the **MSB** end of the register. case 8 : el->bytes[15-elt] = VSR_regfile[gpr].bytes[15-elt]; } } - void set_register_element(el_reg_t* el, int gpr, int elt, int width) { + void set_VSR_element(el_reg_t* el, int gpr, int elt, int width) { check_num_elements(elt, width); switch (width) { case 64: VSR_regfile[gpr].dwords[elt] = el->dwords[1-elt]; @@ -270,6 +270,46 @@ Implementations are not permitted to violate the Canonical definition: software will be critically relying on the wrapped (overflow) behaviour inherently implied from the unbounded c arrays. 
+To illustrate the exact same loop with the exact same effect as achieved by Simple-V, +we are first forced to create wrapper functions: + +``` + int calc_VSR_reg_offs(int elt, int width) { + switch (width) { + case 64: return floor(elt / 2); + case 32: return floor(elt / 4); + case 16: return floor(elt / 8); + case 8 : return floor(elt / 16); + } + } + int calc_VSR_elt_offs(int elt, int width) { + switch (width) { + case 64: return (elt % 2); + case 32: return (elt % 4); + case 16: return (elt % 8); + case 8 : return (elt % 16); + } + } + void _set_VSR_element(el_reg_t* el, int gpr, int elt, int width) { + int new_elt = calc_VSR_elt_offs(elt, width); + int new_reg = calc_VSR_reg_offs(elt, width); + set_VSR_element(el, gpr+new_reg, new_elt, width); + } +``` + +And finally use these functions: + +``` + # VSX-add RT, RA, RB using the "uint16_t" union member "halfs" + for i in range(VL): + el_reg_t result, ra, rb; + _get_VSR_element(&ra, RA, i, 16); + _get_VSR_element(&rb, RB, i, 16); + result.halfs[0] = ra.halfs[0] + rb.halfs[0]; // use array 0 elements + _set_VSR_element(&result, RT, i, 16); + +``` + ## Scalar Identity Behaviour SVP64 is designed so that when the prefix is all zeros, and VL=1, no -- 2.30.2