From: lkcl
Date: Sat, 1 Apr 2023 17:34:14 +0000 (+0100)
Subject: (no commit message)
X-Git-Tag: opf_rfc_ls012_v1~196
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2f48956d9a54177e129cbaffd83122df339eb120;p=libreriscv.git
---

diff --git a/openpower/sv/rfc/ls010.mdwn b/openpower/sv/rfc/ls010.mdwn
index bacec089a..8e32a11fd 100644
--- a/openpower/sv/rfc/ls010.mdwn
+++ b/openpower/sv/rfc/ls010.mdwn
@@ -180,17 +180,17 @@ Note the deliberate similarity to how VSX register elements are defined:
 Example Vector-looped add operation implementation when elwidths are 64-bit:
 
 ```
-    # vector-add RT, RA,RB using the "uint64_t" union member, "l"
+    # vector-add RT, RA,RB using the "uint64_t" union member, "dwords"
     for i in range(VL):
-        int_regfile[RT].l[i] = int_regfile[RA].l[i] + int_regfile[RB].l[i]
+        int_regfile[RT].dwords[i] = int_regfile[RA].dwords[i] + int_regfile[RB].dwords[i]
 ```
 
 However if elwidth overrides are set to 16 for both source and destination:
 
 ```
-    # vector-add RT, RA, RB using the "uint64_t" union member "s"
+    # vector-add RT, RA, RB using the "uint64_t" union member "halfs"
     for i in range(VL):
-        int_regfile[RT].s[i] = int_regfile[RA].s[i] + int_regfile[RB].s[i]
+        int_regfile[RT].halfs[i] = int_regfile[RA].halfs[i] + int_regfile[RB].halfs[i]
 ```
 
 Hardware Architectural note: to avoid a Read-Modify-Write at the register
@@ -248,7 +248,7 @@ the numerically-lowest element at the **MSB** end of the register.
         case 8 : el->bytes[15-elt] = VSR_regfile[gpr].bytes[15-elt];
         }
     }
-    void set_register_element(el_reg_t* el, int gpr, int elt, int width) {
+    void set_VSR_element(el_reg_t* el, int gpr, int elt, int width) {
         check_num_elements(elt, width);
         switch (width) {
         case 64: VSR_regfile[gpr].dwords[elt] = el->dwords[1-elt];
@@ -270,6 +270,46 @@ Implementations are not permitted to violate the Canonical definition:
 software will be critically relying on the wrapped (overflow) behaviour
 inherently implied from the unbounded c arrays.
 
+To illustrate the exact same loop, with the exact same effect as achieved by
+Simple-V, we are first forced to create wrapper functions:
+
+```
+    int calc_VSR_reg_offs(int elt, int width) {
+        switch (width) {
+        case 64: return floor(elt / 2);
+        case 32: return floor(elt / 4);
+        case 16: return floor(elt / 8);
+        case 8 : return floor(elt / 16);
+        }
+    }
+    int calc_VSR_elt_offs(int elt, int width) {
+        switch (width) {
+        case 64: return (elt % 2);
+        case 32: return (elt % 4);
+        case 16: return (elt % 8);
+        case 8 : return (elt % 16);
+        }
+    }
+    void _set_VSR_element(el_reg_t* el, int gpr, int elt, int width) {
+        int new_elt = calc_VSR_elt_offs(elt, width);
+        int new_reg = calc_VSR_reg_offs(elt, width);
+        set_VSR_element(el, gpr+new_reg, new_elt, width);
+    }
+```
+
+And finally use these functions:
+
+```
+    # VSX-add RT, RA, RB using the "uint64_t" union member "halfs"
+    for i in range(VL):
+        el_reg_t result, ra, rb;
+        _get_VSR_element(&ra, RA, i, 16);
+        _get_VSR_element(&rb, RB, i, 16);
+        result.halfs[0] = ra.halfs[0] + rb.halfs[0]; // use array 0 elements
+        _set_VSR_element(&result, RT, i, 16);
+
+```
+
 ## Scalar Identity Behaviour
 
 SVP64 is designed so that when the prefix is all zeros, and VL=1, no
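
The VSX-add loop added in the final hunk calls `_get_VSR_element`, whose
definition does not appear in this patch. A minimal sketch of such a wrapper
is given below, assuming a `get_VSR_element` accessor exists as the read
counterpart of the `set_VSR_element` function shown in the second hunk (that
accessor name is an assumption, not taken from this patch); it simply reuses
the `calc_VSR_reg_offs`/`calc_VSR_elt_offs` helpers:

```
    /* sketch only: "get_VSR_element" is assumed to be the read counterpart
       of set_VSR_element; everything else comes from the patch above */
    void _get_VSR_element(el_reg_t* el, int gpr, int elt, int width) {
        // split the linear element index into (VSR offset, element offset)
        int new_elt = calc_VSR_elt_offs(elt, width);
        int new_reg = calc_VSR_reg_offs(elt, width);
        get_VSR_element(el, gpr+new_reg, new_elt, width); // assumed accessor
    }
```

As a worked example of the index split, with width=16 a loop index i=11 gives
calc_VSR_reg_offs(11, 16) = 1 and calc_VSR_elt_offs(11, 16) = 3, i.e. halfword
element 3 of VSR number RA+1.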