Example implementation of a Vector-looped add operation when elwidths are 64-bit:
```
- # vector-add RT, RA,RB using the "uint64_t" union member, "l"
+ # vector-add RT, RA, RB using the "uint64_t" union member "dwords"
for i in range(VL):
- int_regfile[RT].l[i] = int_regfile[RA].l[i] + int_regfile[RB].l[i]
+     int_regfile[RT].dwords[i] = int_regfile[RA].dwords[i] + int_regfile[RB].dwords[i]
```
However, if elwidth overrides are set to 16 for both source and destination:
```
- # vector-add RT, RA, RB using the "uint64_t" union member "s"
+ # vector-add RT, RA, RB using the "uint64_t" union member "halfs"
for i in range(VL):
- int_regfile[RT].s[i] = int_regfile[RA].s[i] + int_regfile[RB].s[i]
+     int_regfile[RT].halfs[i] = int_regfile[RA].halfs[i] + int_regfile[RB].halfs[i]
```
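+
+For reference, a minimal sketch of the kind of union these examples assume
+(the member names "dwords" and "halfs" match the loops above; "actual_bytes",
+"bytes", "words" and the GNU C zero-length-array trick are illustrative
+assumptions):
+
+```
+typedef union {
+    uint8_t  actual_bytes[8]; // gives the union the size of one 64-bit GPR
+    uint8_t  bytes[0];  // elwidth=8 view
+    uint16_t halfs[0];  // elwidth=16 view
+    uint32_t words[0];  // elwidth=32 view
+    uint64_t dwords[0]; // elwidth=64 view
+} el_reg_t;
+
+el_reg_t int_regfile[128]; // the SVP64 extended GPR file
+```
+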
Hardware Architectural note: to avoid a Read-Modify-Write at the register
case 8 : el->bytes[0] = VSR_regfile[gpr].bytes[15-elt]; break;
}
}
- void set_register_element(el_reg_t* el, int gpr, int elt, int width) {
+ void set_VSR_element(el_reg_t* el, int gpr, int elt, int width) {
check_num_elements(elt, width);
switch (width) {
case 64: VSR_regfile[gpr].dwords[1-elt] = el->dwords[0]; break;
will be critically relying on the wrapped (overflow) behaviour inherently
implied by the unbounded C arrays.
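+
+As a hypothetical illustration of that wrap behaviour (using the int_regfile
+union sketched earlier): with elwidth=8, element index 8 of GPR(3) is not an
+error, it simply carries over into the first byte of GPR(4):
+
+```
+# elwidth=8: byte elements wrap into the next underlying 64-bit register,
+# so these two expressions refer to the same storage:
+int_regfile[3].bytes[8]
+int_regfile[4].bytes[0]
+```
+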
+To illustrate the exact same loop with the exact same effect as that achieved
+by Simple-V, we are first forced to create wrapper functions:
+
+```
+ // which VSR, relative to the base, holds element "elt"
+ // (a 128-bit VSR holds 128/width elements)
+ int calc_VSR_reg_offs(int elt, int width) {
+     switch (width) {
+         case 64: return elt / 2;
+         case 32: return elt / 4;
+         case 16: return elt / 8;
+         case 8 : return elt / 16;
+     }
+ }
+ // which element within that VSR
+ int calc_VSR_elt_offs(int elt, int width) {
+     switch (width) {
+         case 64: return (elt % 2);
+         case 32: return (elt % 4);
+         case 16: return (elt % 8);
+         case 8 : return (elt % 16);
+     }
+ }
+ // translate a flat element index into (VSR, element) and write it back
+ void _set_VSR_element(el_reg_t* el, int gpr, int elt, int width) {
+     int new_elt = calc_VSR_elt_offs(elt, width);
+     int new_reg = calc_VSR_reg_offs(elt, width);
+     set_VSR_element(el, gpr+new_reg, new_elt, width);
+ }
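+ // assumption: the analogous wrapper around get_VSR_element,
+ // which the loop below relies on
+ void _get_VSR_element(el_reg_t* el, int gpr, int elt, int width) {
+     int new_elt = calc_VSR_elt_offs(elt, width);
+     int new_reg = calc_VSR_reg_offs(elt, width);
+     get_VSR_element(el, gpr+new_reg, new_elt, width);
+ }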
+```
+
+And finally, use these wrappers to reproduce what the two-line Simple-V
+loop above achieves:
+
+```
+ # VSX-add RT, RA, RB using the "uint64_t" union member "halfs"
+ for i in range(VL):
+     el_reg_t result, ra, rb;
+     _get_VSR_element(&ra, RA, i, 16);
+     _get_VSR_element(&rb, RB, i, 16);
+     result.halfs[0] = ra.halfs[0] + rb.halfs[0]; // element 0 of each temporary
+     _set_VSR_element(&result, RT, i, 16);
+```
+
## Scalar Identity Behaviour
SVP64 is designed so that when the prefix is all zeros, and VL=1, no