From a393c854c825c5a775545344d580a0b9ca84dd20 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Fri, 9 Sep 2022 01:21:14 +0100
Subject: [PATCH] use-cases

---
 openpower/sv/rfc/ls001.mdwn | 74 ++++++++++++++++++++++++++++++++++---
 1 file changed, 68 insertions(+), 6 deletions(-)

diff --git a/openpower/sv/rfc/ls001.mdwn b/openpower/sv/rfc/ls001.mdwn
index 460098ab4..74c6fb1dd 100644
--- a/openpower/sv/rfc/ls001.mdwn
+++ b/openpower/sv/rfc/ls001.mdwn
@@ -275,9 +275,6 @@ in 7 instructions.  Expanding this to 16-wide is a matter of setting
 `svshape 16`.  Lee Composition may be deployed to construct non-power-two
 DCTs.  The cosine table may be computed (once) with 18 Vector instructions.
 
-<https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=src/openpower/decoder/isa/test_caller_svp64_dct.py;hb=HEAD>
-
-
 ```
 1014     def test_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self):
 1015         """>>> lst = [# LOAD bit-reversed with half-swap
@@ -293,6 +290,8 @@ DCTs.  The cosine table may be computed (once) with 18 Vector instructions.
 1025                       "sv.ffmadds *0, *0, *0, *8"
 ```
 
+<https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=src/openpower/decoder/isa/test_caller_svp64_dct.py;hb=HEAD>
+
 # Use case: Matrix Multiply
 
 Matrix Multiply of any size (non-power-2) up to a total of 127 operations
@@ -301,7 +300,6 @@ ISA at least one source requires Transposition and often massive rolling
 repetition of data is required.  These 3 instructions may be used as the
 "inner triple-loop kernel" of the usual 6-loop Massive Matrix Multiply.
 
-<https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=src/openpower/decoder/isa/test_caller_svp64_matrix.py;hb=HEAD>
 ```
   28     def test_sv_remap1(self):
   29         """>>> lst = ["svshape 2, 2, 3, 0, 0",
@@ -310,6 +308,8 @@ repetition of data is required.  These 3 instructions may be used as the
   32                      ]
 ```
 
+<https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=src/openpower/decoder/isa/test_caller_svp64_matrix.py;hb=HEAD>
+
 # Use case: Parallel Reduction
 
 Parallel (Horizontal) Reduction is often deeply problematic in SIMD and
@@ -317,8 +317,6 @@ Vector ISAs.  Parallel Reduction is Fully Deterministic in Simple-V and
 thus may even usefully be deployed on non-associative and non-commutative
 operations.
 
-<https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py;hb=HEAD>
-
 ```
   75     def test_sv_remap2(self):
   76         """>>> lst = ["svshape 7, 0, 0, 7, 0",
@@ -329,6 +327,8 @@ operations.
   81                                          left/right due to subf
 ```
 
+<https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py;hb=HEAD>
+
 # Use case: LD/ST-Multi
 
 Context-switching saving and restoring of registers on the stack often
@@ -343,6 +343,68 @@ runtime-configurable LD/ST-Multi is achievable with 2 instructions.
     setvli 64
     sv.ld/sm=EQ *rt,0(ra)
 ```
+\newpage{}
+
+# Use case: Twin-Predication, re-entrant
+
+This example demonstrates two key concepts: firstly, Twin-Predication
+(a source predicate mask separate from the destination predicate mask);
+secondly, that sufficient state is stored in the Vector Context SPR, SVSTATE,
+for full re-entrancy on a Context Switch or function call *even if
+in the middle of executing a loop*.  It also demonstrates that it is
+permissible for a programmer to write **directly** to the SVSTATE
+SPR, and still expect Deterministic Behaviour.
+
+```
+ 292     # checks that we are able to resume in the middle of a VL loop,
+ 293     # after an interrupt, or after the user has updated src/dst step
+ 294     # let's assume the user has prepared src/dst step before running this
+ 295     # vector instruction
+ 296     def test_intpred_reentrant(self):
+ 297         #   reg num        0 1 2 3 4 5 6 7 8 9 10 11 12
+ 298         #   srcstep=1                           v
+ 299         #   src r3=0b0101                    Y  N  Y  N
+ 300         #                                    :     |
+ 301         #                              + - - +     |
+ 302         #                              :   +-------+
+ 303         #                              :   |
+ 304         #   dest ~r3=0b1010          N Y N Y
+ 305         #   dststep=2                    ^
+ 306
+ 307         isa = SVP64Asm(['sv.extsb/sm=r3/dm=~r3 *5, *9'])
+```
+
+<https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=src/openpower/decoder/isa/test_caller_svp64_predication.py;hb=HEAD>
+
+# Use case: 3D GPU style "Branch Conditional"
+
+(*Note: the Specification is ready, but Simulator support for the
+full specification is still under development.*)
+This example demonstrates a 2-long Vector Branch-Conditional only
+succeeding if *all* elements in the Vector are successful.  This
+avoids the need for additional instructions that would need to
+perform a Parallel Reduction of a Vector of Condition Register
+tests down to a single value, on which a Scalar Branch-Conditional
+could then be performed.  The full rationale is given at:
+
+<https://libre-soc.org/openpower/sv/branches/>
+
+```
+  80   def test_sv_branch_cond_all(self):
+  81       for i in [7, 8, 9]:
+  82           lst = SVP64Asm(
+  83               [f"addi 1, 0, {i+1}",  # set r1 to i
+  84                f"addi 2, 0, {i}",  # set r2 to i
+  85               "cmpi cr0, 1, 1, 8",  # compare r1 with 10 and store to cr0
+  86               "cmpi cr1, 1, 2, 8",  # compare r2 with 10 and store to cr1
+  87               "sv.bc/all 12, *1, 0xc",    # bgt 0xc - branch if BOTH
+  88                                      # r1 AND r2 greater 8 to the nop below
+  89               "addi 3, 0, 0x1234",   # if tests fail this shouldn't execute
+  90               "or 0, 0, 0"]          # branch target
+  91               )
+```
+
+<https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=src/openpower/decoder/isa/test_caller_svp64_bc.py;hb=HEAD>
 
 [[!tag opf_rfc]]
 
-- 
2.30.2