(no commit message)

[libreriscv.git] / simple_v_extension / daxpy_example.mdwn
diff --git a/simple_v_extension/daxpy_example.mdwn b/simple_v_extension/daxpy_example.mdwn

index cc0ee3b7ab43c937fcc674e8f4410919189f774d..9a43ef314e3de1398faada4be93fd4baaf011f42 100644 (file)
--- a/simple_v_extension/daxpy_example.mdwn
+++ b/simple_v_extension/daxpy_example.mdwn
@@ -1,29 +1,37 @@
-    # c code
+# c code
+
+```
      void daxpy(size_t n, double a, const double x[], double y[])
      {
       for (size_t i = 0; i < n; i++) {
         y[i] = a*x[i] + y[i];
       }
      }
+```
  
-    # SV Version
-    # a0 is n, a1 is ptr to x[0], a2 is ptr to y[0], fa0 is a (scalar)
-      VBLK.REG[0] = {type: F, isvec: 1, regkey: a3, regidx: a3, elwidth: dflt}
-      VBLK.REG[1] = {type: F, isvec: 1, regkey: a7, regidx: a7, elwidth: dflt}
-    loop:
-      VBLK.SETVL  t0, a0, #4   # MVL=4, vl = t0 = min(a0, MVL))
-      c.ld     a3, a1          # load 4 registers a3-6 from x
-      c.slli   t1, t0, 3       # t1 = vl * 8 (in bytes)
-      c.ld     a7, a2          # load 4 registers a7-10 from y
-      c.add    a1, a1, t1      # increment pointer to x by vl*8
-      fmadd  a7, a3, fa0, a7   # v1 += v0 * fa0 (y = a * x + y)
-      c.sub    a0, a0, t0      # n -= vl (t0)
-      c.st     a7, a2          # store 4 registers a7-10 to y
-      c.add    a2, a2, t1      # increment pointer to y by vl*8
-      c.bnez   a0, loop        # repeat if n != 0
-      c.ret                    # return
+# SVP64 Power ISA version
+
+```  
+
+    # r5: n
+    # r6: x
+    # r7: y
+    # fp1: a
+    mtctr 5                 # move n to CTR
+    addi r10,r7,0           # copy y-ptr into r10
+.L2
+    setvl MAXVL=32,VL=CTR   # could do more
+    sv.lfdup/els *32,8(6)   # load from x
+    sv.lfdup/els *64,8(7)   # load from y
+    sv.fmadd *64,*64,1,*32  # fmadd
+    sv.stfdup/els *64,8(10) # store to y via the pointer copy in r10
+    sv.bc/ctr .L2           # decrement CTR by VL, loop while CTR != 0
+    blr                     # return
+```
+
+# RVV version
  
-    # RVV version
+```
      # a0 is n, a1 is pointer to x[0], a2 is pointer to y[0], fa0 is a
        li t0, 2<<25
        vsetdcfg t0             # enable 2 64b Fl.Pt. registers
@@ -39,3 +47,4 @@
        c.add    a2, a2, t1     # increment pointer to y by vl*8
        c.bnez   a0, loop       # repeat if n != 0
        c.ret                   # return
+```