(no commit message)

[libreriscv.git] / simple_v_extension / vector_ops.mdwn
diff --git a/simple_v_extension/vector_ops.mdwn b/simple_v_extension/vector_ops.mdwn

index 4d93ab2ed06e99fd7439dceddd46df9cf5f695f9..ce4c370cc1ef5a813b5a3fa8d219c8afe65aecac 100644 (file)
--- a/simple_v_extension/vector_ops.mdwn
+++ b/simple_v_extension/vector_ops.mdwn
@@ -15,6 +15,10 @@ Examples which can require SUBVL include cross product and may in future involve
  ## CORDIC
  
  * SUBVL=2, vd, vs; SUBVL ignored on beta.
+* VL nonzero is ok.  beta as a scalar is ok (it applies across all vectors).
+* Non-vector args for vd or vs, or SUBVL != 2, are reserved.
+
+6 opcode options (fmt3):
  
  * CORDIC.lin.rot vd, vs, beta
  * CORDIC.cir.rot vd, vs, beta
@@ -35,42 +39,15 @@ CORDIC can also be used for performing DCT.  See
  
  vx, vy = CORDIC(vx, vy, coordinate\_mode, beta)
  
-     int i = 0;
-     int iterations = 0; // Number of times to run the algorithm
-     float arctanTable[iterations]; // in Radians
-     float K = 0.6073; // K
-     float v_x,v_y; // Vector v; x and y components
-
-     for(i=0; i < iterations; i++) {
-        arctanTable[i] = atan(pow(2,-i));
-     }
-
-     float vnew_x;   // To store the new value of x;
-     for(i = 0; i < iterations; i++) {
-         // If beta is negative, we need to do a counter-clockwise rotation:
-         if( beta < 0) {
-            vnew_x = v_x + (v_y*pow(2,-i)); 
-            v_y -= (v_x*pow(2,-i));  
-            beta += arctanTable[i]; 
-         }
-         // If beta is positive, we need to do a clockwise rotation:
-         else {
-            vnew_x = v_x - (v_y*pow(2,-i));
-            v_y += (v_x*pow(2,-i));
-            beta -= arctanTable[i];
-         }
-         v_x = vnew_x;
-     }
-     v_x *= K;
-     v_y *= K;
  
  Links:
  
  * <http://www.myhdl.org/docs/examples/sinecomp/>
+* <https://www.atlantis-press.com/proceedings/jcis2006/232>
  
  ## Vector cross product
  
-SUBVL=2,3,4 all regs
+SUBVL=3, all regs. VL nonzero produces multiple vd results.
  
  * VCROSS vd, vs1, vs2
  
@@ -93,10 +70,19 @@ Pseudocode:
      vec3 p = t3 * t4;
      vec3 cross = t1 * t2 - p;
  
+Assembler:
+
+    fpermute,2130 F4, F1
+    fpermute,1320 F5, F1
+    fpermute,2130 F6, F2
+    fpermute,1320 F7, F2
+    fmul F8, F5, F6
+    fmulsub F3, F4, F7, F8
+
  ## Vector dot product
  
-* SUBVL ignored on rd.  SUBVL=2,3,4 vs1,vs2
-* rd=scalar, SUBVL=1, vs1, vs2=vec
+* SUBVL ignored on rd.  SUBVL=2,3,4 on vs1, vs2. If all are vectors, multiple results are generated. If rd is scalar, only the first (unpredicated) SUBVector is used.
+* rd=scalar, SUBVL=1 and vs1, vs2=vec will produce one scalar result. Predication is allowed on the src vectors.
  
  * VDOT rd, vs1, vs2
  
@@ -118,6 +104,16 @@ Pseudocode in c:
          return result;
      }
  
+## Vector Normalisation (not included)
+
+Vector normalisation may be performed through dot product, reciprocal square root and multiplication:
+
+    fdot F3, F1, F1 # vector dot with self
+    rcpsqrta F3, F3
+    fscale,0 F2, F3, F1
+
+Or it may be performed through VLEN (Vector length) and division.
+
  ## Vector length
  
  * rd=scalar, vs1=vec (SUBVL=1)
@@ -131,6 +127,8 @@ The scalar length of a vector:
  
      sqrt(x[0]^2 + x[1]^2 + ...).
  
+One option is for this to be a macro op fusion sequence, with inverse-sqrt also being a second macro op sequence suitable for normalisation.
+
  ## Vector distance
  
  * VDIST rd, vs1, vs2
@@ -168,7 +166,7 @@ Pseudocode:
  * VSLERP vd, vs1, vs2, rs3
  
  Not recommended as it is not commonly used and has several trigonometric
-functions. Also a costly 4 arg operation.
+functions, although CORDIC in vector rotate circular mode is designed for this purpose. Also a costly 4 arg operation.
  
  <https://en.m.wikipedia.org/wiki/Slerp>
  
@@ -317,3 +315,13 @@ Another is to overwrite one of the src registers.
  # Opcode Table
  
  TODO
+
+# Links
+
+* <http://lists.libre-riscv.org/pipermail/libre-riscv-dev/2019-September/002736.html>
+* <http://lists.libre-riscv.org/pipermail/libre-riscv-dev/2019-September/002733.html>
+* <http://bugs.libre-riscv.org/show_bug.cgi?id=142>
+
+Research Papers
+
+* <https://www.researchgate.net/publication/2938554_PLX_FP_An_Efficient_Floating-Point_Instruction_Set_for_3D_Graphics>