(no commit message)

[libreriscv.git] / openpower / sv / bitmanip.mdwn
diff --git a/openpower/sv/bitmanip.mdwn b/openpower/sv/bitmanip.mdwn

index 428e3b877faa089e7917902abf8f693c3d8d4278..850a11e9515dc91abbcf009c3bc85969120a0a30 100644 (file)
--- a/openpower/sv/bitmanip.mdwn
+++ b/openpower/sv/bitmanip.mdwn
@@ -1,6 +1,6 @@
  [[!tag standards]]
  
-[[!toc levels=2]]
+[[!toc levels=1]]
  
  # Implementation Log
  
@@ -8,7 +8,6 @@
  * grev <https://bugs.libre-soc.org/show_bug.cgi?id=755>
  * GF2^M <https://bugs.libre-soc.org/show_bug.cgi?id=782>
  
-TODO https://www.felixcloutier.com/x86/pclmulqdq
  
  # bitmanipulation
  
@@ -99,13 +98,10 @@ TODO: convert all instructions to use RT and not RS
  | NN | RT | RA  | RB   | im0-4   | im5-7  00 |1 | grevlog |
  | NN |    |     |      |         | .....  01 |0 | crternlog |
  | NN | RT | RA  | RB   | RC      | mode  010 |Rc| bitmask* |
-| NN | RS | RA  | RB   | RC      | 00    011 |0 | gfbmadd |
-| NN | RS | RA  | RB   | RC      | 00    011 |1 | gfbmaddsub |
-| NN | RS | RA  | RB   | RC      | 01    011 |0 | clmadd |
-| NN | RS | RA  | RB   | RC      | 01    011 |1 | clmaddsub |
-| NN | RS | RA  | RB   | RC      | 10    011 |0 | gfpmadd |
-| NN | RS | RA  | RB   | RC      | 10    011 |1 | gfpmaddsub |
-| NN | RS | RA  | RB   | RC      | 11    011 |  | rsvd |
+| NN |    |     |      |         | 00    011 |  | rsvd |
+| NN |    |     |      |         | 01    011 |  | rsvd |
+| NN |    |     |      |         | 10    011 |  | rsvd |
+| NN |    |     |      |         | 11    011 |Rc| setvl |
  | NN | RT | RA  | RB   | sh0-4   | sh5 1 111 |Rc| bmrevi |
  
  ops (note that av avg and abs as well as vec scalar mask
@@ -127,13 +123,13 @@ double check that instructions didn't need 3 inputs.
  | NN | RA | RB  | RC  | 0  |   01  | 0001 110 |Rc| vec sofm |
  | NN | RA | RB  | RC  | 0  |   10  | 0001 110 |Rc| vec sifm |
  | NN | RA | RB  | RC  | 0  |   11  | 0001 110 |Rc| vec cprop |
+| NN | RT | RA  | RB  | 0  |       | 0101 110 |Rc| rsvd |
  | NN | RT | RA  | RB  | 1  | itype | 0101 110 |Rc| xperm |
-| NN | RA | RB  | RC  | 0  | itype | 0101 110 |Rc| av minmax |
-| NN | RA | RB  | RC  | 1  |   00  | 0101 110 |Rc| av abss |
-| NN | RA | RB  | RC  | 1  |   01  | 0101 110 |Rc| av absu|
-| NN | RA | RB  |     | 1  |   10  | 0101 110 |Rc| av avgadd |
-| NN | RA | RB  |     | 1  |   11  | 0101 110 |Rc| rsvd |
-| NN | RA | RB  |     |    |       | 1001 110 |Rc| rsvd |
+| NN | RA | RB  | RC  | 0  | itype | 1001 110 |Rc| av minmax |
+| NN | RA | RB  | RC  | 1  |   00  | 1001 110 |Rc| av abss |
+| NN | RA | RB  | RC  | 1  |   01  | 1001 110 |Rc| av absu|
+| NN | RA | RB  |     | 1  |   10  | 1001 110 |Rc| av avgadd |
+| NN | RA | RB  |     | 1  |   11  | 1001 110 |Rc| rsvd |
  | NN | RA | RB  |     |    |       | 1101 110 |Rc| rsvd |
  | NN | RA | RB  | RC  | 0  | 00    | 0010 110 |Rc| gorc |
  | NN | RA | RB  | sh  | SH | 00    | 1010 110 |Rc| gorci |
@@ -149,7 +145,7 @@ double check that instructions didn't need 3 inputs.
  | NN | RA | RB  | RC  |    | 10    | --10 110 |Rc| rsvd  |
  | NN | RA | RB  | RC  | 0  | 11    | 1110 110 |Rc| clmulr  |
  | NN | RA | RB  | RC  | 1  | 11    | 1110 110 |Rc| clmulh  |
-| NN |    |     |     |    |       | --11 110 |Rc| setvl  |
+| NN |    |     |     |    |       | --11 110 |Rc| rsvd  |
  
  # ternlog bitops
  
@@ -609,7 +605,7 @@ These are operations on polynomials with coefficients in `GF(2)`, with the
  polynomial's coefficients packed into integers with the following algorithm:
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/pack_poly.py" raw="yes"]]
+[[!inline pagenames="gf_reference/pack_poly.py" raw="yes"]]
  ```
  
  ## Carry-less Multiply Instructions
@@ -625,13 +621,13 @@ They are worth adding as their own non-overwrite operations
  ### `clmul` Carry-less Multiply
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/clmul.py" raw="yes"]]
+[[!inline pagenames="gf_reference/clmul.py" raw="yes"]]
  ```
  
  ### `clmulh` Carry-less Multiply High
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/clmulh.py" raw="yes"]]
+[[!inline pagenames="gf_reference/clmulh.py" raw="yes"]]
  ```
  
  ### `clmulr` Carry-less Multiply (Reversed)
@@ -640,7 +636,7 @@ Useful for CRCs. Equivalent to bit-reversing the result of `clmul` on
  bit-reversed inputs.
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/clmulr.py" raw="yes"]]
+[[!inline pagenames="gf_reference/clmulr.py" raw="yes"]]
  ```
  
  ## `clmadd` Carry-less Multiply-Add
@@ -687,7 +683,7 @@ c = (RC)
  for other instructions.
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/cldivrem.py" raw="yes"]]
+[[!inline pagenames="gf_reference/cldivrem.py" raw="yes"]]
  ```
  
  ## `cldiv` Carry-less Division
@@ -744,7 +740,7 @@ making it reducible, making whatever we're working on no longer a Field.
  Therefore, we can reuse the LSB to indicate `degree == XLEN`.
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/decode_reducing_polynomial.py" raw="yes"]]
+[[!inline pagenames="gf_reference/decode_reducing_polynomial.py" raw="yes"]]
  ```
  
  ## `gfbredpoly` -- Set the Reducing Polynomial SPR `GFBREDPOLY`
@@ -752,7 +748,7 @@ Therefore, we can reuse the LSB to indicate `degree == XLEN`.
  unless this is an immediate op, `mtspr` is completely sufficient.
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfbredpoly.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfbredpoly.py" raw="yes"]]
  ```
  
  ## `gfbmul` -- Binary Galois Field `GF(2^m)` Multiplication
@@ -762,7 +758,7 @@ gfbmul RT, RA, RB
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfbmul.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfbmul.py" raw="yes"]]
  ```
  
  ## `gfbmadd` -- Binary Galois Field `GF(2^m)` Multiply-Add
@@ -772,7 +768,7 @@ gfbmadd RT, RA, RB, RC
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfbmadd.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfbmadd.py" raw="yes"]]
  ```
  
  ## `gfbtmadd` -- Binary Galois Field `GF(2^m)` Twin Multiply-Add (for FFT)
@@ -804,7 +800,7 @@ gfbinv RT, RA
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfbinv.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfbinv.py" raw="yes"]]
  ```
  
  # Instructions for Prime Galois Fields `GF(p)`
@@ -818,7 +814,7 @@ gfpadd RT, RA, RB
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfpadd.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfpadd.py" raw="yes"]]
  ```
  
  the addition happens on infinite-precision integers
@@ -830,7 +826,7 @@ gfpsub RT, RA, RB
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfpsub.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfpsub.py" raw="yes"]]
  ```
  
  the subtraction happens on infinite-precision integers
@@ -842,7 +838,7 @@ gfpmul RT, RA, RB
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfpmul.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfpmul.py" raw="yes"]]
  ```
  
  the multiplication happens on infinite-precision integers
@@ -857,7 +853,7 @@ Some potential hardware implementations are found in:
  <https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.90.5233&rep=rep1&type=pdf>
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfpinv.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfpinv.py" raw="yes"]]
  ```
  
  ## `gfpmadd` Prime Galois Field `GF(p)` Multiply-Add
@@ -867,7 +863,7 @@ gfpmadd RT, RA, RB, RC
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfpmadd.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfpmadd.py" raw="yes"]]
  ```
  
  the multiplication and addition happens on infinite-precision integers
@@ -879,7 +875,7 @@ gfpmsub RT, RA, RB, RC
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfpmsub.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfpmsub.py" raw="yes"]]
  ```
  
  the multiplication and subtraction happens on infinite-precision integers
@@ -891,7 +887,7 @@ gfpmsubr RT, RA, RB, RC
  ```
  
  ```python
-[[!inline pagenames="openpower/sv/bitmanip/gfpmsubr.py" raw="yes"]]
+[[!inline pagenames="gf_reference/gfpmsubr.py" raw="yes"]]
  ```
  
  the multiplication and subtraction happens on infinite-precision integers
@@ -982,7 +978,7 @@ RA ← EXTZ64(count)
  
  ## bit deposit
  
-vpdepd VRT,VRA,VRB, identical to RV bitmamip bdep, found already in v3.1 p106
+pdepd VRT,VRA,VRB, identical to RV bitmanip bdep, found already in v3.1 p106
  
      do while(m < 64)
         if VSR[VRB+32].dword[i].bit[63-m]=1 then do
@@ -1007,9 +1003,9 @@ uint_xlen_t bdep(uint_xlen_t RA, uint_xlen_t RB)
  
  ```
  
-# bit extract
+## bit extract
  
-other way round: identical to RV bext, found in v3.1 p196
+the other way round: pextd, identical to RV bitmanip bext, found in v3.1 p196
  
  ```
  uint_xlen_t bext(uint_xlen_t RA, uint_xlen_t RB)
@@ -1025,7 +1021,7 @@ uint_xlen_t bext(uint_xlen_t RA, uint_xlen_t RB)
  }
  ```
  
-# centrifuge
+## centrifuge
  
  found in v3.1 p106 so not to be added here
  
@@ -1044,13 +1040,18 @@ do i = 0 to 63
  RA = result
  ```
  
-# bit to byte permute
+## bit to byte permute
  
  similar to matrix permute in RV bitmanip, which has XOR and OR variants,
-these perform a transpose.
+these perform a transpose. TODO: this looks like VSX; is there a scalar variant
+in v3.0/v3.1 already?
  
      do j = 0 to 7
        do k = 0 to 7
           b = VSR[VRB+32].dword[i].byte[k].bit[j]
           VSR[VRT+32].dword[i].byte[j].bit[k] = b
  
+# Appendix
+
+see [[bitmanip/appendix]]
+