(no commit message)

[libreriscv.git] / openpower / sv / bitmanip.mdwn
diff --git a/openpower/sv/bitmanip.mdwn b/openpower/sv/bitmanip.mdwn

index 316d148a35c0b6b34a4df79f544f7a2874bed311..56b0c8cb27d96331ead9aa427966864785ff5bcf 100644 (file)
--- a/openpower/sv/bitmanip.mdwn
+++ b/openpower/sv/bitmanip.mdwn
@@ -4,22 +4,22 @@
  
  minor opcode allocation
  
-    |  28.30 |31| name   |
-    | ------ |--| ------ |
-    |   00   |Rc| ternaryi |
-    |  001   |Rc| ternary |
-    |  011   |Rc| gf*      |
-    |  101   |1 | ternaryv |
+    |  28.30 |31| name      |
+    | ------ |--| --------- |
+    |   00   |Rc| ternaryi  |
+    |  001   |Rc| ternary   |
+    |  010   |Rc| bitmask   |
+    |  011   |Rc| gf*       |
+    |  101   |1 | ternaryv  |
      |  101   |0 | ternarycr |
-    |  110   |1 | 2-op |
-    |  111   |Rc| reserved |
+    |  110   |Rc| 1/2-op    |
+    |  111   |Rc| bitmaski  |
+
  1-op and variants
  
  | dest | src1 | subop | op       |
  | ---- | ---- | ----- | -------- |
  | RT   | RA   | ..    | bmatflip | 
-| RT   | RA   | size  | crc32    | 
-| RT   | RA   | size  | crc32c   | 
  
  2-op and variants
  
@@ -30,11 +30,11 @@ minor opcode allocation
  | RT   | RA   | RB   | bdep  | dep/ext  | 
  | RT   | RA   | RB   | bext  | dep/ext  | 
  | RT   | RA   | RB   |       | grev  |
+| RT   | RA   | RB   |       | clmul*  |
  | RT   | RA   | RB   |       | gorc |  
  | RT   | RA   | RB   | shuf  | shuffle | 
  | RT   | RA   | RB   | unshuf| shuffle | 
  | RT   | RA   | RB   | width | xperm  | 
-| RT   | RA   | RB   | type  | clmul | 
  | RT   | RA   | RB   | type | minmax | 
  | RT   | RA   | RB   |  |  | 
  | RT   | RA   | RB   |  |  | 
@@ -42,7 +42,7 @@ minor opcode allocation
  
  3 ops 
  
-* bitmask swt/extract
+* bitmask set/extract
  * ternary bitops
  * GF
  
@@ -81,6 +81,7 @@ ops
  | NN | RA | RB  | sh  | 00    | 0  | 1110 110 |Rc| gorcwi |
  | NN | RA | RB  | RC  | 00    | 1  | 1110 110 |Rc| bmator  |
  | NN | RA | RB  | RC  | 01    | 0  | 0010 110 |Rc| grev |
+| NN | RA | RB  | RC  | 01    | 1  | 0010 110 |Rc| clmul |
  | NN | RA | RB  | sh  | 01    | SH | 1010 110 |Rc| grevi |
  | NN | RA | RB  | RC  | 01    | 0  | 0110 110 |Rc| grevw |
  | NN | RA | RB  | sh  | 01    | 0  | 1110 110 |Rc| grevwi |
@@ -90,7 +91,8 @@ ops
  | NN | RA | RB  | RC  | 10    | 0  | 0110 110 |Rc| shflw |
  | NN | RA | RB  | RC  | 10    | 0  | 1110 110 |Rc| bdep   |
  | NN | RA | RB  | RC  | 10    | 1  | 1110 110 |Rc| bext  |
-| NN | RA | RB  |     | 11    |    | 1110 110 |Rc| rsvd  |
+| NN | RA | RB  | RC  | 11    | 0  | 1110 110 |Rc| clmulr  |
+| NN | RA | RB  | RC  | 11    | 1  | 1110 110 |Rc| clmulh  |
  | NN | RA | RB  |     |       |    | NN11 110 |Rc| rsvd  |
  
  # bit to byte permute
@@ -153,6 +155,22 @@ signed and unsigned min/max for integer.  this is sort-of partly synthesiseable
  
  signed/unsigned min/max gives more flexibility.
  
+```
+uint_xlen_t min(uint_xlen_t rs1, uint_xlen_t rs2)
+{ return (int_xlen_t)rs1 < (int_xlen_t)rs2 ? rs1 : rs2;
+}
+uint_xlen_t max(uint_xlen_t rs1, uint_xlen_t rs2)
+{ return (int_xlen_t)rs1 > (int_xlen_t)rs2 ? rs1 : rs2;
+}
+uint_xlen_t minu(uint_xlen_t rs1, uint_xlen_t rs2)
+{ return rs1 < rs2 ? rs1 : rs2;
+}
+uint_xlen_t maxu(uint_xlen_t rs1, uint_xlen_t rs2)
+{ return rs1 > rs2 ? rs1 : rs2;
+}
+```
+
+
  # ternary bitops
  
  Similar to FPGA LUTs: for every bit perform a lookup into a table using an 8bit immediate, or in another register
@@ -208,12 +226,16 @@ another mode selection would be CRs not Ints.
  # bitmask set
  
  based on RV bitmanip singlebit set, instruction format similar to shift
-[[isa/fixedshift]].  bmext is actually covered already (shift-with-mask).
-however bitmask-invert is not, and set/clr are not covered, although they can ise the same Shift ALU.
+[[isa/fixedshift]].  bmext is actually covered already (shift-with-mask, i.e. rldicl, but only the immediate version).
+however bitmask-invert is not, and set/clr are not covered, although they can use the same Shift ALU.
  
-| 0.5|6.10|11.15|16.20|21.25| 26..30  |31|
-| -- | -- | --- | --- | --- | ------- |--|
-| NN | RT | RA  | RB  | RC  | mode 010 |Rc|
+bmext (RB) version is not the same as rldicl because bmext is a right shift by RC, whereas rldicl is a left rotate.  for the immediate version this does not matter, because a right shift by sh can be encoded as a left rotate by 64-sh.
+
+| 0.5|6.10|11.15|16.20|21.25| 26..30  |31| name  |
+| -- | -- | --- | --- | --- | ------- |--| ----- |
+| NN | RT | RA  | RB  | RC  | mode 010 |Rc| bm*   |
+| NN | RT | RA  | RB  | RC  | 0 1  111 |Rc| bmrev |
+| NN |    |     |     |     | 1 1  111 |Rc| rsvd |
  
  ```
  uint_xlen_t bmset(RA, RB, sh)
@@ -245,6 +267,20 @@ uint_xlen_t bmext(RA, RB, sh)
  }
  ```
  
+bitmask extract with reverse
+
+```
+msb = rb[5:0];
+rev[0:msb] = ra[msb:0];
+rt = ZE(rev[msb:0]);
+```
+
+| 0.5|6.10|11.15|16.20|21.26| 27..30  |31| name   |
+| -- | -- | --- | --- | --- | ------- |--| ------ |
+| NN | RT | RA  | RB  | sh  | 0   111 |Rc| bmrevi |
+
+
+
  # grev
  
  based on RV bitmanip
@@ -552,36 +588,6 @@ def gf_invert(a, mod=0x1B) :
    return g1
  ```
  
-# crc
-
-* <https://stackoverflow.com/questions/21171733/calculating-constants-for-crc32-using-pclmulqdq>
-* <https://en.wikipedia.org/wiki/Cyclic_redundancy_check>
-
-```
-uint_xlen_t crc32(uint_xlen_t x, int nbits)
-{
-    for (int i = 0; i < nbits; i++)
-        x = (x >> 1) ^ (0xEDB88320 & ~((x&1)-1));
-    return x;
-}
-uint_xlen_t crc32c(uint_xlen_t x, int nbits)
-{
-    for (int i = 0; i < nbits; i++)
-        x = (x >> 1) ^ (0x82F63B78 & ~((x&1)-1));
-    return x;
-}
-uint_xlen_t crc32_b(uint_xlen_t RA) { return crc32(RA, 8); }
-uint_xlen_t crc32_h(uint_xlen_t RA) { return crc32(RA, 16); }
-uint_xlen_t crc32_w(uint_xlen_t RA) { return crc32(RA, 32); }
-uint_xlen_t crc32c_b(uint_xlen_t RA) { return crc32c(RA, 8); }
-uint_xlen_t crc32c_h(uint_xlen_t RA) { return crc32c(RA, 16); }
-uint_xlen_t crc32c_w(uint_xlen_t RA) { return crc32c(RA, 32); }
-#if XLEN > 32
-uint_xlen_t crc32_d (uint_xlen_t RA) { return crc32 (RA, 64); }
-uint_xlen_t crc32c_d(uint_xlen_t RA) { return crc32c(RA, 64); }
-#endif
-```
-
  # bitmatrix
  
  ```