minor opcode allocation
- | 28.30 |31| name |
- | ------ |--| ------ |
- | 00 |Rc| ternaryi |
- | 001 |Rc| ternary |
- | 011 |Rc| gf* |
- | 101 |1 | ternaryv |
+ | 28.30 |31| name |
+ | ------ |--| --------- |
+ | 00 |Rc| ternaryi |
+ | 001 |Rc| ternary |
+ | 010 |Rc| bitmask |
+ | 011 |Rc| gf* |
+ | 101 |1 | ternaryv |
| 101 |0 | ternarycr |
- | 110 |1 | 2-op |
- | 111 |Rc| reserved |
+ | 110 |Rc| 1/2-op |
+ | 111 |Rc| bitmaski |
+
1-op and variants
| dest | src1 | subop | op |
| ---- | ---- | ----- | -------- |
| RT | RA | .. | bmatflip |
-| RT | RA | size | crc32 |
-| RT | RA | size | crc32c |
2-op and variants
| RT | RA | RB | bdep | dep/ext |
| RT | RA | RB | bext | dep/ext |
| RT | RA | RB | | grev |
+| RT | RA | RB | | clmul* |
| RT | RA | RB | | gorc |
| RT | RA | RB | shuf | shuffle |
| RT | RA | RB | unshuf| shuffle |
| RT | RA | RB | width | xperm |
-| RT | RA | RB | type | clmul |
| RT | RA | RB | type | minmax |
| RT | RA | RB | | |
| RT | RA | RB | | |
3 ops
-* bitmask swt/extract
+* bitmask set/extract
* ternary bitops
* GF
| NN | RA | RB | sh | 00 | 0 | 1110 110 |Rc| gorcwi |
| NN | RA | RB | RC | 00 | 1 | 1110 110 |Rc| bmator |
| NN | RA | RB | RC | 01 | 0 | 0010 110 |Rc| grev |
+| NN | RA | RB | RC | 01 | 1 | 0010 110 |Rc| clmul |
| NN | RA | RB | sh | 01 | SH | 1010 110 |Rc| grevi |
| NN | RA | RB | RC | 01 | 0 | 0110 110 |Rc| grevw |
| NN | RA | RB | sh | 01 | 0 | 1110 110 |Rc| grevwi |
| NN | RA | RB | RC | 10 | 0 | 0110 110 |Rc| shflw |
| NN | RA | RB | RC | 10 | 0 | 1110 110 |Rc| bdep |
| NN | RA | RB | RC | 10 | 1 | 1110 110 |Rc| bext |
-| NN | RA | RB | | 11 | | 1110 110 |Rc| rsvd |
+| NN | RA | RB | RC | 11 | 0 | 1110 110 |Rc| clmulr |
+| NN | RA | RB | RC | 11 | 1 | 1110 110 |Rc| clmulh |
| NN | RA | RB | | | | NN11 110 |Rc| rsvd |
# bit to byte permute
signed/unsigned min/max gives more flexibility.
+```
+uint_xlen_t min(uint_xlen_t rs1, uint_xlen_t rs2)
+{ return (int_xlen_t)rs1 < (int_xlen_t)rs2 ? rs1 : rs2;
+}
+uint_xlen_t max(uint_xlen_t rs1, uint_xlen_t rs2)
+{ return (int_xlen_t)rs1 > (int_xlen_t)rs2 ? rs1 : rs2;
+}
+uint_xlen_t minu(uint_xlen_t rs1, uint_xlen_t rs2)
+{ return rs1 < rs2 ? rs1 : rs2;
+}
+uint_xlen_t maxu(uint_xlen_t rs1, uint_xlen_t rs2)
+{ return rs1 > rs2 ? rs1 : rs2;
+}
+```
+
+
# ternary bitops
Similar to FPGA LUTs: for every bit perform a lookup into a table using an 8bit immediate, or in another register
# bitmask set
based on RV bitmanip singlebit set, instruction format similar to shift
-[[isa/fixedshift]]. bmext is actually covered already (shift-with-mask).
-however bitmask-invert is not, and set/clr are not covered, although they can ise the same Shift ALU.
+[[isa/fixedshift]]. bmext is actually already covered by the shift-with-mask instruction rldicl, but only in its immediate form.
+However, bitmask-invert is not covered, and neither are set/clr, although they can all use the same Shift ALU.
-| 0.5|6.10|11.15|16.20|21.25| 26..30 |31|
-| -- | -- | --- | --- | --- | ------- |--|
-| NN | RT | RA | RB | RC | mode 010 |Rc|
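+The bmext (RB) version is not the same as rldicl, because bmext is a right shift by RC, whereas rldicl is a left rotate. For the immediate version this does not matter, because a right shift by a constant can be encoded as a left rotate by the complementary amount combined with the mask.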
+
+| 0.5|6.10|11.15|16.20|21.25| 26..30 |31| name |
+| -- | -- | --- | --- | --- | ------- |--| ----- |
+| NN | RT | RA | RB | RC | mode 010 |Rc| bm* |
+| NN | RT | RA | RB | RC | 0 1 111 |Rc| bmrev |
+| NN | | | | | 1 1 111 |Rc| rsvd |
```
uint_xlen_t bmset(RA, RB, sh)
}
```
+bmrev: bitmask extract with reverse. The low msb+1 bits of RA are bit-reversed and the result is zero-extended into RT:
+
+```
+msb = rb[5:0];
+rev[0:msb] = ra[msb:0];
+rt = ZE(rev[msb:0]);
+```
+
+| 0.5|6.10|11.15|16.20|21.26| 27..30 |31| name |
+| -- | -- | --- | --- | --- | ------- |--| ------ |
+| NN | RT | RA | RB | sh | 0 111 |Rc| bmrevi |
+
+
+
# grev
based on RV bitmanip
return g1
```
-# crc
-
-* <https://stackoverflow.com/questions/21171733/calculating-constants-for-crc32-using-pclmulqdq>
-* <https://en.wikipedia.org/wiki/Cyclic_redundancy_check>
-
-```
-uint_xlen_t crc32(uint_xlen_t x, int nbits)
-{
- for (int i = 0; i < nbits; i++)
- x = (x >> 1) ^ (0xEDB88320 & ~((x&1)-1));
- return x;
-}
-uint_xlen_t crc32c(uint_xlen_t x, int nbits)
-{
- for (int i = 0; i < nbits; i++)
- x = (x >> 1) ^ (0x82F63B78 & ~((x&1)-1));
- return x;
-}
-uint_xlen_t crc32_b(uint_xlen_t RA) { return crc32(RA, 8); }
-uint_xlen_t crc32_h(uint_xlen_t RA) { return crc32(RA, 16); }
-uint_xlen_t crc32_w(uint_xlen_t RA) { return crc32(RA, 32); }
-uint_xlen_t crc32c_b(uint_xlen_t RA) { return crc32c(RA, 8); }
-uint_xlen_t crc32c_h(uint_xlen_t RA) { return crc32c(RA, 16); }
-uint_xlen_t crc32c_w(uint_xlen_t RA) { return crc32c(RA, 32); }
-#if XLEN > 32
-uint_xlen_t crc32_d (uint_xlen_t RA) { return crc32 (RA, 64); }
-uint_xlen_t crc32c_d(uint_xlen_t RA) { return crc32c(RA, 64); }
-#endif
-```
-
# bitmatrix
```