based on RV bitmanip, with an instruction format similar to the shift instructions
```
-uint_xlen_t sbset(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t sbset(uint_xlen_t RA, uint_xlen_t RB)
{
- int shamt = rs2 & (XLEN - 1);
- return rs1 | (uint_xlen_t(1) << shamt);
+ int shamt = RB & (XLEN - 1);
+ return RA | (uint_xlen_t(1) << shamt);
}
-uint_xlen_t sbclr(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t sbclr(uint_xlen_t RA, uint_xlen_t RB)
{
- int shamt = rs2 & (XLEN - 1);
- return rs1 & ~(uint_xlen_t(1) << shamt);
+ int shamt = RB & (XLEN - 1);
+ return RA & ~(uint_xlen_t(1) << shamt);
}
-uint_xlen_t sbinv(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t sbinv(uint_xlen_t RA, uint_xlen_t RB)
{
- int shamt = rs2 & (XLEN - 1);
- return rs1 ^ (uint_xlen_t(1) << shamt);
+ int shamt = RB & (XLEN - 1);
+ return RA ^ (uint_xlen_t(1) << shamt);
}
-uint_xlen_t sbext(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t sbext(uint_xlen_t RA, uint_xlen_t RB)
{
- int shamt = rs2 & (XLEN - 1);
- return 1 & (rs1 >> shamt);
+ int shamt = RB & (XLEN - 1);
+ return 1 & (RA >> shamt);
}
```
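A hypothetical usage sketch (not from the spec), assuming the definitions above with `uint_xlen_t` = `uint64_t` and `XLEN` = 64:
```
// Hypothetical usage sketch: single-bit set/clear/invert/extract.
#include <assert.h>
#include <stdint.h>
int main(void) {
    assert(sbset(0x0, 3) == 0x8);    // set bit 3
    assert(sbclr(0xFF, 0) == 0xFE);  // clear bit 0
    assert(sbinv(0x8, 3) == 0x0);    // invert bit 3
    assert(sbext(0x8, 3) == 1);      // extract bit 3
    return 0;
}
```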
based on RV bitmanip
```
-uint64_t grev64(uint64_t rs1, uint64_t rs2)
+uint64_t grev64(uint64_t RA, uint64_t RB)
{
- uint64_t x = rs1;
- int shamt = rs2 & 63;
+ uint64_t x = RA;
+ int shamt = RB & 63;
if (shamt & 1) x = ((x & 0x5555555555555555LL) << 1) |
((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
    if (shamt & 2) x = ((x & 0x3333333333333333LL) << 2) |
                       ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
    if (shamt & 4) x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) |
                       ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
    if (shamt & 8) x = ((x & 0x00FF00FF00FF00FFLL) << 8) |
                       ((x & 0xFF00FF00FF00FF00LL) >> 8);
    if (shamt & 16) x = ((x & 0x0000FFFF0000FFFFLL) << 16) |
                        ((x & 0xFFFF0000FFFF0000LL) >> 16);
    if (shamt & 32) x = ((x & 0x00000000FFFFFFFFLL) << 32) |
                        ((x & 0xFFFFFFFF00000000LL) >> 32);
    return x;
}
```
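A hypothetical sketch of common grev shift amounts, assuming the grev64 definition above:
```
// Hypothetical usage sketch: grev64 is a generalized bit reversal.
#include <assert.h>
#include <stdint.h>
int main(void) {
    assert(grev64(1, 63) == 0x8000000000000000ULL);                     // full 64-bit reversal
    assert(grev64(0x1122334455667788ULL, 56) == 0x8877665544332211ULL); // byte swap
    assert(grev64(1, 7) == 0x80);                                       // reverse bits within each byte
    return 0;
}
```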
based on RV bitmanip
```
-uint32_t shfl32(uint32_t rs1, uint32_t rs2)
+uint32_t shfl32(uint32_t RA, uint32_t RB)
{
- uint32_t x = rs1;
- int shamt = rs2 & 15;
+ uint32_t x = RA;
+ int shamt = RB & 15;
if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2);
if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
return x;
}
-uint32_t unshfl32(uint32_t rs1, uint32_t rs2)
+uint32_t unshfl32(uint32_t RA, uint32_t RB)
{
- uint32_t x = rs1;
- int shamt = rs2 & 15;
+ uint32_t x = RA;
+ int shamt = RB & 15;
if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2);
if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
    if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
return x;
}
-uint64_t shfl64(uint64_t rs1, uint64_t rs2)
+uint64_t shfl64(uint64_t RA, uint64_t RB)
{
- uint64_t x = rs1;
- int shamt = rs2 & 31;
+ uint64_t x = RA;
+ int shamt = RB & 31;
if (shamt & 16) x = shuffle64_stage(x, 0x0000ffff00000000LL,
0x00000000ffff0000LL, 16);
if (shamt & 8) x = shuffle64_stage(x, 0x00ff000000ff0000LL,
                                       0x0000ff000000ff00LL, 8);
    if (shamt & 4) x = shuffle64_stage(x, 0x0f000f000f000f00LL,
                                       0x00f000f000f000f0LL, 4);
    if (shamt & 2) x = shuffle64_stage(x, 0x3030303030303030LL,
                                       0x0c0c0c0c0c0c0c0cLL, 2);
    if (shamt & 1) x = shuffle64_stage(x, 0x4444444444444444LL,
                                       0x2222222222222222LL, 1);
return x;
}
-uint64_t unshfl64(uint64_t rs1, uint64_t rs2)
+uint64_t unshfl64(uint64_t RA, uint64_t RB)
{
- uint64_t x = rs1;
- int shamt = rs2 & 31;
+ uint64_t x = RA;
+ int shamt = RB & 31;
if (shamt & 1) x = shuffle64_stage(x, 0x4444444444444444LL,
0x2222222222222222LL, 1);
    if (shamt & 2) x = shuffle64_stage(x, 0x3030303030303030LL,
                                       0x0c0c0c0c0c0c0c0cLL, 2);
    if (shamt & 4) x = shuffle64_stage(x, 0x0f000f000f000f00LL,
                                       0x00f000f000f000f0LL, 4);
    if (shamt & 8) x = shuffle64_stage(x, 0x00ff000000ff0000LL,
                                       0x0000ff000000ff00LL, 8);
    if (shamt & 16) x = shuffle64_stage(x, 0x0000ffff00000000LL,
                                        0x00000000ffff0000LL, 16);
    return x;
}
```
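The `shuffle32_stage`/`shuffle64_stage` helpers referenced above are not shown; their definitions, as given in the RV bitmanip pseudocode:
```
uint32_t shuffle32_stage(uint32_t src, uint32_t maskL, uint32_t maskR, int N)
{
    uint32_t x = src & ~(maskL | maskR);  // keep bits not moved by this stage
    x |= ((src << N) & maskL) | ((src >> N) & maskR);
    return x;
}
uint64_t shuffle64_stage(uint64_t src, uint64_t maskL, uint64_t maskR, int N)
{
    uint64_t x = src & ~(maskL | maskR);
    x |= ((src << N) & maskL) | ((src >> N) & maskR);
    return x;
}
```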
based on RV bitmanip
```
-uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2)
+uint_xlen_t xperm(uint_xlen_t RA, uint_xlen_t RB, int sz_log2)
{
uint_xlen_t r = 0;
uint_xlen_t sz = 1LL << sz_log2;
uint_xlen_t mask = (1LL << sz) - 1;
for (int i = 0; i < XLEN; i += sz) {
- uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2;
+ uint_xlen_t pos = ((RB >> i) & mask) << sz_log2;
if (pos < XLEN)
- r |= ((rs1 >> pos) & mask) << i;
+ r |= ((RA >> pos) & mask) << i;
}
return r;
}
-uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); }
-uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); }
-uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); }
-uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); }
+uint_xlen_t xperm_n (uint_xlen_t RA, uint_xlen_t RB) { return xperm(RA, RB, 2); }
+uint_xlen_t xperm_b (uint_xlen_t RA, uint_xlen_t RB) { return xperm(RA, RB, 3); }
+uint_xlen_t xperm_h (uint_xlen_t RA, uint_xlen_t RB) { return xperm(RA, RB, 4); }
+uint_xlen_t xperm_w (uint_xlen_t RA, uint_xlen_t RB) { return xperm(RA, RB, 5); }
```
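A hypothetical usage sketch, assuming `XLEN` = 64 and `uint_xlen_t` = `uint64_t`: xperm_b treats each byte of RB as an index selecting a byte of RA, so a single call can permute, duplicate, or zero bytes.
```
// Hypothetical usage sketch: byte permutation via xperm_b (XLEN = 64 assumed).
#include <assert.h>
#include <stdint.h>
int main(void) {
    uint64_t RA = 0x8877665544332211ULL;
    uint64_t RB = 0x0001020304050607ULL;  // byte indices 7,6,...,0 reading from the LSB up
    assert(xperm_b(RA, RB) == 0x1122334455667788ULL);  // reverses the byte order
    return 0;
}
```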
# gorc
based on RV bitmanip
```
-uint32_t gorc32(uint32_t rs1, uint32_t rs2)
+uint32_t gorc32(uint32_t RA, uint32_t RB)
{
- uint32_t x = rs1;
- int shamt = rs2 & 31;
+ uint32_t x = RA;
+ int shamt = RB & 31;
if (shamt & 1) x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1);
if (shamt & 2) x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2);
if (shamt & 4) x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4);
    if (shamt & 8) x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8);
    if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);
return x;
}
-uint64_t gorc64(uint64_t rs1, uint64_t rs2)
+uint64_t gorc64(uint64_t RA, uint64_t RB)
{
- uint64_t x = rs1;
- int shamt = rs2 & 63;
+ uint64_t x = RA;
+ int shamt = RB & 63;
if (shamt & 1) x |= ((x & 0x5555555555555555LL) << 1) |
((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
    if (shamt & 2) x |= ((x & 0x3333333333333333LL) << 2) |
                       ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
    if (shamt & 4) x |= ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) |
                       ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
    if (shamt & 8) x |= ((x & 0x00FF00FF00FF00FFLL) << 8) |
                       ((x & 0xFF00FF00FF00FF00LL) >> 8);
    if (shamt & 16) x |= ((x & 0x0000FFFF0000FFFFLL) << 16) |
                        ((x & 0xFFFF0000FFFF0000LL) >> 16);
    if (shamt & 32) x |= ((x & 0x00000000FFFFFFFFLL) << 32) |
                        ((x & 0xFFFFFFFF00000000LL) >> 32);
    return x;
}
```
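A hypothetical usage sketch: with shamt = 7, gorc ORs together all bits within each byte, the classic null-byte detection idiom (each byte becomes 0xFF iff it was non-zero):
```
// Hypothetical usage sketch: gorc64 as byte-wise "or-combine".
#include <assert.h>
#include <stdint.h>
int main(void) {
    assert(gorc64(0x0000010000200003ULL, 7) == 0x0000FF0000FF00FFULL);
    return 0;
}
```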
based on RV bitmanip, covered by ternary bitops
```
-uint_xlen_t cmix(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) {
- return (rs1 & rs2) | (rs3 & ~rs2);
+uint_xlen_t cmix(uint_xlen_t RA, uint_xlen_t RB, uint_xlen_t RC) {
+ return (RA & RB) | (RC & ~RB);
}
```
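A hypothetical usage sketch: cmix is a bitwise select, taking bits from RA where RB is 1 and from RC where RB is 0:
```
// Hypothetical usage sketch: merge two values under a mask with cmix.
#include <assert.h>
#include <stdint.h>
int main(void) {
    // Low nibble from 0xAB, high nibble from 0xCD.
    assert(cmix(0xAB, 0x0F, 0xCD) == 0xCB);
    return 0;
}
```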
based on RV bitmanip
```
-uint_xlen_t bext(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t bext(uint_xlen_t RA, uint_xlen_t RB)
{
uint_xlen_t r = 0;
for (int i = 0, j = 0; i < XLEN; i++)
- if ((rs2 >> i) & 1) {
- if ((rs1 >> i) & 1)
+ if ((RB >> i) & 1) {
+ if ((RA >> i) & 1)
r |= uint_xlen_t(1) << j;
j++;
}
return r;
}
-uint_xlen_t bdep(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t bdep(uint_xlen_t RA, uint_xlen_t RB)
{
uint_xlen_t r = 0;
for (int i = 0, j = 0; i < XLEN; i++)
- if ((rs2 >> i) & 1) {
- if ((rs1 >> j) & 1)
+ if ((RB >> i) & 1) {
+ if ((RA >> j) & 1)
r |= uint_xlen_t(1) << i;
j++;
}
    return r;
}
```
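A hypothetical usage sketch: bext gathers the RA bits selected by the RB mask into the low-order bits (like x86 PEXT), and bdep scatters the low-order bits of RA out to the mask positions (like x86 PDEP):
```
// Hypothetical usage sketch: bit extract/deposit under a mask.
#include <assert.h>
#include <stdint.h>
int main(void) {
    assert(bext(0xCAFE, 0x00F0) == 0xF);  // gather bits 4..7 into bits 0..3
    assert(bdep(0x3, 0x0101) == 0x0101);  // scatter two 1-bits to positions 0 and 8
    return 0;
}
```
based on RV bitmanip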
```
-uint_xlen_t clmul(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t clmul(uint_xlen_t RA, uint_xlen_t RB)
{
uint_xlen_t x = 0;
for (int i = 0; i < XLEN; i++)
- if ((rs2 >> i) & 1)
- x ^= rs1 << i;
+ if ((RB >> i) & 1)
+ x ^= RA << i;
return x;
}
-uint_xlen_t clmulh(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t clmulh(uint_xlen_t RA, uint_xlen_t RB)
{
uint_xlen_t x = 0;
for (int i = 1; i < XLEN; i++)
- if ((rs2 >> i) & 1)
- x ^= rs1 >> (XLEN-i);
+ if ((RB >> i) & 1)
+ x ^= RA >> (XLEN-i);
return x;
}
-uint_xlen_t clmulr(uint_xlen_t rs1, uint_xlen_t rs2)
+uint_xlen_t clmulr(uint_xlen_t RA, uint_xlen_t RB)
{
uint_xlen_t x = 0;
for (int i = 0; i < XLEN; i++)
- if ((rs2 >> i) & 1)
- x ^= rs1 >> (XLEN-i-1);
+ if ((RB >> i) & 1)
+ x ^= RA >> (XLEN-i-1);
return x;
}
```
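A hypothetical usage sketch: clmul is carry-less multiplication, i.e. polynomial multiplication over GF(2), the building block for CRCs and GCM:
```
// Hypothetical usage sketch: polynomial multiplication over GF(2).
#include <assert.h>
#include <stdint.h>
int main(void) {
    // (x^2 + x + 1) * (x + 1) = x^3 + 1: 0b111 clmul 0b11 = 0b1001.
    assert(clmul(0x7, 0x3) == 0x9);
    return 0;
}
```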
based on RV bitmanip
```
uint_xlen_t crc32(uint_xlen_t x, int nbits)
{
    for (int i = 0; i < nbits; i++)
        x = (x >> 1) ^ (0xEDB88320 & ~((x&1)-1));
    return x;
}
uint_xlen_t crc32c(uint_xlen_t x, int nbits)
{
    for (int i = 0; i < nbits; i++)
        x = (x >> 1) ^ (0x82F63B78 & ~((x&1)-1));
    return x;
}
-uint_xlen_t crc32_b(uint_xlen_t rs1) { return crc32(rs1, 8); }
-uint_xlen_t crc32_h(uint_xlen_t rs1) { return crc32(rs1, 16); }
-uint_xlen_t crc32_w(uint_xlen_t rs1) { return crc32(rs1, 32); }
-uint_xlen_t crc32c_b(uint_xlen_t rs1) { return crc32c(rs1, 8); }
-uint_xlen_t crc32c_h(uint_xlen_t rs1) { return crc32c(rs1, 16); }
-uint_xlen_t crc32c_w(uint_xlen_t rs1) { return crc32c(rs1, 32); }
+uint_xlen_t crc32_b(uint_xlen_t RA) { return crc32(RA, 8); }
+uint_xlen_t crc32_h(uint_xlen_t RA) { return crc32(RA, 16); }
+uint_xlen_t crc32_w(uint_xlen_t RA) { return crc32(RA, 32); }
+uint_xlen_t crc32c_b(uint_xlen_t RA) { return crc32c(RA, 8); }
+uint_xlen_t crc32c_h(uint_xlen_t RA) { return crc32c(RA, 16); }
+uint_xlen_t crc32c_w(uint_xlen_t RA) { return crc32c(RA, 32); }
#if XLEN > 32
-uint_xlen_t crc32_d (uint_xlen_t rs1) { return crc32 (rs1, 64); }
-uint_xlen_t crc32c_d(uint_xlen_t rs1) { return crc32c(rs1, 64); }
+uint_xlen_t crc32_d (uint_xlen_t RA) { return crc32 (RA, 64); }
+uint_xlen_t crc32c_d(uint_xlen_t RA) { return crc32c(RA, 64); }
#endif
```
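A hypothetical usage sketch: crc32_b performs the eight shift/XOR steps of a bit-serial CRC-32 update, so a byte-at-a-time loop reduces to one XOR plus one instruction per byte (the standard init value and final inversion still apply):
```
// Hypothetical usage sketch: byte-at-a-time CRC-32 built on crc32_b.
#include <stddef.h>
#include <stdint.h>
uint32_t crc32_bytes(const uint8_t *p, size_t n) {
    uint32_t acc = 0xFFFFFFFF;               // standard CRC-32 initial value
    for (size_t i = 0; i < n; i++)
        acc = (uint32_t)crc32_b(acc ^ p[i]); // 8 LFSR steps per byte
    return ~acc;                             // standard final inversion
}
```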
# bitmatrix
```
-uint64_t bmatflip(uint64_t rs1)
+uint64_t bmatflip(uint64_t RA)
{
- uint64_t x = rs1;
+ uint64_t x = RA;
x = shfl64(x, 31);
x = shfl64(x, 31);
x = shfl64(x, 31);
return x;
}
-uint64_t bmatxor(uint64_t rs1, uint64_t rs2)
+uint64_t bmatxor(uint64_t RA, uint64_t RB)
{
- // transpose of rs2
- uint64_t rs2t = bmatflip(rs2);
- uint8_t u[8]; // rows of rs1
- uint8_t v[8]; // cols of rs2
+ // transpose of RB
+ uint64_t RBt = bmatflip(RB);
+ uint8_t u[8]; // rows of RA
+ uint8_t v[8]; // cols of RB
for (int i = 0; i < 8; i++) {
- u[i] = rs1 >> (i*8);
- v[i] = rs2t >> (i*8);
+ u[i] = RA >> (i*8);
+ v[i] = RBt >> (i*8);
}
uint64_t x = 0;
for (int i = 0; i < 64; i++) {
    if (pcnt(u[i / 8] & v[i % 8]) & 1)  // odd parity: XOR dot product over GF(2)
        x |= uint64_t(1) << i;
}
return x;
}
-uint64_t bmator(uint64_t rs1, uint64_t rs2)
+uint64_t bmator(uint64_t RA, uint64_t RB)
{
- // transpose of rs2
- uint64_t rs2t = bmatflip(rs2);
- uint8_t u[8]; // rows of rs1
- uint8_t v[8]; // cols of rs2
+ // transpose of RB
+ uint64_t RBt = bmatflip(RB);
+ uint8_t u[8]; // rows of RA
+ uint8_t v[8]; // cols of RB
for (int i = 0; i < 8; i++) {
- u[i] = rs1 >> (i*8);
- v[i] = rs2t >> (i*8);
+ u[i] = RA >> (i*8);
+ v[i] = RBt >> (i*8);
}
uint64_t x = 0;
for (int i = 0; i < 64; i++) {
    if ((u[i / 8] & v[i % 8]) != 0)  // any overlap: OR dot product
        x |= uint64_t(1) << i;
}
return x;
}
```
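The bmat* pseudocode relies on the RV bitmanip `pcnt` (population count) primitive; a minimal sketch is shown below, together with a hypothetical check that multiplying by the 8x8 identity matrix (byte i = 1 << i) leaves the operand unchanged. bmatxor is a boolean matrix multiply over GF(2) (AND for multiply, XOR for add); bmator is the OR variant.
```
// Minimal pcnt sketch (population count) plus a hypothetical identity check.
#include <assert.h>
#include <stdint.h>
static int pcnt(uint64_t x) {
    int n = 0;
    while (x) { n += x & 1; x >>= 1; }
    return n;
}
int main(void) {
    uint64_t I = 0x8040201008040201ULL;  // row i (byte i) has only bit i set
    assert(bmatxor(0x123456789ABCDEF0ULL, I) == 0x123456789ABCDEF0ULL);
    assert(bmator (0x123456789ABCDEF0ULL, I) == 0x123456789ABCDEF0ULL);
    return 0;
}
```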