based on RV bitmanip.
-RB contains a vector of indices to select parts of RA to be
-copied to RT.
+RA contains a vector of indices to select parts of RB to be
+copied to RT. The immediate variant allows a repeated pattern of
+up to 8 bits to be targeted at different parts of RT.
```
+/*
+ * xpermi: immediate variant of xperm.
+ *
+ * The 8-bit immediate is replicated into every byte of an XLEN-wide
+ * index vector; each sz-bit field of that vector then selects which
+ * sz-bit field of RB is copied to the same field position of the result.
+ *
+ *   imm8     index pattern, repeated across the whole index vector
+ *   RB       source of the sz-bit fields that are copied
+ *   sz_log2  log2 of the field width in bits (sz = 1 << sz_log2)
+ *
+ * Returns the permuted value. A field whose index points at or past
+ * bit XLEN contributes nothing, so it stays zero in the result.
+ */
+uint_xlen_t xpermi(uint8_t imm8, uint_xlen_t RB, int sz_log2)
+{
+    uint_xlen_t r = 0;
+    uint_xlen_t sz = 1LL << sz_log2;
+    /* NOTE: this shift is only defined while sz is below the width of 1LL. */
+    uint_xlen_t mask = (1LL << sz) - 1;
+    /*
+     * Replicate imm8 into each byte of the index vector. The operand is
+     * widened before shifting: a bare uint8_t promotes to int, which must
+     * not be shifted by 32 or more.
+     */
+    uint_xlen_t RA = 0;
+    for (int b = 0; b < XLEN; b += 8) {
+        RA |= (uint_xlen_t)imm8 << b;
+    }
+    for (int i = 0; i < XLEN; i += sz) {
+        /* index field scaled by the field width gives a bit offset into RB */
+        uint_xlen_t pos = ((RA >> i) & mask) << sz_log2;
+        if (pos < XLEN) {
+            r |= ((RB >> pos) & mask) << i;
+        }
+    }
+    return r;
+}
/*
 * xperm: permute sz-bit fields of one operand under control of the other.
 *
 * This block is shown as a diff: the lines marked '-' are the earlier
 * operand roles (indices read from RB, data read from RA) and the lines
 * marked '+' are the replacement (indices read from RA, data read from RB).
 *
 *   RA, RB   the index vector and the data source, as described above
 *   sz_log2  log2 of the field width in bits (sz = 1 << sz_log2)
 *
 * Returns r, which starts at zero and receives one sz-bit field per
 * iteration whenever the computed bit offset is below XLEN.
 */
uint_xlen_t xperm(uint_xlen_t RA, uint_xlen_t RB, int sz_log2)
{
uint_xlen_t r = 0;
uint_xlen_t sz = 1LL << sz_log2;
/* all-ones mask covering one sz-bit field */
uint_xlen_t mask = (1LL << sz) - 1;
/* walk the result one sz-bit field at a time; i is the field's bit offset */
for (int i = 0; i < XLEN; i += sz) {
- uint_xlen_t pos = ((RB >> i) & mask) << sz_log2;
+ uint_xlen_t pos = ((RA >> i) & mask) << sz_log2;
/* pos is the index field scaled to a bit offset; offsets at or past XLEN are skipped */
if (pos < XLEN)
- r |= ((RA >> pos) & mask) << i;
+ r |= ((RB >> pos) & mask) << i;
}
return r;
}