x = shfl64(x, 31);
return x;
}
-uint64_t bmatxor(uint64_t RA, uint64_t RB)
-{
+
+uint64_t bmatxori(uint64_t RA, uint64_t RB, uint8_t imm) {
// Builds a 64-bit result bit by bit: bit i of the return value is set only
// when (a) pcnt(u[i / 8] & v[i % 8]) is odd and (b) bit (i % 8) of imm is 1.
// pcnt is defined elsewhere; presumably a population count, which would make
// (a) a parity test - confirm against its definition.
// transpose of RB
uint64_t RBt = bmatflip(RB);
uint8_t u[8]; // rows of RA
// NOTE(review): the lines between the declaration of u and the closing brace
// below are not visible in this view; v (used in the loop further down) is
// presumably declared and filled there, along with u - confirm.
}
uint64_t x = 0;
for (int i = 0; i < 64; i++) {
// bit is bit (i % 8) of imm, held as uint64_t so the shift below is
// performed in an unsigned 64-bit type; the replaced form shifted the signed
// constant 1LL, which C leaves undefined at i == 63.
+ uint64_t bit = (imm >> (i%8)) & 1;
if (pcnt(u[i / 8] & v[i % 8]) & 1)
- x |= 1LL << i;
+ x |= bit << i;
}
return x;
}
-uint64_t bmator(uint64_t RA, uint64_t RB)
-{
+
+// Thin wrapper over bmatxori: passing imm = 0xff sets every one of the eight
+// imm bits, so bmatxori's per-position imm bit is always 1 and no output bit
+// is masked off.
+uint64_t bmatxor(uint64_t RA, uint64_t RB) {
+ return bmatxori(RA, RB, 0xff);
+}
+
+uint64_t bmator(uint64_t RA, uint64_t RB) {
// NOTE(review): most of this function's body is elided in this view - the
// declaration and computation of the returned x are not shown. By its name it
// is presumably the OR counterpart of bmatxor; confirm against the full source.
// transpose of RB
uint64_t RBt = bmatflip(RB);
uint8_t u[8]; // rows of RA
}
return x;
}
-uint64_t bmatand(uint64_t RA, uint64_t RB)
-{
+
+uint64_t bmatand(uint64_t RA, uint64_t RB) {
// transpose of RB
uint64_t RBt = bmatflip(RB);
uint8_t u[8]; // rows of RA