ins.src[2] = ins.src[0];
for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
- ins.csel_cond = cond;
+ ins.cond = cond;
if (!bit_test_single(dev, &ins, input, true, debug)) {
fprintf(stderr, "FAIL: csel%u.%s\n",
}
}
+/* Builds a one-source BI_ROUND float instruction of width sz and runs it
+ * through bit_test_single once for each round mode value 0..3, printing a
+ * FAIL line to stderr for every mode that does not pass. FMA is forwarded
+ * untouched to bit_test_single (presumably selecting the FMA vs ADD unit --
+ * confirm against bit_test_single). */
+static void
+bit_round_helper(struct panfrost_device *dev, uint32_t *input, unsigned sz, bool FMA, enum bit_debug debug)
+{
+        bi_instruction ins = bit_ins(BI_ROUND, 1, nir_type_float, sz);
+        enum bifrost_roundmode mode = 0;
+
+        while (mode <= 3) {
+                /* Re-applied every iteration, before the instruction is tested */
+                ins.roundmode = mode;
+
+                if (!bit_test_single(dev, &ins, input, FMA, debug))
+                        fprintf(stderr, "FAIL: round.%u.%u\n", sz, mode);
+
+                ++mode;
+        }
+}
+
+
static void
bit_reduce_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
{
}
}
+/* Exercises BI_SELECT with 32 / size sources, each typed as a size-bit
+ * unsigned integer, producing a 32-bit unsigned result. Every combination of
+ * per-source {lo, hi} swizzle is tried; failures are reported on stderr.
+ *
+ * For size == 16 the whole sweep runs twice, passing true and then false as
+ * the fourth argument of bit_test_single; any other size runs once with true.
+ *
+ * NOTE(review): the FAIL message does not say which of the two passes failed.
+ */
+static void
+bit_select_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug)
+{
+        const unsigned lanes = 32 / size;
+        bi_instruction ins = bit_ins(BI_SELECT, lanes, nir_type_uint, 32);
+
+        for (unsigned s = 0; s < lanes; ++s)
+                ins.src_types[s] = nir_type_uint | size;
+
+        /* SCHEDULER: We can only read 3 registers at once. */
+        if (size == 8)
+                ins.src[2] = ins.src[0];
+
+        const bool is_16 = (size == 16);
+
+        /* Swizzle value written for a source whose "hi" bit is set */
+        const unsigned hi = is_16 ? 1 : 2;
+        const unsigned passes = is_16 ? 2 : 1;
+
+        /* One bit per source picks lo or hi, hence 2^lanes combinations */
+        const unsigned combos = (1 << lanes);
+
+        for (unsigned pass = 0; pass < passes; ++pass) {
+                for (unsigned swizzle = 0; swizzle < combos; ++swizzle) {
+                        for (unsigned s = 0; s < lanes; ++s) {
+                                if ((swizzle >> s) & 1)
+                                        ins.swizzle[s][0] = hi;
+                                else
+                                        ins.swizzle[s][0] = 0;
+                        }
+
+                        /* First pass hands true to bit_test_single, second false */
+                        if (bit_test_single(dev, &ins, input, !pass, debug))
+                                continue;
+
+                        fprintf(stderr, "FAIL: select.%u.%u\n",
+                                        size, swizzle);
+                }
+        }
+}
+
+
+/* Exercises the two-source float BI_CMP at the given size, with a size-bit
+ * unsigned destination. Every condition from BI_COND_LT through BI_COND_NE
+ * is combined with every source-modifier encoding valid for that size, and
+ * each failing combination is printed to stderr tagged "fma" or "add"
+ * according to the FMA flag.
+ *
+ * Modifier encoding (bits of the loop counter):
+ *   size 16: bits 0-1 / 2-3 give the two swizzle components of source 0 / 1,
+ *            bit 4 / bit 5 give abs on source 0 / 1 (64 encodings)
+ *   size 32: bit 0 / 1 give abs on source 0 / 1, bit 2 / 3 give neg on
+ *            source 0 / 1 (16 encodings)
+ *   size  8: identity swizzle only (1 encoding)
+ */
+static void
+bit_fcmp_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug, bool FMA)
+{
+        bi_instruction ins = bit_ins(BI_CMP, 2, nir_type_float, size);
+        ins.dest_type = nir_type_uint | size;
+
+        /* 16-bit has swizzles and abs. 32-bit has abs/neg mods. */
+        unsigned max_mods;
+
+        switch (size) {
+        case 16:
+                max_mods = 64;
+                break;
+        case 32:
+                max_mods = 16;
+                break;
+        default:
+                max_mods = 1;
+                break;
+        }
+
+        for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
+                for (unsigned mods = 0; mods < max_mods; ++mods) {
+                        /* Set on every iteration, ahead of each test call */
+                        ins.cond = cond;
+
+                        switch (size) {
+                        case 16:
+                                for (unsigned s = 0; s < 2; ++s) {
+                                        unsigned pair = mods >> (s * 2);
+
+                                        ins.swizzle[s][0] = (pair & 1) ? 1 : 0;
+                                        ins.swizzle[s][1] = (pair & 2) ? 1 : 0;
+                                }
+
+                                ins.src_abs[0] = (mods & 16) ? true : false;
+                                ins.src_abs[1] = (mods & 32) ? true : false;
+                                break;
+
+                        case 8:
+                                for (unsigned s = 0; s < 2; ++s) {
+                                        for (unsigned lane = 0; lane < 4; ++lane)
+                                                ins.swizzle[s][lane] = lane;
+                                }
+                                break;
+
+                        case 32:
+                                ins.src_abs[0] = (mods & 1) ? true : false;
+                                ins.src_abs[1] = (mods & 2) ? true : false;
+                                ins.src_neg[0] = (mods & 4) ? true : false;
+                                ins.src_neg[1] = (mods & 8) ? true : false;
+                                break;
+
+                        default:
+                                break;
+                        }
+
+                        if (bit_test_single(dev, &ins, input, FMA, debug))
+                                continue;
+
+                        fprintf(stderr, "FAIL: cmp.%s.%u.%u.%u\n",
+                                        FMA ? "fma" : "add", size, mods, cond);
+                }
+        }
+}
+
+
static void
bit_convert_helper(struct panfrost_device *dev, unsigned from_size,
unsigned to_size, unsigned cx, unsigned cy, bool FMA,
}
}
+/* Exercises the three-source unsigned BI_BITWISE at the given size. The
+ * third source is pinned to BIR_INDEX_ZERO, and the first two sources get an
+ * identity swizzle. Each op from BI_BITWISE_AND through BI_BITWISE_XOR is
+ * tested with all four combinations of the two source-invert flags, always
+ * passing true as the fourth argument of bit_test_single. Failures are
+ * printed to stderr. */
+static void
+bit_bitwise_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug)
+{
+        bi_instruction ins = bit_ins(BI_BITWISE, 3, nir_type_uint, size);
+        const unsigned lanes = 32 / size;
+
+        /* TODO: shifts */
+        ins.src[2] = BIR_INDEX_ZERO;
+
+        /* Force identity swizzle -- bitwise is not swizzleable */
+        for (unsigned s = 0; s < 2; ++s) {
+                unsigned lane = 0;
+
+                while (lane < lanes) {
+                        ins.swizzle[s][lane] = lane;
+                        ++lane;
+                }
+        }
+
+        for (unsigned op = BI_BITWISE_AND; op <= BI_BITWISE_XOR; ++op) {
+                ins.op.bitwise = op;
+
+                for (unsigned mods = 0; mods < 4; ++mods) {
+                        /* Bit 0 inverts source 0, bit 1 inverts source 1.
+                         * NOTE(review): the second store assigns 0 or 2, which
+                         * is only a clean flag if src_invert is bool -- confirm. */
+                        ins.bitwise.src_invert[0] = mods & 1;
+                        ins.bitwise.src_invert[1] = mods & 2;
+
+                        if (bit_test_single(dev, &ins, input, true, debug))
+                                continue;
+
+                        fprintf(stderr, "FAIL: bitwise.%u.%u.%u\n",
+                                        size, op, mods);
+                }
+        }
+}
+
+
void
bit_packing(struct panfrost_device *dev, enum bit_debug debug)
{
bit_fmod_helper(dev, BI_ADD, sz, true, input, debug, 0);
bit_fmod_helper(dev, BI_ADD, sz, false, input, debug, 0);
+ bit_round_helper(dev, (uint32_t *) input32, sz, true, debug);
bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MIN);
bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MAX);
memcpy(mscale_input, input32, sizeof(input32));
mscale_input[3] = 0x7;
bit_fma_mscale_helper(dev, mscale_input, debug);
+
+ for (unsigned sz = 8; sz <= 16; sz *= 2) {
+ bit_select_helper(dev, (uint32_t *) input32, sz, debug);
+ }
+
+ bit_fcmp_helper(dev, (uint32_t *) input32, 32, debug, true);
+ bit_fcmp_helper(dev, (uint32_t *) input32, 16, debug, true);
+
+ for (unsigned sz = 8; sz <= 32; sz *= 2)
+ bit_bitwise_helper(dev, (uint32_t *) input32, sz, debug);
}