bifrost: Add support for nir_op_ishl
author Chris Forbes <chrisforbes@google.com>
Sun, 26 Jul 2020 19:18:54 +0000 (12:18 -0700)
committer Marge Bot <eric+marge@anholt.net>
Tue, 28 Jul 2020 01:13:09 +0000 (01:13 +0000)
Bifrost's bitwise ops include the shift capability. Previously we had
hardcoded the shift to zero in all cases.

There's room in the future to emit slightly better code if a shift and a
bitwise operation can be folded together, but this change does not attempt
that.

This change also removes the separate BI_SHIFT instruction class as
BI_BITWISE can cover both cases.

Signed-off-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6091>

src/panfrost/bifrost/bi_pack.c
src/panfrost/bifrost/bi_print.c
src/panfrost/bifrost/bi_tables.c
src/panfrost/bifrost/bifrost_compile.c
src/panfrost/bifrost/compiler.h

index 13d3808cedaf2679ee736ae7550a1dddc473954e..99a155fd56acc4798932ae1ca603a1f08338c708 100644 (file)
@@ -1168,8 +1168,6 @@ bi_pack_fma(bi_clause *clause, bi_bundle bundle, bi_registers *regs)
                 return bi_pack_fma_addmin(bundle.fma, regs);
         case BI_MOV:
                 return bi_pack_fma_1src(bundle.fma, regs, BIFROST_FMA_OP_MOV);
-        case BI_SHIFT:
-                unreachable("Packing todo");
         case BI_SELECT:
                 return bi_pack_fma_select(bundle.fma, regs);
         case BI_ROUND:
@@ -1733,7 +1731,6 @@ bi_pack_add(bi_clause *clause, bi_bundle bundle, bi_registers *regs, gl_shader_s
         case BI_MINMAX:
                 return bi_pack_add_addmin(bundle.add, regs);
         case BI_MOV:
-        case BI_SHIFT:
         case BI_STORE:
                 unreachable("Packing todo");
         case BI_STORE_VAR:
index 7b8164e4f43367d8f7640512676e6697450c25ca..e8fb326271b04c57f53cb519c235d8576d788d28 100644 (file)
@@ -149,7 +149,6 @@ bi_class_name(enum bi_class cl)
         case BI_MINMAX: return "minmax";
         case BI_MOV: return "mov";
         case BI_SELECT: return "select";
-        case BI_SHIFT: return "shift";
         case BI_STORE: return "store";
         case BI_STORE_VAR: return "store_var";
         case BI_SPECIAL: return "special";
index 105c33196ea1d9a82c7625184d91e996ce4fb751..81365937f042718f0e761cf83d1e3384c3757c56 100644 (file)
@@ -49,7 +49,6 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
         [BI_MOV]               = BI_SCHED_ALL,
         [BI_FMOV]               = BI_MODS | BI_SCHED_ALL,
         [BI_REDUCE_FMA]         = BI_SCHED_FMA,
-        [BI_SHIFT]             = BI_SCHED_ALL,
         [BI_STORE]             = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_SRC,
         [BI_STORE_VAR]                 = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_SRC,
         [BI_SPECIAL]           = BI_SCHED_ADD | BI_SCHED_SLOW,
index 2aef98fde6db4c60aca71128a338986c146b3319..3973892d91a49c3c17c13efb53e9f8aa2149ea37 100644 (file)
@@ -516,6 +516,7 @@ bi_class_for_nir_alu(nir_op op)
         case nir_op_ior:
         case nir_op_ixor:
         case nir_op_inot:
+        case nir_op_ishl:
                 return BI_BITWISE;
 
         BI_CASE_CMP(nir_op_flt)
@@ -807,6 +808,16 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr)
                 alu.op.bitwise = BI_BITWISE_OR;
                 alu.bitwise.src_invert[0] = true;
                 alu.src[1] = BIR_INDEX_ZERO;
+                /* zero shift */
+                alu.src[2] = BIR_INDEX_ZERO;
+                alu.src_types[2] = alu.src_types[1];
+                break;
+        case nir_op_ishl:
+                alu.op.bitwise = BI_BITWISE_OR;
+                /* move src1 to src2 and replace with zero. underlying op is (src0 << src2) | src1 */
+                alu.src[2] = alu.src[1];
+                alu.src_types[2] = alu.src_types[1];
+                alu.src[1] = BIR_INDEX_ZERO;
                 break;
         case nir_op_fmax:
         case nir_op_imax:
@@ -843,12 +854,21 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr)
                 break;
         case nir_op_iand:
                 alu.op.bitwise = BI_BITWISE_AND;
+                /* zero shift */
+                alu.src[2] = BIR_INDEX_ZERO;
+                alu.src_types[2] = alu.src_types[1];
                 break;
         case nir_op_ior:
                 alu.op.bitwise = BI_BITWISE_OR;
+                /* zero shift */
+                alu.src[2] = BIR_INDEX_ZERO;
+                alu.src_types[2] = alu.src_types[1];
                 break;
         case nir_op_ixor:
                 alu.op.bitwise = BI_BITWISE_XOR;
+                /* zero shift */
+                alu.src[2] = BIR_INDEX_ZERO;
+                alu.src_types[2] = alu.src_types[1];
                 break;
         case nir_op_f2i32:
                 alu.roundmode = BIFROST_RTZ;
@@ -889,10 +909,6 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr)
                 bi_fuse_cond(&alu, instr->src[0],
                                 &constants_left, &constant_shift, comps, false);
 #endif
-        } else if (alu.type == BI_BITWISE) {
-                /* Implicit shift argument... at some point we should fold */
-                alu.src[2] = BIR_INDEX_ZERO;
-                alu.src_types[2] = alu.src_types[1];
         }
 
         bi_emit(ctx, alu);
index 95c35609e5ebb6c884bc0becdf6c5bbcbd4045e1..2038a3299d15359b75060225d245fadf37137217 100644 (file)
@@ -70,7 +70,6 @@ enum bi_class {
         BI_MOV,
         BI_REDUCE_FMA,
         BI_SELECT,
-        BI_SHIFT,
         BI_STORE,
         BI_STORE_VAR,
         BI_SPECIAL, /* _FAST on supported GPUs */