From 5f7e0185cd9e93e19e3bbf9236458234f6448044 Mon Sep 17 00:00:00 2001 From: Italo Nicola Date: Fri, 10 Jul 2020 09:36:58 +0000 Subject: [PATCH] pan/mdg: eliminate references to ins->alu.reg_mode In an effort to simplify MIR by not prepacking instructions, this commit removes references to `ins->alu.reg_mode` so that we can later remove the `ins->alu` field from midgard_instruction. Every place that was using reg_mode was changed to now use the generic `ins->src_types` and `ins->dest_type` fields instead. We then reconstruct the reg_mode field right before emission. Signed-off-by: Italo Nicola Reviewed-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/compiler.h | 3 +- src/panfrost/midgard/midgard_compile.c | 120 ++++++++++---------- src/panfrost/midgard/midgard_emit.c | 28 +++-- src/panfrost/midgard/midgard_print.c | 3 +- src/panfrost/midgard/mir_promote_uniforms.c | 3 - 5 files changed, 80 insertions(+), 77 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index a35a2ba3fd8..8d7af77a324 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -523,6 +523,8 @@ uint16_t mir_round_bytemask_up(uint16_t mask, unsigned bits); void mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask); signed mir_upper_override(midgard_instruction *ins, unsigned inst_size); unsigned mir_components_for_type(nir_alu_type T); +unsigned max_bitsize_for_alu(midgard_instruction *ins); +midgard_reg_mode reg_mode_for_bitsize(unsigned bitsize); /* MIR printing */ @@ -555,7 +557,6 @@ v_mov(unsigned src, unsigned dest) .dest_type = nir_type_uint32, .op = midgard_alu_op_imov, .alu = { - .reg_mode = midgard_reg_mode_32, .outmod = midgard_outmod_int_wrap }, }; diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index a43d300d9c1..71191083794 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -706,60 +706,6 @@ nir_is_non_scalar_swizzle(nir_alu_src 
*src, unsigned nr_components) op = midgard_alu_op_##_op; \ ALU_CHECK_CMP(sext); \ break; - -/* Analyze the sizes of the dest and inputs to determine reg mode. */ - -static midgard_reg_mode -reg_mode_for_nir(nir_alu_instr *instr) -{ - unsigned src_bitsize = nir_src_bit_size(instr->src[0].src); - unsigned dst_bitsize = nir_dest_bit_size(instr->dest.dest); - unsigned max_bitsize = MAX2(src_bitsize, dst_bitsize); - - /* We don't have fp16 LUTs, so we'll want to emit code like: - * - * vlut.fsinr hr0, hr0 - * - * where both input and output are 16-bit but the operation is carried - * out in 32-bit - */ - - switch (instr->op) { - case nir_op_fsqrt: - case nir_op_frcp: - case nir_op_frsq: - case nir_op_fsin: - case nir_op_fcos: - case nir_op_fexp2: - case nir_op_flog2: - max_bitsize = MAX2(max_bitsize, 32); - break; - - /* These get lowered to moves */ - case nir_op_pack_32_4x8: - max_bitsize = 8; - break; - case nir_op_pack_32_2x16: - max_bitsize = 16; - break; - default: - break; - } - - - switch (max_bitsize) { - /* Use 16 pipe for 8 since we don't support vec16 yet */ - case 8: - case 16: - return midgard_reg_mode_16; - case 32: - return midgard_reg_mode_32; - case 64: - return midgard_reg_mode_64; - default: - unreachable("Invalid bit size"); - } -} /* Compare mir_lower_invert */ static bool @@ -912,10 +858,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) unsigned broadcast_swizzle = 0; - /* What register mode should we operate in? */ - midgard_reg_mode reg_mode = - reg_mode_for_nir(instr); - /* Should we swap arguments? 
*/ bool flip_src12 = false; @@ -1220,7 +1162,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ins.mask = mask_of(nr_components); midgard_vector_alu alu = { - .reg_mode = reg_mode, .outmod = outmod, }; @@ -2443,6 +2384,61 @@ inline_alu_constants(compiler_context *ctx, midgard_block *block) } } +unsigned +max_bitsize_for_alu(midgard_instruction *ins) +{ + unsigned max_bitsize = 0; + for (int i = 0; i < MIR_SRC_COUNT; i++) { + if (ins->src[i] == ~0) continue; + unsigned src_bitsize = nir_alu_type_get_type_size(ins->src_types[i]); + max_bitsize = MAX2(src_bitsize, max_bitsize); + } + unsigned dst_bitsize = nir_alu_type_get_type_size(ins->dest_type); + max_bitsize = MAX2(dst_bitsize, max_bitsize); + + /* We don't have fp16 LUTs, so we'll want to emit code like: + * + * vlut.fsinr hr0, hr0 + * + * where both input and output are 16-bit but the operation is carried + * out in 32-bit + */ + + switch (ins->op) { + case midgard_alu_op_fsqrt: + case midgard_alu_op_frcp: + case midgard_alu_op_frsqrt: + case midgard_alu_op_fsin: + case midgard_alu_op_fcos: + case midgard_alu_op_fexp2: + case midgard_alu_op_flog2: + max_bitsize = MAX2(max_bitsize, 32); + break; + + default: + break; + } + + return max_bitsize; +} + +midgard_reg_mode +reg_mode_for_bitsize(unsigned bitsize) +{ + switch (bitsize) { + /* use 16 pipe for 8 since we don't support vec16 yet */ + case 8: + case 16: + return midgard_reg_mode_16; + case 32: + return midgard_reg_mode_32; + case 64: + return midgard_reg_mode_64; + default: + unreachable("invalid bit size"); + } +} + /* Midgard supports two types of constants, embedded constants (128-bit) and * inline constants (16-bit). 
Sometimes, especially with scalar ops, embedded * constants can be demoted to inline constants, for space savings and @@ -2458,9 +2454,11 @@ embedded_to_inline_constant(compiler_context *ctx, midgard_block *block) /* Blend constants must not be inlined by definition */ if (ins->has_blend_constant) continue; + unsigned max_bitsize = max_bitsize_for_alu(ins); + /* We can inline 32-bit (sometimes) or 16-bit (usually) */ - bool is_16 = ins->alu.reg_mode == midgard_reg_mode_16; - bool is_32 = ins->alu.reg_mode == midgard_reg_mode_32; + bool is_16 = max_bitsize == 16; + bool is_32 = max_bitsize == 32; if (!(is_16 || is_32)) continue; diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index 999ae98feb6..92a962db5e9 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -47,7 +47,7 @@ static unsigned mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar) { bool integer = midgard_is_integer_op(ins->op); - unsigned base_size = (8 << ins->alu.reg_mode); + unsigned base_size = max_bitsize_for_alu(ins); unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]); bool half = (sz == (base_size >> 1)); @@ -172,7 +172,7 @@ mir_pack_mask_alu(midgard_instruction *ins) * override to the lower or upper half, shifting the effective mask in * the latter, so AAAA.... 
becomes AAAA */ - unsigned inst_size = 8 << ins->alu.reg_mode; + unsigned inst_size = max_bitsize_for_alu(ins); signed upper_shift = mir_upper_override(ins, inst_size); if (upper_shift >= 0) { @@ -184,9 +184,9 @@ mir_pack_mask_alu(midgard_instruction *ins) ins->alu.dest_override = midgard_dest_override_none; } - if (ins->alu.reg_mode == midgard_reg_mode_32) + if (inst_size == 32) ins->alu.mask = expand_writemask(effective, 2); - else if (ins->alu.reg_mode == midgard_reg_mode_64) + else if (inst_size == 64) ins->alu.mask = expand_writemask(effective, 1); else ins->alu.mask = effective; @@ -280,8 +280,7 @@ mir_pack_vector_srcs(midgard_instruction *ins) { bool channeled = GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props); - midgard_reg_mode mode = ins->alu.reg_mode; - unsigned base_size = (8 << mode); + unsigned base_size = max_bitsize_for_alu(ins); for (unsigned i = 0; i < 2; ++i) { if (ins->has_inline_constant && (i == 1)) @@ -297,7 +296,7 @@ mir_pack_vector_srcs(midgard_instruction *ins) assert((sz == base_size) || half); unsigned swizzle = mir_pack_swizzle(ins->mask, ins->swizzle[i], - ins->src_types[i], ins->alu.reg_mode, + ins->src_types[i], reg_mode_for_bitsize(base_size), channeled, &rep_lo, &rep_hi); midgard_vector_alu_src pack = { @@ -488,6 +487,15 @@ mir_lower_roundmode(midgard_instruction *ins) } } +static midgard_vector_alu +vector_alu_from_instr(midgard_instruction *ins) +{ + midgard_vector_alu alu = ins->alu; + alu.op = ins->op; + alu.reg_mode = reg_mode_for_bitsize(max_bitsize_for_alu(ins)); + return alu; +} + static void emit_alu_bundle(compiler_context *ctx, midgard_bundle *bundle, @@ -532,8 +540,7 @@ emit_alu_bundle(compiler_context *ctx, mir_pack_mask_alu(ins); mir_pack_vector_srcs(ins); size = sizeof(midgard_vector_alu); - source_alu = ins->alu; - source_alu.op = ins->op; + source_alu = vector_alu_from_instr(ins); source = &source_alu; } else if (ins->unit == ALU_ENAB_BR_COMPACT) { size = sizeof(midgard_branch_cond); @@ -543,8 +550,7 @@ 
emit_alu_bundle(compiler_context *ctx, source = &ins->branch_extended; } else { size = sizeof(midgard_scalar_alu); - source_alu = ins->alu; - source_alu.op = ins->op; + source_alu = vector_alu_from_instr(ins); scalarized = vector_to_scalar_alu(source_alu, ins); source = &scalarized; } diff --git a/src/panfrost/midgard/midgard_print.c b/src/panfrost/midgard/midgard_print.c index 44e21966091..244d19fbe90 100644 --- a/src/panfrost/midgard/midgard_print.c +++ b/src/panfrost/midgard/midgard_print.c @@ -244,6 +244,7 @@ mir_print_embedded_constant(midgard_instruction *ins, unsigned src_idx) src = vector_alu_from_unsigned(ins->alu.src2); unsigned *swizzle = ins->swizzle[src_idx]; + midgard_reg_mode reg_mode = reg_mode_for_bitsize(max_bitsize_for_alu(ins)); unsigned comp_mask = effective_writemask(ins->op, ins->mask); unsigned num_comp = util_bitcount(comp_mask); unsigned max_comp = mir_components_for_type(ins->dest_type); @@ -264,7 +265,7 @@ mir_print_embedded_constant(midgard_instruction *ins, unsigned src_idx) printf(", "); mir_print_constant_component(stdout, &ins->constants, - swizzle[comp], ins->alu.reg_mode, + swizzle[comp], reg_mode, src.half, src.mod, ins->op); } diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index 4f86c4aaae0..b209af8b368 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -197,9 +197,6 @@ midgard_promote_uniforms(compiler_context *ctx) mov.dest_type = nir_type_uint | type_size; mov.src_types[0] = mov.dest_type; - if (type_size == 64) - mov.alu.reg_mode = midgard_reg_mode_64; - uint16_t rounded = mir_round_bytemask_up(mir_bytemask(ins), type_size); mir_set_bytemask(&mov, rounded); mir_insert_instruction_before(ctx, ins, mov); -- 2.30.2