From 1108eaa90de8507d405e7751db83764770eaa931 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 8 May 2020 17:41:49 -0400 Subject: [PATCH] pan/mdg: Streamline dest_override handling We can pass it all off to emit time, and let the types in the IR do the heavy lifting in the meantime, which is a lot easier to get right. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/compiler.h | 3 +-- src/panfrost/midgard/midgard_compile.c | 35 +++++++------------------ src/panfrost/midgard/midgard_emit.c | 20 +++++++++++--- src/panfrost/midgard/midgard_ra.c | 8 +++--- src/panfrost/midgard/midgard_schedule.c | 1 - src/panfrost/midgard/mir.c | 14 +++++----- 6 files changed, 35 insertions(+), 46 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 751df1b108a..9de53918644 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -500,7 +500,7 @@ uint16_t mir_from_bytemask(uint16_t bytemask, unsigned bits); uint16_t mir_bytemask(midgard_instruction *ins); uint16_t mir_round_bytemask_up(uint16_t mask, unsigned bits); void mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask); -unsigned mir_upper_override(midgard_instruction *ins); +signed mir_upper_override(midgard_instruction *ins, unsigned inst_size); /* MIR printing */ @@ -534,7 +534,6 @@ v_mov(unsigned src, unsigned dest) .alu = { .op = midgard_alu_op_imov, .reg_mode = midgard_reg_mode_32, - .dest_override = midgard_dest_override_none, .outmod = midgard_outmod_int_wrap }, }; diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 070b6e32c2d..a8a400cfd6d 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -535,20 +535,17 @@ nir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components) assert(src_bitsize == dst_bitsize); \ break; +#define ALU_CHECK_CMP(sext) \ + assert(src_bitsize == 16 || src_bitsize == 32); \ + 
assert(dst_bitsize == 16 || dst_bitsize == 32); \ + #define ALU_CASE_BCAST(nir, _op, count) \ case nir_op_##nir: \ op = midgard_alu_op_##_op; \ broadcast_swizzle = count; \ - assert(src_bitsize == dst_bitsize); \ + ALU_CHECK_CMP(true); \ break; -#define ALU_CHECK_CMP(sext) \ - if (src_bitsize == 16 && dst_bitsize == 32) { \ - /* inferred */ \ - } else { \ - assert(src_bitsize == dst_bitsize); \ - } \ - #define ALU_CASE_CMP(nir, _op, sext) \ case nir_op_##nir: \ op = midgard_alu_op_##_op; \ @@ -719,12 +716,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) midgard_reg_mode reg_mode = reg_mode_for_nir(instr); - /* Do we need a destination override? Used for inline - * type conversion */ - - midgard_dest_override dest_override = - midgard_dest_override_none; - /* Should we swap arguments? */ bool flip_src12 = false; @@ -819,19 +810,19 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ALU_CASE_BCAST(b32all_fequal2, fball_eq, 2); ALU_CASE_BCAST(b32all_fequal3, fball_eq, 3); - ALU_CASE(b32all_fequal4, fball_eq); + ALU_CASE_CMP(b32all_fequal4, fball_eq, true); ALU_CASE_BCAST(b32any_fnequal2, fbany_neq, 2); ALU_CASE_BCAST(b32any_fnequal3, fbany_neq, 3); - ALU_CASE(b32any_fnequal4, fbany_neq); + ALU_CASE_CMP(b32any_fnequal4, fbany_neq, true); ALU_CASE_BCAST(b32all_iequal2, iball_eq, 2); ALU_CASE_BCAST(b32all_iequal3, iball_eq, 3); - ALU_CASE(b32all_iequal4, iball_eq); + ALU_CASE_CMP(b32all_iequal4, iball_eq, true); ALU_CASE_BCAST(b32any_inequal2, ibany_neq, 2); ALU_CASE_BCAST(b32any_inequal3, ibany_neq, 3); - ALU_CASE(b32any_inequal4, ibany_neq); + ALU_CASE_CMP(b32any_inequal4, ibany_neq, true); /* Source mods will be shoved in later */ ALU_CASE(fabs, fmov); @@ -864,13 +855,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) else op = midgard_alu_op_imov; - if (dst_bitsize == (src_bitsize * 2)) { - /* inferred */ - } else if (src_bitsize == (dst_bitsize * 2)) { - /* Converting down */ - dest_override = midgard_dest_override_lower; - } - break; } @@ 
-1020,7 +1004,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) midgard_vector_alu alu = { .op = op, .reg_mode = reg_mode, - .dest_override = dest_override, .outmod = outmod, }; diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index f26685f6d02..e9a67811aa8 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -171,11 +171,16 @@ mir_pack_mask_alu(midgard_instruction *ins) * override to the lower or upper half, shifting the effective mask in * the latter, so AAAA.... becomes AAAA */ - unsigned upper_shift = mir_upper_override(ins); + unsigned inst_size = 8 << ins->alu.reg_mode; + signed upper_shift = mir_upper_override(ins, inst_size); - if (upper_shift) { + if (upper_shift >= 0) { effective >>= upper_shift; - ins->alu.dest_override = midgard_dest_override_upper; + ins->alu.dest_override = upper_shift ? + midgard_dest_override_upper : + midgard_dest_override_lower; + } else { + ins->alu.dest_override = midgard_dest_override_none; } if (ins->alu.reg_mode == midgard_reg_mode_32) @@ -590,7 +595,13 @@ emit_binary_bundle(compiler_context *ctx, ins->texture.type = bundle->tag; ins->texture.next_type = next_tag; - ins->texture.mask = ins->mask; + + signed override = mir_upper_override(ins, 32); + + ins->texture.mask = override > 0 ? 
+ ins->mask >> override : + ins->mask; + mir_pack_swizzle_tex(ins); unsigned osz = nir_alu_type_get_type_size(ins->dest_type); @@ -600,6 +611,7 @@ emit_binary_bundle(compiler_context *ctx, assert(isz == 32 || isz == 16); ins->texture.out_full = (osz == 32); + ins->texture.out_upper = override > 0; ins->texture.in_reg_full = (isz == 32); ins->texture.sampler_type = midgard_sampler_type(ins->dest_type); diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index e293fb43164..ecc9d727e2b 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -723,16 +723,14 @@ install_registers_instr( struct phys_reg offset = index_to_reg(ctx, l, ins->src[3], src_size[3]); /* First, install the texture coordinate */ - ins->texture.in_reg_full = 1; - ins->texture.in_reg_upper = 0; ins->texture.in_reg_select = coord.reg & 1; offset_swizzle(ins->swizzle[1], coord.offset, coord.size, dest.size, 0); /* Next, install the destination */ - ins->texture.out_full = 1; - ins->texture.out_upper = 0; ins->texture.out_reg_select = dest.reg & 1; - offset_swizzle(ins->swizzle[0], 0, 4, dest.size, dest.offset); + offset_swizzle(ins->swizzle[0], 0, 4, dest.size, + dest_size == 2 ? 
dest.offset % 8 : + dest.offset); mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset); /* If there is a register LOD/bias, use it */ diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index 51f628f42d0..72eabf34031 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -204,7 +204,6 @@ mir_is_scalar(midgard_instruction *ains) /* Only 16/32-bit can run on a scalar unit */ could_scalar &= ains->alu.reg_mode != midgard_reg_mode_8; could_scalar &= ains->alu.reg_mode != midgard_reg_mode_64; - could_scalar &= ains->alu.dest_override == midgard_dest_override_none; if (ains->src[0] != ~0) could_scalar &= (sz0 == 16) || (sz0 == 32); diff --git a/src/panfrost/midgard/mir.c b/src/panfrost/midgard/mir.c index 2e0960148eb..b7e2917c465 100644 --- a/src/panfrost/midgard/mir.c +++ b/src/panfrost/midgard/mir.c @@ -202,22 +202,20 @@ mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask) /* Checks if we should use an upper destination override, rather than the lower * one in the IR. Returns zero if no, returns the bytes to shift otherwise */ -unsigned -mir_upper_override(midgard_instruction *ins) +signed +mir_upper_override(midgard_instruction *ins, unsigned inst_size) { - /* If there is no override, there is no upper override, tautology */ - if (ins->alu.dest_override == midgard_dest_override_none) - return 0; + unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); - /* Make sure we didn't already lower somehow */ - assert(ins->alu.dest_override == midgard_dest_override_lower); + /* If the sizes are the same, there's nothing to override */ + if (type_size == inst_size) + return -1; /* There are 16 bytes per vector, so there are (16/bytes) * components per vector. 
So the magic half is half of * (16/bytes), which simplifies to 8/bytes = 8 / (bits / 8) = 64 / bits * */ - unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); unsigned threshold = 64 / type_size; /* How many components did we shift over? */ -- 2.30.2