From 70072a20e0086ad2d3714216a2b8fb4a97776385 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 26 Oct 2019 14:06:17 -0400 Subject: [PATCH] pan/midgard: Refactor swizzles Rather than having hw-specific swizzles encoded directly in the instructions, have a unified swizzle array so we can manipulate swizzles generically. Signed-off-by: Alyssa Rosenzweig --- .gitlab-ci/deqp-panfrost-t760-fails.txt | 1 + src/panfrost/midgard/compiler.h | 26 +-- src/panfrost/midgard/helpers.h | 103 ++---------- src/panfrost/midgard/midgard_compile.c | 118 +++++++------ src/panfrost/midgard/midgard_derivatives.c | 15 +- src/panfrost/midgard/midgard_emit.c | 88 +++++++++- src/panfrost/midgard/midgard_opt_copy_prop.c | 5 +- src/panfrost/midgard/midgard_opt_invert.c | 3 +- .../midgard/midgard_opt_perspective.c | 27 +-- src/panfrost/midgard/midgard_print.c | 15 +- src/panfrost/midgard/midgard_ra.c | 38 ++--- src/panfrost/midgard/midgard_schedule.c | 38 ++--- src/panfrost/midgard/mir.c | 156 ++---------------- src/panfrost/midgard/mir_promote_uniforms.c | 11 +- 14 files changed, 259 insertions(+), 385 deletions(-) diff --git a/.gitlab-ci/deqp-panfrost-t760-fails.txt b/.gitlab-ci/deqp-panfrost-t760-fails.txt index 9257b5490ca..83c7712c2ed 100644 --- a/.gitlab-ci/deqp-panfrost-t760-fails.txt +++ b/.gitlab-ci/deqp-panfrost-t760-fails.txt @@ -686,6 +686,7 @@ dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.44 Fail dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.47 Fail dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.48 Fail dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.57 Fail +dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.6 Fail dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.60 Fail dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.61 Fail dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.64 Fail diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 
df930646b31..e2c3d0f8893 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -77,6 +77,9 @@ typedef struct midgard_branch { * emitted before the register allocation pass. */ +#define MIR_SRC_COUNT 3 +#define MIR_VEC_COMPONENTS 16 + typedef struct midgard_instruction { /* Must be first for casting */ struct list_head link; @@ -88,8 +91,8 @@ typedef struct midgard_instruction { unsigned src[3]; unsigned dest; - /* Swizzle for the conditional for a csel/branch */ - unsigned cond_swizzle; + /* vec16 swizzle, unpacked, per source */ + unsigned swizzle[MIR_SRC_COUNT][MIR_VEC_COMPONENTS]; /* Special fields for an ALU instruction */ midgard_reg_info registers; @@ -500,14 +503,12 @@ nir_dest_index(compiler_context *ctx, nir_dest *dst) /* MIR manipulation */ -unsigned mir_get_swizzle(midgard_instruction *ins, unsigned idx); -void mir_set_swizzle(midgard_instruction *ins, unsigned idx, unsigned new); void mir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new); void mir_rewrite_index_src(compiler_context *ctx, unsigned old, unsigned new); void mir_rewrite_index_dst(compiler_context *ctx, unsigned old, unsigned new); void mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, unsigned new); void mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, unsigned new); -void mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned swizzle); +void mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned *swizzle); bool mir_single_use(compiler_context *ctx, unsigned value); bool mir_special_index(compiler_context *ctx, unsigned idx); unsigned mir_use_count(compiler_context *ctx, unsigned value); @@ -530,7 +531,6 @@ void mir_print_block(midgard_block *block); void mir_print_shader(compiler_context *ctx); bool mir_nontrivial_source2_mod(midgard_instruction *ins); bool mir_nontrivial_source2_mod_simple(midgard_instruction *ins); -bool 
mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask); bool mir_nontrivial_outmod(midgard_instruction *ins); void mir_insert_instruction_before_scheduled(compiler_context *ctx, midgard_block *block, midgard_instruction *tag, midgard_instruction ins); @@ -540,21 +540,12 @@ void mir_compute_temp_count(compiler_context *ctx); /* MIR goodies */ -static const midgard_vector_alu_src blank_alu_src = { - .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W), -}; - -static const midgard_vector_alu_src blank_alu_src_xxxx = { - .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, COMPONENT_X), -}; +static const midgard_vector_alu_src blank_alu_src = {}; static const midgard_scalar_alu_src blank_scalar_alu_src = { .full = true }; -/* Used for encoding the unused source of 1-op instructions */ -static const midgard_vector_alu_src zero_alu_src = { 0 }; - /* 'Intrinsic' move for aliasing */ static inline midgard_instruction @@ -564,13 +555,14 @@ v_mov(unsigned src, midgard_vector_alu_src mod, unsigned dest) .type = TAG_ALU_4, .mask = 0xF, .src = { SSA_UNUSED, src, SSA_UNUSED }, + .swizzle = SWIZZLE_IDENTITY, .dest = dest, .alu = { .op = midgard_alu_op_imov, .reg_mode = midgard_reg_mode_32, .dest_override = midgard_dest_override_none, .outmod = midgard_outmod_int_wrap, - .src1 = vector_alu_srco_unsigned(zero_alu_src), + .src1 = vector_alu_srco_unsigned(blank_alu_src), .src2 = vector_alu_srco_unsigned(mod) }, }; diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h index 51b01556c3e..b8600118260 100644 --- a/src/panfrost/midgard/helpers.h +++ b/src/panfrost/midgard/helpers.h @@ -167,41 +167,21 @@ quadword_size(int tag) #define SSA_REG_FROM_FIXED(reg) ((((reg) & ~1) >> SSA_FIXED_SHIFT) - 1) #define SSA_FIXED_MINIMUM SSA_FIXED_REGISTER(0) -/* Swizzle support */ - -#define SWIZZLE(A, B, C, D) (((D) << 6) | ((C) << 4) | ((B) << 2) | ((A) << 0)) -#define SWIZZLE_FROM_ARRAY(r) SWIZZLE(r[0], r[1], r[2], r[3]) #define 
COMPONENT_X 0x0 #define COMPONENT_Y 0x1 #define COMPONENT_Z 0x2 #define COMPONENT_W 0x3 -#define SWIZZLE_XXXX SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, COMPONENT_X) -#define SWIZZLE_XYXX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_X) -#define SWIZZLE_XYZX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_X) -#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W) -#define SWIZZLE_XYXZ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_Z) -#define SWIZZLE_XYZZ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_Z) -#define SWIZZLE_XXXY SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, COMPONENT_Y) -#define SWIZZLE_ZZZW SWIZZLE(COMPONENT_Z, COMPONENT_Z, COMPONENT_Z, COMPONENT_W) -#define SWIZZLE_ZWWW SWIZZLE(COMPONENT_Z, COMPONENT_W, COMPONENT_W, COMPONENT_W) -#define SWIZZLE_WWWW SWIZZLE(COMPONENT_W, COMPONENT_W, COMPONENT_W, COMPONENT_W) +#define SWIZZLE_IDENTITY { \ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, \ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, \ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } \ +} -static inline unsigned -swizzle_of(unsigned comp) -{ - switch (comp) { - case 1: - return SWIZZLE_XXXX; - case 2: - return SWIZZLE_XYXX; - case 3: - return SWIZZLE_XYZX; - case 4: - return SWIZZLE_XYZW; - default: - unreachable("Invalid component count"); - } +#define SWIZZLE_IDENTITY_4 { \ + { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ + { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ + { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ } static inline unsigned @@ -210,7 +190,6 @@ mask_of(unsigned nr_comp) return (1 << nr_comp) - 1; } - /* See ISA notes */ #define LDST_NOP (3) @@ -298,44 +277,26 @@ vector_alu_from_unsigned(unsigned u) return s; } -/* Composes two swizzles */ -static inline unsigned -pan_compose_swizzle(unsigned left, unsigned right) +static inline void +mir_compose_swizzle(unsigned *left, unsigned *right, unsigned 
*final_out) { - unsigned out = 0; + unsigned out[16]; - for (unsigned c = 0; c < 4; ++c) { - unsigned s = (left >> (2*c)) & 0x3; - unsigned q = (right >> (2*s)) & 0x3; + for (unsigned c = 0; c < 16; ++c) + out[c] = right[left[c]]; - out |= (q << (2*c)); - } - - return out; -} - -/* Applies a swizzle to an ALU source */ - -static inline unsigned -vector_alu_apply_swizzle(unsigned src, unsigned swizzle) -{ - midgard_vector_alu_src s = - vector_alu_from_unsigned(src); - - s.swizzle = pan_compose_swizzle(s.swizzle, swizzle); - - return vector_alu_srco_unsigned(s); + memcpy(final_out, out, sizeof(out)); } /* Checks for an xyzw.. swizzle, given a mask */ static inline bool -mir_is_simple_swizzle(unsigned swizzle, unsigned mask) +mir_is_simple_swizzle(unsigned *swizzle, unsigned mask) { for (unsigned i = 0; i < 16; ++i) { if (!(mask & (1 << i))) continue; - if (((swizzle >> (2 * i)) & 0x3) != i) + if (swizzle[i] != i) return false; } @@ -378,34 +339,4 @@ midgard_ldst_pack(midgard_ldst_register_select sel) return packed; } -/* Gets a swizzle like yyyy and returns y */ - -static inline unsigned -swizzle_to_component(unsigned swizzle) -{ - unsigned c = swizzle & 3; - assert(((swizzle >> 2) & 3) == c); - assert(((swizzle >> 4) & 3) == c); - assert(((swizzle >> 6) & 3) == c); - return c; -} - - -static inline unsigned -component_to_swizzle(unsigned c, unsigned count) -{ - switch (count) { - case 1: - return SWIZZLE(c, c, c, c); - case 2: - return SWIZZLE(c, c + 1, c + 1, c + 1); - case 3: - return SWIZZLE(c, c + 1, c + 2, c + 2); - case 4: - return SWIZZLE(c, c + 1, c + 2, c + 3); - default: - unreachable("Invalid component count"); - } -} - #endif diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 73a905fee01..31cb12f5e27 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -133,9 +133,9 @@ schedule_barrier(compiler_context *ctx) .mask = 0xF, \ .dest = ~0, \ .src = { ~0, ~0, ~0 }, \ 
+ .swizzle = SWIZZLE_IDENTITY_4, \ .load_store = { \ .op = midgard_op_##name, \ - .swizzle = SWIZZLE_XYZW, \ .address = address \ } \ }; \ @@ -160,9 +160,9 @@ vector_alu_modifiers(nir_alu_src *src, bool is_int, unsigned broadcast_count, { if (!src) return blank_alu_src; - /* Figure out how many components there are so we can adjust the - * swizzle. Specifically we want to broadcast the last channel so - * things like ball2/3 work + /* Figure out how many components there are so we can adjust. + * Specifically we want to broadcast the last channel so things like + * ball2/3 work. */ if (broadcast_count) { @@ -176,8 +176,7 @@ vector_alu_modifiers(nir_alu_src *src, bool is_int, unsigned broadcast_count, midgard_vector_alu_src alu_src = { .rep_low = 0, .rep_high = 0, - .half = half, - .swizzle = SWIZZLE_FROM_ARRAY(src->swizzle) + .half = half }; if (is_int) { @@ -959,6 +958,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) unsigned src0 = nir_alu_src_index(ctx, &instr->src[0]); unsigned src1 = nr_inputs >= 2 ? nir_alu_src_index(ctx, &instr->src[1]) : ~0; unsigned src2 = nr_inputs == 3 ? nir_alu_src_index(ctx, &instr->src[2]) : ~0; + assert(nr_inputs <= 3); /* Rather than use the instruction generation helpers, we do it * ourselves here to avoid the mess */ @@ -1013,17 +1013,29 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) .src2 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[1], is_int, broadcast_swizzle, half_2, sext_2)), }; - if (nr_inputs == 3) { - ins.cond_swizzle = SWIZZLE_FROM_ARRAY(nirmods[2]->swizzle); - assert(!nirmods[2]->abs); - assert(!nirmods[2]->negate); - } - /* Apply writemask if non-SSA, keeping in mind that we can't write to components that don't exist */ if (!is_ssa) ins.mask &= instr->dest.write_mask; + for (unsigned m = 0; m < 3; ++m) { + if (!nirmods[m]) + continue; + + for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) + ins.swizzle[m][c] = nirmods[m]->swizzle[c]; + + /* Replicate. 
TODO: remove when vec16 lands */ + for (unsigned c = NIR_MAX_VEC_COMPONENTS; c < MIR_VEC_COMPONENTS; ++c) + ins.swizzle[m][c] = nirmods[m]->swizzle[NIR_MAX_VEC_COMPONENTS - 1]; + } + + if (nr_inputs == 3) { + /* Conditions can't have mods */ + assert(!nirmods[2]->abs); + assert(!nirmods[2]->negate); + } + ins.alu = alu; /* Late fixup for emulated instructions */ @@ -1045,14 +1057,18 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ins.constants[0] = 1; } - ins.alu.src2 = vector_alu_srco_unsigned(blank_alu_src_xxxx); + + for (unsigned c = 0; c < 16; ++c) + ins.swizzle[1][c] = 0; } else if (nr_inputs == 1 && !quirk_flipped_r24) { /* Lots of instructions need a 0 plonked in */ ins.has_inline_constant = false; ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); ins.has_constants = true; ins.constants[0] = 0; - ins.alu.src2 = vector_alu_srco_unsigned(blank_alu_src_xxxx); + + for (unsigned c = 0; c < 16; ++c) + ins.swizzle[1][c] = 0; } else if (instr->op == nir_op_inot) { ins.invert = true; } @@ -1062,8 +1078,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) * instructions can only operate as if they were scalars. Lower * them here by changing the component. 
*/ - uint8_t original_swizzle[4]; - memcpy(original_swizzle, nirmods[0]->swizzle, sizeof(nirmods[0]->swizzle)); unsigned orig_mask = ins.mask; for (int i = 0; i < nr_components; ++i) { @@ -1076,10 +1090,9 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) if (!ins.mask) continue; - for (int j = 0; j < 4; ++j) - nirmods[0]->swizzle[j] = original_swizzle[i]; /* Pull from the correct component */ + for (unsigned j = 0; j < MIR_VEC_COMPONENTS; ++j) + ins.swizzle[0][j] = nirmods[0]->swizzle[i]; /* Pull from the correct component */ - ins.alu.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0], is_int, broadcast_swizzle, half_1, false)); emit_mir_instruction(ctx, ins); } } else { @@ -1209,7 +1222,9 @@ emit_varying_read( midgard_instruction ins = m_ld_vary_32(dest, offset); ins.mask = mask_of(nr_comp); - ins.load_store.swizzle = SWIZZLE_XYZW >> (2 * component); + + for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) + ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W); midgard_varying_parameter p = { .is_varying = 1, @@ -1510,7 +1525,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) midgard_instruction st = m_st_vary_32(reg, offset); st.load_store.arg_1 = 0x9E; st.load_store.arg_2 = 0x1E; - st.load_store.swizzle = swizzle_of(nr_comp) << (2*component); + + for (unsigned i = 0; i < ARRAY_SIZE(st.swizzle[0]); ++i) + st.swizzle[0][i] = MIN2(i + component, nr_comp); + emit_mir_instruction(ctx, st); } else { DBG("Unknown store\n"); @@ -1650,13 +1668,12 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, .mask = 0xF, .dest = nir_dest_index(ctx, &instr->dest), .src = { ~0, ~0, ~0 }, + .swizzle = SWIZZLE_IDENTITY_4, .texture = { .op = midgard_texop, .format = midgard_tex_format(instr->sampler_dim), .texture_handle = texture_index, .sampler_handle = sampler_index, - .swizzle = SWIZZLE_XYZW, - .in_reg_swizzle = SWIZZLE_XYZW, /* TODO: half */ .in_reg_full = 1, @@ -1668,7 +1685,6 @@ emit_texop_native(compiler_context *ctx, 
nir_tex_instr *instr, for (unsigned i = 0; i < instr->num_srcs; ++i) { int index = nir_src_index(ctx, &instr->src[i].src); - midgard_vector_alu_src alu_src = blank_alu_src; unsigned nr_components = nir_src_num_components(instr->src[i].src); switch (instr->src[i].src_type) { @@ -1711,22 +1727,26 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, ld.src[1] = index; ld.mask = 0x3; /* xy */ ld.load_store.arg_1 = 0x20; - ld.load_store.swizzle = alu_src.swizzle; + ld.swizzle[1][3] = COMPONENT_X; emit_mir_instruction(ctx, ld); - ins.src[0] = temp; - ins.texture.in_reg_swizzle = SWIZZLE_XYXX; + ins.src[1] = temp; + /* xyzw -> xyxx */ + ins.swizzle[1][2] = COMPONENT_X; + ins.swizzle[1][3] = COMPONENT_X; } else { - ins.src[0] = index; + ins.src[1] = index; } if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D) { /* Array component in w but NIR wants it in z */ - if (nr_components == 3) - ins.texture.in_reg_swizzle = SWIZZLE_XYZZ; - else if (nr_components == 2) - ins.texture.in_reg_swizzle = SWIZZLE_XYXX; - else + if (nr_components == 3) { + ins.swizzle[1][2] = COMPONENT_Z; + ins.swizzle[1][3] = COMPONENT_Z; + } else if (nr_components == 2) { + ins.swizzle[1][2] = COMPONENT_X; + ins.swizzle[1][3] = COMPONENT_X; + } else unreachable("Invalid texture 2D components"); } @@ -1742,7 +1762,7 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, break; ins.texture.lod_register = true; - ins.src[1] = index; + ins.src[2] = index; emit_explicit_constant(ctx, index, index); break; @@ -1982,19 +2002,8 @@ embedded_to_inline_constant(compiler_context *ctx, midgard_block *block) break; } - if (flip) { - /* Flip the SSA numbers */ - ins->src[0] = ins->src[1]; - ins->src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); - - /* And flip the modifiers */ - - unsigned src_temp; - - src_temp = ins->alu.src2; - ins->alu.src2 = ins->alu.src1; - ins->alu.src1 = src_temp; - } + if (flip) + mir_flip(ins); } if (ins->src[1] == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) { @@ -2005,8 +2014,10 
@@ embedded_to_inline_constant(compiler_context *ctx, midgard_block *block) midgard_vector_alu_src *m = (midgard_vector_alu_src *) &q; src = m; - /* Component is from the swizzle, e.g. r26.w -> w component. TODO: What if x is masked out? */ - int component = src->swizzle & 3; + /* Component is from the swizzle. Take a nonzero component */ + assert(ins->mask); + unsigned first_comp = ffs(ins->mask) - 1; + unsigned component = ins->swizzle[1][first_comp]; /* Scale constant appropriately, if we can legally */ uint16_t scaled_constant = 0; @@ -2043,9 +2054,8 @@ embedded_to_inline_constant(compiler_context *ctx, midgard_block *block) continue; } - /* Make sure that the constant is not itself a - * vector by checking if all accessed values - * (by the swizzle) are the same. */ + /* Make sure that the constant is not itself a vector + * by checking if all accessed values are the same. */ uint32_t *cons = ins->constants; uint32_t value = cons[component]; @@ -2053,12 +2063,12 @@ embedded_to_inline_constant(compiler_context *ctx, midgard_block *block) bool is_vector = false; unsigned mask = effective_writemask(&ins->alu, ins->mask); - for (int c = 1; c < 4; ++c) { + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) { /* We only care if this component is actually used */ if (!(mask & (1 << c))) continue; - uint32_t test = cons[(src->swizzle >> (2 * c)) & 3]; + uint32_t test = cons[ins->swizzle[1][c]]; if (test != value) { is_vector = true; diff --git a/src/panfrost/midgard/midgard_derivatives.c b/src/panfrost/midgard/midgard_derivatives.c index bfeae5077fc..ad12293dd33 100644 --- a/src/panfrost/midgard/midgard_derivatives.c +++ b/src/panfrost/midgard/midgard_derivatives.c @@ -99,15 +99,15 @@ midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr) .texture = { .op = mir_derivative_op(instr->op), .format = MALI_TEX_2D, - .swizzle = SWIZZLE_XYXX, - .in_reg_swizzle = SWIZZLE_XYXX, - .in_reg_full = 1, .out_full = 1, .sampler_type = MALI_SAMPLER_FLOAT, } }; + 
ins.swizzle[0][2] = ins.swizzle[0][3] = COMPONENT_X; + ins.swizzle[1][2] = ins.swizzle[1][3] = COMPONENT_X; + if (!instr->dest.dest.is_ssa) ins.mask &= instr->dest.write_mask; @@ -142,10 +142,11 @@ midgard_lower_derivatives(compiler_context *ctx, midgard_block *block) dup.mask &= 0b1100; /* Fixup swizzles */ - assert(ins->texture.swizzle == SWIZZLE_XYXX); - assert(ins->texture.in_reg_swizzle == SWIZZLE_XYXX); - dup.texture.swizzle = SWIZZLE_XXXY; - dup.texture.in_reg_swizzle = SWIZZLE_ZWWW; + dup.swizzle[0][0] = dup.swizzle[0][1] = dup.swizzle[0][2] = COMPONENT_X; + dup.swizzle[0][3] = COMPONENT_Y; + + dup.swizzle[1][0] = COMPONENT_Z; + dup.swizzle[1][1] = dup.swizzle[1][2] = dup.swizzle[1][3] = COMPONENT_W; /* Insert the new instruction */ mir_insert_instruction_before(ctx, mir_next_op(ins), dup); diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index 4f6ac05ab65..f3f38c418d4 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -42,15 +42,13 @@ component_from_mask(unsigned mask) static unsigned vector_to_scalar_source(unsigned u, bool is_int, bool is_full, - unsigned masked_component) + unsigned component) { midgard_vector_alu_src v; memcpy(&v, &u, sizeof(v)); /* TODO: Integers */ - unsigned component = (v.swizzle >> (2*masked_component)) & 3; - midgard_scalar_alu_src s = { 0 }; if (is_full) { @@ -68,10 +66,8 @@ vector_to_scalar_source(unsigned u, bool is_int, bool is_full, if (s.full) s.component = component << 1; - else { - bool upper = false; /* TODO */ - s.component = component + (upper << 2); - } + else + s.component = component; if (is_int) { /* TODO */ @@ -98,8 +94,8 @@ vector_to_scalar_alu(midgard_vector_alu v, midgard_instruction *ins) /* The output component is from the mask */ midgard_scalar_alu s = { .op = v.op, - .src1 = vector_to_scalar_source(v.src1, is_int, is_full, comp), - .src2 = !is_inline_constant ? 
vector_to_scalar_source(v.src2, is_int, is_full, comp) : 0, + .src1 = vector_to_scalar_source(v.src1, is_int, is_full, ins->swizzle[0][comp]), + .src2 = !is_inline_constant ? vector_to_scalar_source(v.src2, is_int, is_full, ins->swizzle[1][comp]) : 0, .unknown = 0, .outmod = v.outmod, .output_full = is_full, @@ -129,6 +125,76 @@ vector_to_scalar_alu(midgard_vector_alu v, midgard_instruction *ins) return s; } +static void +mir_pack_swizzle_alu(midgard_instruction *ins) +{ + midgard_vector_alu_src src[] = { + vector_alu_from_unsigned(ins->alu.src1), + vector_alu_from_unsigned(ins->alu.src2) + }; + + for (unsigned i = 0; i < 2; ++i) { + unsigned packed = 0; + + /* TODO: non-32-bit, non-vec4 */ + for (unsigned c = 0; c < 4; ++c) { + unsigned v = ins->swizzle[i][c]; + + /* Check vec4 */ + assert(v <= 3); + + packed |= v << (2 * c); + } + + src[i].swizzle = packed; + } + + ins->alu.src1 = vector_alu_srco_unsigned(src[0]); + + if (!ins->has_inline_constant) + ins->alu.src2 = vector_alu_srco_unsigned(src[1]); +} + +static void +mir_pack_swizzle_ldst(midgard_instruction *ins) +{ + /* TODO: non-32-bit, non-vec4 */ + for (unsigned c = 0; c < 4; ++c) { + unsigned v = ins->swizzle[0][c]; + + /* Check vec4 */ + assert(v <= 3); + + ins->load_store.swizzle |= v << (2 * c); + } + + /* TODO: arg_1/2 */ +} + +static void +mir_pack_swizzle_tex(midgard_instruction *ins) +{ + for (unsigned i = 0; i < 2; ++i) { + unsigned packed = 0; + + for (unsigned c = 0; c < 4; ++c) { + unsigned v = ins->swizzle[i][c]; + + /* Check vec4 */ + assert(v <= 3); + + packed |= v << (2 * c); + } + + if (i == 0) + ins->texture.swizzle = packed; + else + ins->texture.in_reg_swizzle = packed; + } + + /* TODO: bias component */ +} + static void emit_alu_bundle(compiler_context *ctx, midgard_bundle *bundle, @@ -168,6 +234,7 @@ emit_alu_bundle(compiler_context *ctx, else ins->alu.mask = ins->mask; + mir_pack_swizzle_alu(ins); size = sizeof(midgard_vector_alu); source = &ins->alu; } else if (ins->unit == 
ALU_ENAB_BR_COMPACT) { @@ -226,6 +293,8 @@ emit_binary_bundle(compiler_context *ctx, for (unsigned i = 0; i < bundle->instruction_count; ++i) { bundle->instructions[i]->load_store.mask = bundle->instructions[i]->mask; + + mir_pack_swizzle_ldst(bundle->instructions[i]); } memcpy(&current64, &bundle->instructions[0]->load_store, sizeof(current64)); @@ -256,6 +325,7 @@ emit_binary_bundle(compiler_context *ctx, ins->texture.type = bundle->tag; ins->texture.next_type = next_tag; ins->texture.mask = ins->mask; + mir_pack_swizzle_tex(ins); ctx->texture_op_count--; diff --git a/src/panfrost/midgard/midgard_opt_copy_prop.c b/src/panfrost/midgard/midgard_opt_copy_prop.c index ad3a96d3c5d..7c483b212e5 100644 --- a/src/panfrost/midgard/midgard_opt_copy_prop.c +++ b/src/panfrost/midgard/midgard_opt_copy_prop.c @@ -85,10 +85,7 @@ midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block) continue; /* We're clear -- rewrite, composing the swizzle */ - midgard_vector_alu_src src2 = - vector_alu_from_unsigned(ins->alu.src2); - - mir_rewrite_index_src_swizzle(ctx, to, from, src2.swizzle); + mir_rewrite_index_src_swizzle(ctx, to, from, ins->swizzle[1]); mir_remove_instruction(ins); progress |= true; } diff --git a/src/panfrost/midgard/midgard_opt_invert.c b/src/panfrost/midgard/midgard_opt_invert.c index 3a4c455877c..ae91b2940aa 100644 --- a/src/panfrost/midgard/midgard_opt_invert.c +++ b/src/panfrost/midgard/midgard_opt_invert.c @@ -41,6 +41,7 @@ midgard_lower_invert(compiler_context *ctx, midgard_block *block) .type = TAG_ALU_4, .mask = ins->mask, .src = { temp, ~0, ~0 }, + .swizzle = SWIZZLE_IDENTITY, .dest = ins->dest, .has_inline_constant = true, .alu = { @@ -50,7 +51,7 @@ midgard_lower_invert(compiler_context *ctx, midgard_block *block) .dest_override = midgard_dest_override_none, .outmod = midgard_outmod_int_wrap, .src1 = vector_alu_srco_unsigned(blank_alu_src), - .src2 = vector_alu_srco_unsigned(zero_alu_src) + .src2 = vector_alu_srco_unsigned(blank_alu_src) }, }; diff 
--git a/src/panfrost/midgard/midgard_opt_perspective.c b/src/panfrost/midgard/midgard_opt_perspective.c index aa4c58c470c..219cc93557b 100644 --- a/src/panfrost/midgard/midgard_opt_perspective.c +++ b/src/panfrost/midgard/midgard_opt_perspective.c @@ -37,6 +37,16 @@ #include "compiler.h" +static bool +is_swizzle_0(unsigned *swizzle) +{ + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) + if (swizzle[c]) + return false; + + return true; +} + bool midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) { @@ -51,14 +61,8 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) /* Check the swizzles */ - midgard_vector_alu_src src1 = - vector_alu_from_unsigned(ins->alu.src1); - - midgard_vector_alu_src src2 = - vector_alu_from_unsigned(ins->alu.src2); - - if (!mir_is_simple_swizzle(src1.swizzle, ins->mask)) continue; - if (src2.swizzle != SWIZZLE_XXXX) continue; + if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) continue; + if (!is_swizzle_0(ins->swizzle[1])) continue; /* Awesome, we're the right form. Now check where src2 is from */ unsigned frcp = ins->src[1]; @@ -74,10 +78,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) mir_foreach_instr_in_block_safe(block, sub) { if (sub->dest != frcp) continue; - midgard_vector_alu_src s = - vector_alu_from_unsigned(sub->alu.src1); - - frcp_component = s.swizzle & 3; + frcp_component = sub->swizzle[0][0]; frcp_from = sub->src[0]; frcp_found = @@ -116,11 +117,11 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) .mask = ins->mask, .dest = to, .src = { frcp_from, ~0, ~0 }, + .swizzle = SWIZZLE_IDENTITY_4, .load_store = { .op = frcp_component == COMPONENT_W ? 
midgard_op_ldst_perspective_division_w : midgard_op_ldst_perspective_division_z, - .swizzle = SWIZZLE_XYZW, .arg_1 = 0x20 } }; diff --git a/src/panfrost/midgard/midgard_print.c b/src/panfrost/midgard/midgard_print.c index 8eedcda5315..44fd888d584 100644 --- a/src/panfrost/midgard/midgard_print.c +++ b/src/panfrost/midgard/midgard_print.c @@ -66,6 +66,15 @@ mir_print_mask(unsigned mask) } } +static void +mir_print_swizzle(unsigned *swizzle) +{ + printf("."); + + for (unsigned i = 0; i < 16; ++i) + putchar(components[swizzle[i]]); +} + static const char * mir_get_unit(unsigned unit) { @@ -143,15 +152,19 @@ mir_print_instruction(midgard_instruction *ins) printf(", "); mir_print_index(ins->src[0]); + mir_print_swizzle(ins->swizzle[0]); printf(", "); if (ins->has_inline_constant) printf("#%d", ins->inline_constant); - else + else { mir_print_index(ins->src[1]); + mir_print_swizzle(ins->swizzle[1]); + } printf(", "); mir_print_index(ins->src[2]); + mir_print_swizzle(ins->swizzle[2]); if (ins->has_constants) { uint32_t *uc = ins->constants; diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 3c7be0d1385..4a5de426735 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -73,12 +73,12 @@ struct phys_reg { }; /* Shift each component up by reg_offset and shift all components horizontally - * by dst_offset. TODO: Generalize to !32-bit */ + * by dst_offset. 
TODO: vec8+ */ -static unsigned -offset_swizzle(unsigned swizzle, unsigned reg_offset, unsigned srcsize, unsigned dst_offset, unsigned dstsize) +static void +offset_swizzle(unsigned *swizzle, unsigned reg_offset, unsigned srcsize, unsigned dst_offset, unsigned dstsize) { - unsigned out = 0; + unsigned out[MIR_VEC_COMPONENTS]; signed reg_comp = reg_offset / srcsize; signed dst_comp = dst_offset / dstsize; @@ -86,13 +86,12 @@ offset_swizzle(unsigned swizzle, unsigned reg_offset, unsigned srcsize, unsigned assert(reg_comp * srcsize == reg_offset); assert(dst_comp * dstsize == dst_offset); - for (signed c = 0; c < 4; ++c) { + for (signed c = 0; c < MIR_VEC_COMPONENTS; ++c) { signed comp = MAX2(c - dst_comp, 0); - signed s = (swizzle >> (2*comp)) & 0x3; - out |= (MIN2(s + reg_comp, 3) << (2*c)); + out[c] = MIN2(swizzle[comp] + reg_comp, 4 - 1); } - return out; + memcpy(swizzle, out, sizeof(out)); } /* Helper to return the default phys_reg for a given register */ @@ -688,10 +687,7 @@ install_registers_instr( GET_CHANNEL_COUNT(alu_opcode_props[ins->alu.op].props) ? 
0 : dest.offset; - midgard_vector_alu_src mod1 = - vector_alu_from_unsigned(ins->alu.src1); - mod1.swizzle = offset_swizzle(mod1.swizzle, src1.offset, src1.size, dest_offset, dest.size); - ins->alu.src1 = vector_alu_srco_unsigned(mod1); + offset_swizzle(ins->swizzle[0], src1.offset, src1.size, dest_offset, dest.size); ins->registers.src1_reg = src1.reg; @@ -711,7 +707,7 @@ install_registers_instr( } else { midgard_vector_alu_src mod2 = vector_alu_from_unsigned(ins->alu.src2); - mod2.swizzle = offset_swizzle(mod2.swizzle, src2.offset, src2.size, dest_offset, dest.size); + offset_swizzle(ins->swizzle[1], src2.offset, src2.size, dest_offset, dest.size); ins->alu.src2 = vector_alu_srco_unsigned(mod2); ins->registers.src2_reg = src2.reg; @@ -733,12 +729,12 @@ install_registers_instr( assert(src.reg == 26 || src.reg == 27); ins->load_store.reg = src.reg - 26; - ins->load_store.swizzle = offset_swizzle(ins->load_store.swizzle, src.offset, src.size, 0, 4); + offset_swizzle(ins->swizzle[0], src.offset, src.size, 0, 4); } else { struct phys_reg dst = index_to_reg(ctx, g, ins->dest, mir_typesize(ins)); ins->load_store.reg = dst.reg; - ins->load_store.swizzle = offset_swizzle(ins->load_store.swizzle, 0, 4, dst.offset, dst.size); + offset_swizzle(ins->swizzle[0], 0, 4, dst.offset, dst.size); mir_set_bytemask(ins, mir_bytemask(ins) << dst.offset); } @@ -767,8 +763,8 @@ install_registers_instr( case TAG_TEXTURE_4: { /* Grab RA results */ struct phys_reg dest = index_to_reg(ctx, g, ins->dest, mir_typesize(ins)); - struct phys_reg coord = index_to_reg(ctx, g, ins->src[0], mir_srcsize(ins, 0)); - struct phys_reg lod = index_to_reg(ctx, g, ins->src[1], mir_srcsize(ins, 1)); + struct phys_reg coord = index_to_reg(ctx, g, ins->src[1], mir_srcsize(ins, 1)); + struct phys_reg lod = index_to_reg(ctx, g, ins->src[2], mir_srcsize(ins, 2)); assert(dest.reg == 28 || dest.reg == 29); assert(coord.reg == 28 || coord.reg == 29); @@ -777,19 +773,17 @@ install_registers_instr( 
ins->texture.in_reg_full = 1; ins->texture.in_reg_upper = 0; ins->texture.in_reg_select = coord.reg - 28; - ins->texture.in_reg_swizzle = - offset_swizzle(ins->texture.in_reg_swizzle, coord.offset, coord.size, 0, 4); + offset_swizzle(ins->swizzle[1], coord.offset, coord.size, 0, 4); /* Next, install the destination */ ins->texture.out_full = 1; ins->texture.out_upper = 0; ins->texture.out_reg_select = dest.reg - 28; - ins->texture.swizzle = - offset_swizzle(ins->texture.swizzle, 0, 4, dest.offset, coord.size); + offset_swizzle(ins->swizzle[0], 0, 4, dest.offset, dest.size); mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset); /* If there is a register LOD/bias, use it */ - if (ins->src[1] != ~0) { + if (ins->src[2] != ~0) { assert(!(lod.offset & 3)); midgard_tex_register_select sel = { .select = lod.reg, diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index e2ee39a111a..af3018780ff 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -437,7 +437,7 @@ mir_adjust_constants(midgard_instruction *ins, /* If destructive, let's copy in the new constants and adjust * swizzles to pack it in. */ - uint32_t indices[4] = { 0 }; + unsigned indices[16] = { 0 }; /* Reset count */ count = existing_count; @@ -473,17 +473,12 @@ mir_adjust_constants(midgard_instruction *ins, pred->constant_count = count * sizeof(uint32_t); - /* Cool, we have it in. 
So use indices as a - * swizzle */ - - unsigned swizzle = SWIZZLE_FROM_ARRAY(indices); - - if (ins->src[0] == r_constant) - ins->alu.src1 = vector_alu_apply_swizzle(ins->alu.src1, swizzle); - - if (ins->src[1] == r_constant) - ins->alu.src2 = vector_alu_apply_swizzle(ins->alu.src2, swizzle); + /* Use indices as a swizzle */ + mir_foreach_src(ins, s) { + if (ins->src[s] == r_constant) + mir_compose_swizzle(ins->swizzle[s], indices, ins->swizzle[s]); + } } return true; @@ -707,12 +702,12 @@ mir_schedule_comparison( midgard_instruction **instructions, struct midgard_predicate *predicate, BITSET_WORD *worklist, unsigned count, - unsigned cond, bool vector, unsigned swizzle, + unsigned cond, bool vector, unsigned *swizzle, midgard_instruction *user) { /* TODO: swizzle when scheduling */ unsigned comp_i = - (!vector && (swizzle == 0)) ? + (!vector && (swizzle[0] == 0)) ? mir_comparison_mobile(ctx, instructions, predicate, count, cond) : ~0; /* If we can, schedule the condition immediately */ @@ -723,12 +718,10 @@ mir_schedule_comparison( } /* Otherwise, we insert a move */ - midgard_vector_alu_src csel = { - .swizzle = swizzle - }; - midgard_instruction mov = v_mov(cond, csel, cond); + midgard_instruction mov = v_mov(cond, blank_alu_src, cond); mov.mask = vector ? 0xF : 0x1; + memcpy(mov.swizzle[1], swizzle, sizeof(mov.swizzle[1])); return mir_insert_instruction_before(ctx, user, mov); } @@ -754,7 +747,7 @@ mir_schedule_condition(compiler_context *ctx, midgard_instruction *cond = mir_schedule_comparison( ctx, instructions, predicate, worklist, count, last->src[condition_index], - vector, last->cond_swizzle, last); + vector, last->swizzle[2], last); /* We have exclusive reign over this (possibly move) conditional * instruction. 
We can rewrite into a pipeline conditional register */ @@ -769,7 +762,8 @@ mir_schedule_condition(compiler_context *ctx, if (cond->src[s] == ~0) continue; - mir_set_swizzle(cond, s, (mir_get_swizzle(cond, s) << (2*3)) & 0xFF); + for (unsigned q = 0; q < 4; ++q) + cond->swizzle[s][q + COMPONENT_W] = cond->swizzle[s][q]; } } @@ -1156,9 +1150,9 @@ v_load_store_scratch( .mask = mask, .dest = ~0, .src = { ~0, ~0, ~0 }, + .swizzle = SWIZZLE_IDENTITY_4, .load_store = { .op = is_store ? midgard_op_st_int4 : midgard_op_ld_int4, - .swizzle = SWIZZLE_XYZW, /* For register spilling - to thread local storage */ .arg_1 = 0xEA, @@ -1331,7 +1325,7 @@ static void mir_spill_register( midgard_instruction *before = ins; - /* For a csel, go back one more not to break up the bundle */ + /* TODO: Remove me I'm a fossil */ if (ins->type == TAG_ALU_4 && OP_IS_CSEL(ins->alu.op)) before = mir_prev_op(before); @@ -1347,7 +1341,7 @@ static void mir_spill_register( } /* Mask the load based on the component count - * actually needed to prvent RA loops */ + * actually needed to prevent RA loops */ st.mask = mir_from_bytemask(read_bytemask, midgard_reg_mode_32); diff --git a/src/panfrost/midgard/mir.c b/src/panfrost/midgard/mir.c index c5b7280b40c..fef2fd8e0f8 100644 --- a/src/panfrost/midgard/mir.c +++ b/src/panfrost/midgard/mir.c @@ -45,137 +45,14 @@ mir_get_alu_src(midgard_instruction *ins, unsigned idx) return vector_alu_from_unsigned(b); } -unsigned -mir_get_swizzle(midgard_instruction *ins, unsigned idx) -{ - if (ins->type == TAG_ALU_4) { - if (idx == 2 || ins->compact_branch) - return ins->cond_swizzle; - - return (mir_get_alu_src(ins, idx)).swizzle; - } else if (ins->type == TAG_LOAD_STORE_4) { - switch (idx) { - case 0: - return ins->load_store.swizzle; - case 1: - case 2: { - uint8_t raw = - (idx == 2) ? 
ins->load_store.arg_2 : ins->load_store.arg_1; - - /* TODO: Integrate component count with properties */ - unsigned components = 1; - switch (ins->load_store.op) { - case midgard_op_ld_int4: - components = (idx == 0) ? 2 : 1; - break; - case midgard_op_st_int4: - components = (idx == 1) ? 2 : 1; - break; - case midgard_op_ld_cubemap_coords: - components = 3; - break; - case midgard_op_ldst_perspective_division_z: - components = 3; - break; - case midgard_op_ldst_perspective_division_w: - components = 4; - break; - default: - components = 1; - break; - } - - return component_to_swizzle(midgard_ldst_select(raw).component, components); - } - default: - unreachable("Unknown load/store source"); - } - } else if (ins->type == TAG_TEXTURE_4) { - switch (idx) { - case 0: - return ins->texture.in_reg_swizzle; - case 1: - /* Swizzle on bias doesn't make sense */ - return 0; - default: - unreachable("Unknown texture source"); - } - } else { - unreachable("Unknown type"); - } -} - -void -mir_set_swizzle(midgard_instruction *ins, unsigned idx, unsigned new) -{ - if (ins->type == TAG_ALU_4) { - if (idx == 2 || ins->compact_branch) { - ins->cond_swizzle = new; - return; - } - - unsigned b = (idx == 0) ? ins->alu.src1 : ins->alu.src2; - - midgard_vector_alu_src s = - vector_alu_from_unsigned(b); - - s.swizzle = new; - unsigned pack = vector_alu_srco_unsigned(s); - - if (idx == 0) - ins->alu.src1 = pack; - else - ins->alu.src2 = pack; - } else if (ins->type == TAG_LOAD_STORE_4) { - switch (idx) { - case 0: - ins->load_store.swizzle = new; - break; - case 1: - case 2: { - uint8_t raw = - (idx == 2) ? 
ins->load_store.arg_2 : ins->load_store.arg_1; - - midgard_ldst_register_select sel - = midgard_ldst_select(raw); - sel.component = swizzle_to_component(new); - uint8_t packed = midgard_ldst_pack(sel); - - if (idx == 2) - ins->load_store.arg_2 = packed; - else - ins->load_store.arg_1 = packed; - - break; - } - default: - assert(new == 0); - break; - } - } else if (ins->type == TAG_TEXTURE_4) { - switch (idx) { - case 0: - ins->texture.in_reg_swizzle = new; - break; - default: - assert(new == 0); - break; - } - } else { - unreachable("Unknown type"); - } -} - static void -mir_rewrite_index_src_single_swizzle(midgard_instruction *ins, unsigned old, unsigned new, unsigned swizzle) +mir_rewrite_index_src_single_swizzle(midgard_instruction *ins, unsigned old, unsigned new, unsigned *swizzle) { for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) { if (ins->src[i] != old) continue; ins->src[i] = new; - - mir_set_swizzle(ins, i, - pan_compose_swizzle(mir_get_swizzle(ins, i), swizzle)); + mir_compose_swizzle(ins->swizzle[i], swizzle, ins->swizzle[i]); } } @@ -188,7 +65,7 @@ mir_rewrite_index_src(compiler_context *ctx, unsigned old, unsigned new) } void -mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned swizzle) +mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned *swizzle) { mir_foreach_instr_global(ctx, ins) { mir_rewrite_index_src_single_swizzle(ins, old, new, swizzle); @@ -245,18 +122,17 @@ mir_nontrivial_raw_mod(midgard_vector_alu_src src, bool is_int) return src.mod; } -bool -mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask) +static bool +mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask, unsigned *swizzle) { if (mir_nontrivial_raw_mod(src, is_int)) return true; /* size-conversion */ if (src.half) return true; - /* swizzle */ - for (unsigned c = 0; c < 4; ++c) { + for (unsigned c = 0; c < 16; ++c) { if (!(mask & (1 << c))) continue; - if 
(((src.swizzle >> (2*c)) & 3) != c) return true; + if (swizzle[c] != c) return true; } return false; @@ -270,7 +146,7 @@ mir_nontrivial_source2_mod(midgard_instruction *ins) midgard_vector_alu_src src2 = vector_alu_from_unsigned(ins->alu.src2); - return mir_nontrivial_mod(src2, is_int, ins->mask); + return mir_nontrivial_mod(src2, is_int, ins->mask, ins->swizzle[1]); } bool @@ -532,15 +408,13 @@ mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask) */ static uint16_t -mir_bytemask_of_read_components_single(unsigned swizzle, unsigned inmask, midgard_reg_mode mode) +mir_bytemask_of_read_components_single(unsigned *swizzle, unsigned inmask, midgard_reg_mode mode) { unsigned cmask = 0; - for (unsigned c = 0; c < 4; ++c) { + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) { if (!(inmask & (1 << c))) continue; - - unsigned comp = (swizzle >> (2*c)) & 3; - cmask |= (1 << comp); + cmask |= (1 << swizzle[c]); } return mir_to_bytemask(mode, cmask); @@ -580,8 +454,7 @@ mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node) qmask = mask_of(channel_override); } - unsigned swizzle = mir_get_swizzle(ins, i); - mask |= mir_bytemask_of_read_components_single(swizzle, qmask, mir_srcsize(ins, i)); + mask |= mir_bytemask_of_read_components_single(ins->swizzle[i], qmask, mir_srcsize(ins, i)); } return mask; @@ -715,6 +588,11 @@ mir_flip(midgard_instruction *ins) temp = ins->alu.src1; ins->alu.src1 = ins->alu.src2; ins->alu.src2 = temp; + + unsigned temp_swizzle[16]; + memcpy(temp_swizzle, ins->swizzle[0], sizeof(ins->swizzle[0])); + memcpy(ins->swizzle[0], ins->swizzle[1], sizeof(ins->swizzle[0])); + memcpy(ins->swizzle[1], temp_swizzle, sizeof(ins->swizzle[0])); } /* Before squashing, calculate ctx->temp_count just by observing the MIR */ diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index 5e31cb446bb..5fbde641dd9 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ 
b/src/panfrost/midgard/mir_promote_uniforms.c @@ -87,21 +87,12 @@ midgard_promote_uniforms(compiler_context *ctx, unsigned promoted_count) bool needs_move = ins->dest & IS_REG; needs_move |= mir_special_index(ctx, ins->dest); - /* Check the component count from the mask so we can setup a - * swizzle appropriately when promoting. The idea is to ensure - * the component count is preserved so RA can be smarter if we - * need to spill */ - - unsigned mask = ins->mask; - unsigned nr_components = sizeof(mask)*8 - __builtin_clz(mask); - if (needs_move) { midgard_instruction mov = v_mov(promoted, blank_alu_src, ins->dest); mov.mask = ins->mask; mir_insert_instruction_before(ctx, ins, mov); } else { - mir_rewrite_index_src_swizzle(ctx, ins->dest, - promoted, swizzle_of(nr_components)); + mir_rewrite_index_src(ctx, ins->dest, promoted); } mir_remove_instruction(ins); -- 2.30.2