X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fmidgard%2Fmidgard_compile.c;h=4bddea40fdb1328b2deaf9e45a3b22b556f38efe;hb=4ea512844c2c06f1d59f5bae5a6e80b67804361d;hp=c519193a56a98c52fc0b73b6552df48be55e75c2;hpb=5da0a33fab0b3cc6ea03c102bd1e156904d983e5;p=mesa.git

diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index c519193a56a..4bddea40fdb 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -83,6 +83,9 @@ midgard_block_add_successor(midgard_block *block, midgard_block *successor)
 #define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
 
 #define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W)
+#define SWIZZLE_XYXX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_X)
+#define SWIZZLE_XXXX SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, COMPONENT_X)
+#define SWIZZLE_WWWW SWIZZLE(COMPONENT_W, COMPONENT_W, COMPONENT_W, COMPONENT_W)
 
 #define M_LOAD_STORE(name, rname, uname) \
         static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
@@ -351,9 +354,11 @@ optimise_nir(nir_shader *nir)
 
         NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
         NIR_PASS(progress, nir, midgard_nir_lower_fdot2);
+        NIR_PASS(progress, nir, nir_lower_idiv);
 
         nir_lower_tex_options lower_tex_options = {
-                .lower_rect = true
+                .lower_rect = true,
+                .lower_txp = ~0
         };
 
         NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
@@ -539,14 +544,14 @@ emit_condition(compiler_context *ctx, nir_src *src, bool for_branch, unsigned co
                 .unit = for_branch ? UNIT_SMUL : UNIT_SADD,
 
                 .ssa_args = {
-
                         .src0 = condition,
                         .src1 = condition,
                         .dest = SSA_FIXED_REGISTER(31),
                 },
+
                 .alu = {
                         .op = midgard_alu_op_iand,
-                        .outmod = midgard_outmod_int,
+                        .outmod = midgard_outmod_int_wrap,
                         .reg_mode = midgard_reg_mode_32,
                         .dest_override = midgard_dest_override_none,
                         .mask = (0x3 << 6), /* w */
@@ -585,7 +590,7 @@ emit_condition_mixed(compiler_context *ctx, nir_alu_src *src, unsigned nr_comp)
                 },
                 .alu = {
                         .op = midgard_alu_op_iand,
-                        .outmod = midgard_outmod_int,
+                        .outmod = midgard_outmod_int_wrap,
                         .reg_mode = midgard_reg_mode_32,
                         .dest_override = midgard_dest_override_none,
                         .mask = expand_writemask((1 << nr_comp) - 1),
@@ -616,7 +621,7 @@ emit_indirect_offset(compiler_context *ctx, nir_src *src)
                 },
                 .alu = {
                         .op = midgard_alu_op_imov,
-                        .outmod = midgard_outmod_int,
+                        .outmod = midgard_outmod_int_wrap,
                         .reg_mode = midgard_reg_mode_32,
                         .dest_override = midgard_dest_override_none,
                         .mask = (0x3 << 6), /* w */
@@ -682,7 +687,10 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 ALU_CASE(iadd, iadd);
                 ALU_CASE(isub, isub);
                 ALU_CASE(imul, imul);
-                ALU_CASE(iabs, iabs);
+
+                /* Zero shoved as second-arg */
+                ALU_CASE(iabs, iabsdiff);
+
                 ALU_CASE(mov, imov);
 
                 ALU_CASE(feq32, feq);
@@ -727,10 +735,11 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 ALU_CASE(fsin, fsin);
                 ALU_CASE(fcos, fcos);
 
+                /* Second op implicit #0 */
+                ALU_CASE(inot, inor);
                 ALU_CASE(iand, iand);
                 ALU_CASE(ior, ior);
                 ALU_CASE(ixor, ixor);
-                ALU_CASE(inot, inand);
                 ALU_CASE(ishl, ishl);
                 ALU_CASE(ishr, iasr);
                 ALU_CASE(ushr, ilsr);
@@ -816,12 +825,14 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
         }
 
         /* Midgard can perform certain modifiers on output of an ALU op */
-        midgard_outmod outmod =
-                midgard_is_integer_out_op(op) ? midgard_outmod_int :
-                instr->dest.saturate ? midgard_outmod_sat : midgard_outmod_none;
+        unsigned outmod;
 
-        if (instr->op == nir_op_fsat)
-                outmod = midgard_outmod_sat;
+        if (midgard_is_integer_out_op(op)) {
+                outmod = midgard_outmod_int_wrap;
+        } else {
+                bool sat = instr->dest.saturate || instr->op == nir_op_fsat;
+                outmod = sat ? midgard_outmod_sat : midgard_outmod_none;
+        }
 
         /* fmax(a, 0.0) can turn into a .pos modifier as an optimization */
 
@@ -927,7 +938,8 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 }
 
                 ins.alu.src2 = vector_alu_srco_unsigned(blank_alu_src_xxxx);
-        } else if (instr->op == nir_op_f2b32 || instr->op == nir_op_i2b32) {
+        } else if (nr_inputs == 1 && !quirk_flipped_r24) {
+                /* Lots of instructions need a 0 plonked in */
                 ins.ssa_args.inline_constant = false;
                 ins.ssa_args.src1 = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
                 ins.has_constants = true;
@@ -998,6 +1010,42 @@ emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset, nir_src
         }
 }
 
+static void
+emit_varying_read(
+        compiler_context *ctx,
+        unsigned dest, unsigned offset,
+        unsigned nr_comp, unsigned component,
+        nir_src *indirect_offset)
+{
+        /* XXX: Half-floats? */
+        /* TODO: swizzle, mask */
+
+        midgard_instruction ins = m_ld_vary_32(dest, offset);
+        ins.load_store.mask = (1 << nr_comp) - 1;
+        ins.load_store.swizzle = SWIZZLE_XYZW >> (2 * component);
+
+        midgard_varying_parameter p = {
+                .is_varying = 1,
+                .interpolation = midgard_interp_default,
+                .flat = /*var->data.interpolation == INTERP_MODE_FLAT*/ 0
+        };
+
+        unsigned u;
+        memcpy(&u, &p, sizeof(p));
+        ins.load_store.varying_parameters = u;
+
+        if (indirect_offset) {
+                /* We need to add in the dynamic index, moved to r27.w */
+                emit_indirect_offset(ctx, indirect_offset);
+                ins.load_store.unknown = 0x79e; /* xxx: what is this? */
+        } else {
+                /* Just a direct load */
+                ins.load_store.unknown = 0x1e9e; /* xxx: what is this? */
+        }
+
+        emit_mir_instruction(ctx, ins);
+}
+
 static void
 emit_sysval_read(compiler_context *ctx, nir_intrinsic_instr *instr)
 {
@@ -1120,38 +1168,15 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                         offset += nir_src_as_uint(instr->src[0]);
                 }
 
+                /* We may need to apply a fractional offset */
+                int component = instr->intrinsic == nir_intrinsic_load_input ?
+                        nir_intrinsic_component(instr) : 0;
                 reg = nir_dest_index(ctx, &instr->dest);
 
                 if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) {
                         emit_uniform_read(ctx, reg, ctx->sysval_count + offset, !direct ? &instr->src[0] : NULL);
                 } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
-                        /* XXX: Half-floats? */
-                        /* TODO: swizzle, mask */
-
-                        midgard_instruction ins = m_ld_vary_32(reg, offset);
-                        ins.load_store.mask = (1 << nr_comp) - 1;
-
-                        midgard_varying_parameter p = {
-                                .is_varying = 1,
-                                .interpolation = midgard_interp_default,
-                                .flat = /*var->data.interpolation == INTERP_MODE_FLAT*/ 0
-                        };
-
-                        unsigned u;
-                        memcpy(&u, &p, sizeof(p));
-                        ins.load_store.varying_parameters = u;
-
-                        if (direct) {
-                                /* We have the offset totally ready */
-                                ins.load_store.unknown = 0x1e9e; /* xxx: what is this? */
-                        } else {
-                                /* We have it partially ready, but we need to
-                                 * add in the dynamic index, moved to r27.w */
-                                emit_indirect_offset(ctx, &instr->src[0]);
-                                ins.load_store.unknown = 0x79e; /* xxx: what is this? */
-                        }
-
-                        emit_mir_instruction(ctx, ins);
+                        emit_varying_read(ctx, reg, offset, nr_comp, component, !direct ? &instr->src[0] : NULL);
                 } else if (ctx->is_blend) {
                         /* For blend shaders, load the input color, which is
                          * preloaded to r0 */
@@ -1221,44 +1246,23 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                         ctx->fragment_output = reg;
                 } else if (ctx->stage == MESA_SHADER_VERTEX) {
                         /* Varyings are written into one of two special
-                         * varying register, r26 or r27. The register itself is selected as the register
-                         * in the st_vary instruction, minus the base of 26. E.g. write into r27 and then call st_vary(1)
-                         *
-                         * Normally emitting fmov's is frowned upon,
-                         * but due to unique constraints of
-                         * REGISTER_VARYING, fmov emission + a
-                         * dedicated cleanup pass is the only way to
-                         * guarantee correctness when considering some
-                         * (common) edge cases XXX: FIXME */
-
-                        /* If this varying corresponds to a constant (why?!),
-                         * emit that now since it won't get picked up by
-                         * hoisting (since there is no corresponding move
-                         * emitted otherwise) */
-
-                        void *constant_value = _mesa_hash_table_u64_search(ctx->ssa_constants, reg + 1);
-
-                        if (constant_value) {
-                                /* Special case: emit the varying write
-                                 * directly to r26 (looks funny in asm but it's
-                                 * fine) and emit the store _now_. Possibly
-                                 * slightly slower, but this is a really stupid
-                                 * special case anyway (why on earth would you
-                                 * have a constant varying? Your own fault for
-                                 * slightly worse perf :P) */
-
-                                midgard_instruction ins = v_fmov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), blank_alu_src, SSA_FIXED_REGISTER(26));
-                                attach_constants(ctx, &ins, constant_value, reg + 1);
-                                emit_mir_instruction(ctx, ins);
+                         * varying register, r26 or r27. The register itself is
+                         * selected as the register in the st_vary instruction,
+                         * minus the base of 26. E.g. write into r27 and then
+                         * call st_vary(1) */
 
-                                midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(0), offset);
-                                st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
-                                emit_mir_instruction(ctx, st);
-                        } else {
-                                /* Do not emit the varying yet -- instead, just mark down that we need to later */
+                        midgard_instruction ins = v_fmov(reg, blank_alu_src, SSA_FIXED_REGISTER(26));
+                        emit_mir_instruction(ctx, ins);
 
-                                _mesa_hash_table_u64_insert(ctx->ssa_varyings, reg + 1, (void *) ((uintptr_t) (offset + 1)));
-                        }
+                        /* We should have been vectorized. That also lets us
+                         * ignore the mask. because the mask component on
+                         * st_vary is (as far as I can tell) ignored [the blob
+                         * sets it to zero] */
+                        assert(nir_intrinsic_component(instr) == 0);
+
+                        midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(0), offset);
+                        st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
+                        emit_mir_instruction(ctx, st);
                 } else {
                         DBG("Unknown store\n");
                         assert(0);
@@ -1309,13 +1313,26 @@ midgard_tex_format(enum glsl_sampler_dim dim)
         }
 }
 
+static unsigned
+midgard_tex_op(nir_texop op)
+{
+        switch (op) {
+        case nir_texop_tex:
+        case nir_texop_txb:
+                return TEXTURE_OP_NORMAL;
+        case nir_texop_txl:
+                return TEXTURE_OP_LOD;
+        default:
+                unreachable("Unhanlded texture op");
+        }
+}
+
 static void
 emit_tex(compiler_context *ctx, nir_tex_instr *instr)
 {
         /* TODO */
         //assert (!instr->sampler);
         //assert (!instr->texture_array_size);
-        assert (instr->op == nir_texop_tex);
 
         /* Allocate registers via a round robin scheme to alternate between the two registers */
         int reg = ctx->texture_op_count & 1;
@@ -1330,18 +1347,17 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
         int sampler_index = texture_index;
 
         for (unsigned i = 0; i < instr->num_srcs; ++i) {
+                int reg = SSA_FIXED_REGISTER(REGISTER_TEXTURE_BASE + in_reg);
+                int index = nir_src_index(ctx, &instr->src[i].src);
+                midgard_vector_alu_src alu_src = blank_alu_src;
+
                 switch (instr->src[i].src_type) {
                 case nir_tex_src_coord: {
-                        int index = nir_src_index(ctx, &instr->src[i].src);
-
-                        midgard_vector_alu_src alu_src = blank_alu_src;
-
-                        int reg = SSA_FIXED_REGISTER(REGISTER_TEXTURE_BASE + in_reg);
-
                         if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
                                 /* For cubemaps, we need to load coords into
                                  * special r27, and then use a special ld/st op
-                                 * to copy into the texture register */
+                                 * to select the face and copy the xy into the
+                                 * texture register */
 
                                 alu_src.swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_X);
 
@@ -1350,7 +1366,7 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
 
                                 midgard_instruction st = m_st_cubemap_coords(reg, 0);
                                 st.load_store.unknown = 0x24; /* XXX: What is this? */
-                                st.load_store.mask = 0x3; /* xy? */
+                                st.load_store.mask = 0x3; /* xy */
                                 st.load_store.swizzle = alu_src.swizzle;
                                 emit_mir_instruction(ctx, st);
 
@@ -1358,12 +1374,26 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
                                 alu_src.swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_X);
 
                                 midgard_instruction ins = v_fmov(index, alu_src, reg);
+                                ins.alu.mask = expand_writemask(0x3); /* xy */
                                 emit_mir_instruction(ctx, ins);
                         }
 
                         break;
                 }
 
+                case nir_tex_src_bias:
+                case nir_tex_src_lod: {
+                        /* To keep RA simple, we put the bias/LOD into the w
+                         * component of the input source, which is otherwise in xy */
+
+                        alu_src.swizzle = SWIZZLE_XXXX;
+
+                        midgard_instruction ins = v_fmov(index, alu_src, reg);
+                        ins.alu.mask = expand_writemask(1 << COMPONENT_W);
+                        emit_mir_instruction(ctx, ins);
+                        break;
+                };
+
                 default: {
                         DBG("Unknown source type\n");
                         //assert(0);
@@ -1376,26 +1406,22 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
         midgard_instruction ins = {
                 .type = TAG_TEXTURE_4,
                 .texture = {
-                        .op = TEXTURE_OP_NORMAL,
+                        .op = midgard_tex_op(instr->op),
                         .format = midgard_tex_format(instr->sampler_dim),
                         .texture_handle = texture_index,
                         .sampler_handle = sampler_index,
 
-                        /* TODO: Don't force xyzw */
-                        .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+                        /* TODO: Regalloc it in */
+                        .swizzle = SWIZZLE_XYZW,
                         .mask = 0xF,
 
                         /* TODO: half */
-                        //.in_reg_full = 1,
+                        .in_reg_full = 1,
+                        .in_reg_swizzle = SWIZZLE_XYXX,
                         .out_full = 1,
 
-                        .filter = 1,
-
                         /* Always 1 */
                         .unknown7 = 1,
-
-                        /* Assume we can continue; hint it out later */
-                        .cont = 1,
                 }
         };
 
@@ -1403,15 +1429,24 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
         ins.texture.in_reg_select = in_reg;
         ins.texture.out_reg_select = out_reg;
 
-        /* TODO: Dynamic swizzle input selection, half-swizzles? */
-        if (instr->sampler_dim == GLSL_SAMPLER_DIM_3D) {
-                ins.texture.in_reg_swizzle_right = COMPONENT_X;
-                ins.texture.in_reg_swizzle_left = COMPONENT_Y;
-                //ins.texture.in_reg_swizzle_third = COMPONENT_Z;
-        } else {
-                ins.texture.in_reg_swizzle_left = COMPONENT_X;
-                ins.texture.in_reg_swizzle_right = COMPONENT_Y;
-                //ins.texture.in_reg_swizzle_third = COMPONENT_X;
+        /* Setup bias/LOD if necessary. Only register mode support right now.
+         * TODO: Immediate mode for performance gains */
+
+        if (instr->op == nir_texop_txb || instr->op == nir_texop_txl) {
+                ins.texture.lod_register = true;
+
+                midgard_tex_register_select sel = {
+                        .select = in_reg,
+                        .full = 1,
+
+                        /* w */
+                        .component_lo = 1,
+                        .component_hi = 1
+                };
+
+                uint8_t packed;
+                memcpy(&packed, &sel, sizeof(packed));
+                ins.texture.bias = packed;
         }
 
         emit_mir_instruction(ctx, ins);
@@ -1683,6 +1718,10 @@ embedded_to_inline_constant(compiler_context *ctx)
 static void
 map_ssa_to_alias(compiler_context *ctx, int *ref)
 {
+        /* Sign is used quite deliberately for unused */
+        if (*ref < 0)
+                return;
+
         unsigned int alias = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_alias, *ref + 1);
 
         if (alias) {
@@ -1721,6 +1760,35 @@ midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block)
         return progress;
 }
 
+/* Dead code elimination for branches at the end of a block - only one branch
+ * per block is legal semantically */
+
+static void
+midgard_opt_cull_dead_branch(compiler_context *ctx, midgard_block *block)
+{
+        bool branched = false;
+
+        mir_foreach_instr_in_block_safe(block, ins) {
+                if (!midgard_is_branch_unit(ins->unit)) continue;
+
+                /* We ignore prepacked branches since the fragment epilogue is
+                 * just generally special */
+                if (ins->prepacked_branch) continue;
+
+                /* Discards are similarly special and may not correspond to the
+                 * end of a block */
+
+                if (ins->branch.target_type == TARGET_DISCARD) continue;
+
+                if (branched) {
+                        /* We already branched, so this is dead */
+                        mir_remove_instruction(ins);
+                }
+
+                branched = true;
+        }
+}
+
 static bool
 mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask)
 {
@@ -1736,6 +1804,30 @@ mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask)
         return false;
 }
 
+static bool
+mir_nontrivial_source2_mod(midgard_instruction *ins)
+{
+        unsigned mask = squeeze_writemask(ins->alu.mask);
+        bool is_int = midgard_is_integer_op(ins->alu.op);
+
+        midgard_vector_alu_src src2 =
+                vector_alu_from_unsigned(ins->alu.src2);
+
+        return mir_nontrivial_mod(src2, is_int, mask);
+}
+
+static bool
+mir_nontrivial_outmod(midgard_instruction *ins)
+{
+        bool is_int = midgard_is_integer_op(ins->alu.op);
+        unsigned mod = ins->alu.outmod;
+
+        if (is_int)
+                return mod != midgard_outmod_int_wrap;
+        else
+                return mod != midgard_outmod_none;
+}
+
 static bool
 midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block)
 {
@@ -1759,15 +1851,8 @@ midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block)
                 if (ins->ssa_args.inline_constant) continue;
                 if (ins->has_constants) continue;
 
-                /* Also, if the move has side effects, we're helpless */
-
-                midgard_vector_alu_src src =
-                        vector_alu_from_unsigned(ins->alu.src2);
-                unsigned mask = squeeze_writemask(ins->alu.mask);
-                bool is_int = midgard_is_integer_op(ins->alu.op);
-
-                if (mir_nontrivial_mod(src, is_int, mask)) continue;
-                if (ins->alu.outmod != midgard_outmod_none) continue;
+                if (mir_nontrivial_source2_mod(ins)) continue;
+                if (mir_nontrivial_outmod(ins)) continue;
 
                 /* We're clear -- rewrite */
                 mir_rewrite_index_src(ctx, to, from);
@@ -1778,6 +1863,68 @@ midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block)
         return progress;
 }
 
+/* fmov.pos is an idiom for fpos. Propoagate the .pos up to the source, so then
+ * the move can be propagated away entirely */
+
+static bool
+mir_compose_float_outmod(midgard_outmod_float *outmod, midgard_outmod_float comp)
+{
+        /* Nothing to do */
+        if (comp == midgard_outmod_none)
+                return true;
+
+        if (*outmod == midgard_outmod_none) {
+                *outmod = comp;
+                return true;
+        }
+
+        /* TODO: Compose rules */
+        return false;
+}
+
+static bool
+midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
+{
+        bool progress = false;
+
+        mir_foreach_instr_in_block_safe(block, ins) {
+                if (ins->type != TAG_ALU_4) continue;
+                if (ins->alu.op != midgard_alu_op_fmov) continue;
+                if (ins->alu.outmod != midgard_outmod_pos) continue;
+
+                /* TODO: Registers? */
+                unsigned src = ins->ssa_args.src1;
+                if (src >= ctx->func->impl->ssa_alloc) continue;
+                assert(!mir_has_multiple_writes(ctx, src));
+
+                /* There might be a source modifier, too */
+                if (mir_nontrivial_source2_mod(ins)) continue;
+
+                /* Backpropagate the modifier */
+                mir_foreach_instr_in_block_from_rev(block, v, mir_prev_op(ins)) {
+                        if (v->type != TAG_ALU_4) continue;
+                        if (v->ssa_args.dest != src) continue;
+
+                        /* Can we even take a float outmod? */
+                        if (midgard_is_integer_out_op(v->alu.op)) continue;
+
+                        midgard_outmod_float temp = v->alu.outmod;
+                        progress |= mir_compose_float_outmod(&temp, ins->alu.outmod);
+
+                        /* Throw in the towel.. */
+                        if (!progress) break;
+
+                        /* Otherwise, transfer the modifier */
+                        v->alu.outmod = temp;
+                        ins->alu.outmod = midgard_outmod_none;
+
+                        break;
+                }
+        }
+
+        return progress;
+}
+
 static bool
 midgard_opt_copy_prop_tex(compiler_context *ctx, midgard_block *block)
 {
@@ -1878,40 +2025,6 @@ midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
         }
 }
 
-/* Emit varying stores late */
-
-static void
-midgard_emit_store(compiler_context *ctx, midgard_block *block) {
-        /* Iterate in reverse to get the final write, rather than the first */
-
-        mir_foreach_instr_in_block_safe_rev(block, ins) {
-                /* Check if what we just wrote needs a store */
-                int idx = ins->ssa_args.dest;
-                uintptr_t varying = ((uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_varyings, idx + 1));
-
-                if (!varying) continue;
-
-                varying -= 1;
-
-                /* We need to store to the appropriate varying, so emit the
-                 * move/store */
-
-                /* TODO: Integrate with special purpose RA (and scheduler?) */
-                bool high_varying_register = false;
-
-                midgard_instruction mov = v_fmov(idx, blank_alu_src, SSA_FIXED_REGISTER(REGISTER_VARYING_BASE + high_varying_register));
-
-                midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(high_varying_register), varying);
-                st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
-
-                mir_insert_instruction_before(mir_next_op(ins), st);
-                mir_insert_instruction_before(mir_next_op(ins), mov);
-
-                /* We no longer need to store this varying */
-                _mesa_hash_table_u64_remove(ctx->ssa_varyings, idx + 1);
-        }
-}
-
 /* If there are leftovers after the below pass, emit actual fmov
  * instructions for the slow-but-correct path */
 
@@ -2030,7 +2143,7 @@ emit_blend_epilogue(compiler_context *ctx)
                         .op = midgard_alu_op_imov,
                         .reg_mode = midgard_reg_mode_8,
                         .dest_override = midgard_dest_override_none,
-                        .outmod = midgard_outmod_int,
+                        .outmod = midgard_outmod_int_wrap,
                         .mask = 0xFF,
                         .src1 = vector_alu_srco_unsigned(blank_alu_src),
                         .src2 = vector_alu_srco_unsigned(blank_alu_src),
@@ -2077,7 +2190,6 @@ emit_block(compiler_context *ctx, nir_block *block)
         /* Perform heavylifting for aliasing */
         actualise_ssa_to_alias(ctx);
 
-        midgard_emit_store(ctx, this_block);
         midgard_pair_load_store(ctx, this_block);
 
         /* Append fragment shader epilogue (value writeout) */
@@ -2287,16 +2399,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         /* TODO: Decide this at runtime */
         ctx->uniform_cutoff = 8;
 
-        /* Assign var locations early, so the epilogue can use them if necessary */
-
-        nir_assign_var_locations(&nir->outputs, &nir->num_outputs, glsl_type_size);
-        nir_assign_var_locations(&nir->inputs, &nir->num_inputs, glsl_type_size);
-        nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, glsl_type_size);
-
         /* Initialize at a global (not block) level hash tables */
 
         ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
-        ctx->ssa_varyings = _mesa_hash_table_u64_create(NULL);
         ctx->ssa_to_alias = _mesa_hash_table_u64_create(NULL);
         ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
         ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
@@ -2307,16 +2412,22 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         struct exec_list *varyings =
                         ctx->stage == MESA_SHADER_VERTEX ? &nir->outputs : &nir->inputs;
 
+        unsigned max_varying = 0;
         nir_foreach_variable(var, varyings) {
                 unsigned loc = var->data.driver_location;
                 unsigned sz = glsl_type_size(var->type, FALSE);
 
-                for (int c = 0; c < sz; ++c) {
-                        program->varyings[loc + c] = var->data.location;
+                for (int c = loc; c < (loc + sz); ++c) {
+                        program->varyings[c] = var->data.location;
+                        max_varying = MAX2(max_varying, c);
                 }
         }
 
-        /* Lower gl_Position pre-optimisation */
+        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
+         * (so we don't accidentally duplicate the epilogue since mesa/st has
+         * messed with our I/O quite a bit already) */
+
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
 
         if (ctx->stage == MESA_SHADER_VERTEX)
                 NIR_PASS_V(nir, nir_lower_viewport_transform);
@@ -2349,7 +2460,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count);
 
         program->attribute_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_inputs : 0;
-        program->varying_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_outputs : ((ctx->stage == MESA_SHADER_FRAGMENT) ? nir->num_inputs : 0);
+        program->varying_count = max_varying + 1; /* Fencepost off-by-one */
 
         nir_foreach_function(func, nir) {
                 if (!func->impl)
@@ -2375,12 +2486,20 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                 progress = false;
 
                 mir_foreach_block(ctx, block) {
+                        progress |= midgard_opt_pos_propagate(ctx, block);
                         progress |= midgard_opt_copy_prop(ctx, block);
                        progress |= midgard_opt_copy_prop_tex(ctx, block);
                         progress |= midgard_opt_dead_code_eliminate(ctx, block);
                 }
         } while (progress);
 
+        /* Nested control-flow can result in dead branches at the end of the
+         * block. This messes with our analysis and is just dead code, so cull
+         * them */
+        mir_foreach_block(ctx, block) {
+                midgard_opt_cull_dead_branch(ctx, block);
+        }
+
         /* Schedule! */
         schedule_program(ctx);
 
@@ -2536,7 +2655,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
          * last is an ALU, then it's also 1... */
 
         mir_foreach_block(ctx, block) {
-                util_dynarray_foreach(&block->bundles, midgard_bundle, bundle) {
+                mir_foreach_bundle_in_block(block, bundle) {
                         int lookahead = 1;
 
                         if (current_bundle + 1 < bundle_count) {