From: Alyssa Rosenzweig
Date: Tue, 16 Jul 2019 21:10:08 +0000 (-0700)
Subject: panfrost: Promote uniform registers late
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3174bc9972fa812989c2bbb4be8d0651024d84f2;p=mesa.git

panfrost: Promote uniform registers late

Rather than deciding up front between a load and a uniform register
read based on a fixed cutoff offset, we always create a load and then
promote it to a uniform register later. This will allow us to promote
in a register-pressure-aware manner.

Signed-off-by: Alyssa Rosenzweig
---
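(Not part of the patch: a round-trip sketch of the offset encoding the
pass relies on. emit_ubo_read below splits a uniform's word offset
across the varying_parameters and address fields, and
midgard_promote_uniforms reassembles it. The helper names pack_lo,
pack_hi and unpack are invented for illustration.)

#include <assert.h>

/* Offset splitting as in emit_ubo_read, reassembly as in
 * midgard_promote_uniforms: the low 3 bits ride in varying_parameters
 * (shifted up by 7), the remaining bits in address. */
static unsigned pack_lo(unsigned offset) { return (offset & 7) << 7; }
static unsigned pack_hi(unsigned offset) { return offset >> 3; }

static unsigned unpack(unsigned hi, unsigned lo_field)
{
        unsigned lo = lo_field >> 7; /* undo the << 7 shift */
        return (hi << 3) | lo;       /* reassemble the word offset */
}

int main(void)
{
        /* Every small word offset survives the split/reassemble cycle */
        for (unsigned offset = 0; offset < 128; ++offset)
                assert(unpack(pack_hi(offset), pack_lo(offset)) == offset);

        return 0;
}

(The << 7 here mirrors the lo = varying_parameters >> 7 read-back in the
new pass at the end of this patch.)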
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index bd7c11efab3..73ec4b56fb3 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -345,6 +345,11 @@ mir_next_op(struct midgard_instruction *ins)
         mir_foreach_block(ctx, v_block) \
                 mir_foreach_instr_in_block(v_block, v)
 
+#define mir_foreach_instr_global_safe(ctx, v) \
+        mir_foreach_block(ctx, v_block) \
+                mir_foreach_instr_in_block_safe(v_block, v)
+
+
 static inline midgard_instruction *
 mir_last_in_block(struct midgard_block *block)
@@ -440,6 +445,18 @@ bool mir_has_multiple_writes(compiler_context *ctx, int src);
 
 void mir_create_pipeline_registers(compiler_context *ctx);
 
+void
+midgard_promote_uniforms(compiler_context *ctx, unsigned pressure);
+
+void
+emit_ubo_read(
+        compiler_context *ctx,
+        unsigned dest,
+        unsigned offset,
+        nir_src *indirect_offset,
+        unsigned index);
+
+
 /* Final emission */
 
 void emit_binary_bundle(
diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h
index ef854dc60c1..13b868b1739 100644
--- a/src/panfrost/midgard/helpers.h
+++ b/src/panfrost/midgard/helpers.h
@@ -40,6 +40,12 @@
                 op == midgard_alu_op_imov \
         )
 
+#define OP_IS_UBO_READ(op) ( \
+                op == midgard_op_ld_uniform_32 || \
+                op == midgard_op_ld_uniform_16 || \
+                op == midgard_op_ld_uniform_32i \
+        )
+
 /* ALU control words are single bit fields with a lot of space */
 
 #define ALU_ENAB_VEC_MUL (1 << 17)
diff --git a/src/panfrost/midgard/meson.build b/src/panfrost/midgard/meson.build
index cbe26004e2d..b467f835032 100644
--- a/src/panfrost/midgard/meson.build
+++ b/src/panfrost/midgard/meson.build
@@ -29,6 +29,7 @@ libpanfrost_midgard_files = files(
   'midgard_ra_pipeline.c',
   'midgard_liveness.c',
   'midgard_ops.c',
+  'mir_promote_uniforms.c',
   'cppwrap.cpp',
   'disassemble.c',
 )
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 81484078348..37436e133fc 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1157,7 +1157,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 /* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
  * optimized) versions of UBO #0 */
 
-static void
+void
 emit_ubo_read(
         compiler_context *ctx,
         unsigned dest,
@@ -1167,36 +1167,20 @@ emit_ubo_read(
 {
         /* TODO: half-floats */
 
-        if (!indirect_offset && offset < ctx->uniform_cutoff && index == 0) {
-                /* Fast path: For the first 16 uniforms, direct accesses are
-                 * 0-cycle, since they're just a register fetch in the usual
-                 * case. So, we alias the registers while we're still in
-                 * SSA-space */
-
-                int reg_slot = 23 - offset;
-                alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot));
-        } else {
-                /* Otherwise, read from the 'special' UBO to access
-                 * higher-indexed uniforms, at a performance cost. More
-                 * generally, we're emitting a UBO read instruction. */
-
-                midgard_instruction ins = m_ld_uniform_32(dest, offset);
-
-                /* TODO: Don't split */
-                ins.load_store.varying_parameters = (offset & 7) << 7;
-                ins.load_store.address = offset >> 3;
+        midgard_instruction ins = m_ld_uniform_32(dest, offset);
 
-                if (indirect_offset) {
-                        emit_indirect_offset(ctx, indirect_offset);
-                        ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
-                } else {
-                        ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
-                }
-
-                /* TODO respect index */
+        /* TODO: Don't split */
+        ins.load_store.varying_parameters = (offset & 7) << 7;
+        ins.load_store.address = offset >> 3;
 
-                emit_mir_instruction(ctx, ins);
+        if (indirect_offset) {
+                emit_indirect_offset(ctx, indirect_offset);
+                ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
+        } else {
+                ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
         }
+
+        emit_mir_instruction(ctx, ins);
 }
 
 static void
@@ -2228,57 +2212,6 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
         return progress;
 }
 
-/* The following passes reorder MIR instructions to enable better scheduling */
-
-static void
-midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
-{
-        mir_foreach_instr_in_block_safe(block, ins) {
-                if (ins->type != TAG_LOAD_STORE_4) continue;
-
-                /* We've found a load/store op. Check if next is also load/store. */
-                midgard_instruction *next_op = mir_next_op(ins);
-                if (&next_op->link != &block->instructions) {
-                        if (next_op->type == TAG_LOAD_STORE_4) {
-                                /* If so, we're done since we're a pair */
-                                ins = mir_next_op(ins);
-                                continue;
-                        }
-
-                        /* Maximum search distance to pair, to avoid register pressure disasters */
-                        int search_distance = 8;
-
-                        /* Otherwise, we have an orphaned load/store -- search for another load */
-                        mir_foreach_instr_in_block_from(block, c, mir_next_op(ins)) {
-                                /* Terminate search if necessary */
-                                if (!(search_distance--)) break;
-
-                                if (c->type != TAG_LOAD_STORE_4) continue;
-
-                                /* Stores cannot be reordered, since they have
-                                 * dependencies. For the same reason, indirect
-                                 * loads cannot be reordered as their index is
-                                 * loaded in r27.w */
-
-                                if (OP_IS_STORE(c->load_store.op)) continue;
-
-                                /* It appears the 0x800 bit is set whenever a
-                                 * load is direct, unset when it is indirect.
-                                 * Skip indirect loads. */
-
-                                if (!(c->load_store.unknown & 0x800)) continue;
-
-                                /* We found one! Move it up to pair and remove it from the old location */
-
-                                mir_insert_instruction_before(ins, *c);
-                                mir_remove_instruction(c);
-
-                                break;
-                        }
-                }
-        }
-}
-
 /* If there are leftovers after the below pass, emit actual fmov
  * instructions for the slow-but-correct path */
 
@@ -2358,8 +2291,6 @@ emit_block(compiler_context *ctx, nir_block *block)
         /* Perform heavylifting for aliasing */
         actualise_ssa_to_alias(ctx);
 
-        midgard_pair_load_store(ctx, this_block);
-
         /* Append fragment shader epilogue (value writeout) */
         if (ctx->stage == MESA_SHADER_FRAGMENT) {
                 if (block == nir_impl_last_block(ctx->func->impl)) {
@@ -2564,7 +2495,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
 
         compiler_context *ctx = &ictx;
 
-        /* TODO: Decide this at runtime */
+        /* Start off with a safe cutoff, allowing usage of all 16 work
+         * registers. Later, we'll promote uniform reads to uniform registers
+         * if we determine it is beneficial to do so */
         ctx->uniform_cutoff = 8;
 
         /* Initialize at a global (not block) level hash tables */
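(Aside, not in the patch: the 23 - offset mapping deleted from the fast
path above is exactly the convention the new pass reuses. A
self-contained sketch, with uniform_register as a hypothetical name:)

#include <assert.h>

/* Uniforms fill the register file from the top down: word address 0
 * lands in r23, address 7 in r16, mirroring reg_slot above and
 * uniform_reg in mir_promote_uniforms below. */
static unsigned uniform_register(unsigned address)
{
        return 23 - address;
}

int main(void)
{
        assert(uniform_register(0) == 23);
        assert(uniform_register(7) == 16); /* last slot clear of r0-r15 */
        return 0;
}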
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index ccc641c7b86..97e06d743fa 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -524,6 +524,59 @@ schedule_block(compiler_context *ctx, midgard_block *block)
         block->is_scheduled = true;
 }
 
+/* The following passes reorder MIR instructions to enable better scheduling */
+
+static void
+midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
+{
+        mir_foreach_instr_in_block_safe(block, ins) {
+                if (ins->type != TAG_LOAD_STORE_4) continue;
+
+                /* We've found a load/store op. Check if next is also load/store. */
+                midgard_instruction *next_op = mir_next_op(ins);
+                if (&next_op->link != &block->instructions) {
+                        if (next_op->type == TAG_LOAD_STORE_4) {
+                                /* If so, we're done since we're a pair */
+                                ins = mir_next_op(ins);
+                                continue;
+                        }
+
+                        /* Maximum search distance to pair, to avoid register pressure disasters */
+                        int search_distance = 8;
+
+                        /* Otherwise, we have an orphaned load/store -- search for another load */
+                        mir_foreach_instr_in_block_from(block, c, mir_next_op(ins)) {
+                                /* Terminate search if necessary */
+                                if (!(search_distance--)) break;
+
+                                if (c->type != TAG_LOAD_STORE_4) continue;
+
+                                /* Stores cannot be reordered, since they have
+                                 * dependencies. For the same reason, indirect
+                                 * loads cannot be reordered as their index is
+                                 * loaded in r27.w */
+
+                                if (OP_IS_STORE(c->load_store.op)) continue;
+
+                                /* It appears the 0x800 bit is set whenever a
+                                 * load is direct, unset when it is indirect.
+                                 * Skip indirect loads. */
+
+                                if (!(c->load_store.unknown & 0x800)) continue;
+
+                                /* We found one! Move it up to pair and remove it from the old location */
+
+                                mir_insert_instruction_before(ins, *c);
+                                mir_remove_instruction(c);
+
+                                break;
+                        }
+                }
+        }
+}
+
+
+
 void
 schedule_program(compiler_context *ctx)
 {
@@ -531,6 +584,12 @@
         bool spilled = false;
         int iter_count = 10; /* max iterations */
 
+        midgard_promote_uniforms(ctx, 8);
+
+        mir_foreach_block(ctx, block) {
+                midgard_pair_load_store(ctx, block);
+        }
+
         do {
                 /* We would like to run RA after scheduling, but spilling can
                  * complicate this */
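(Aside, not in the patch: the promotion conditions in the new file below
collapse to a single predicate. A sketch under the same 23 - address
convention; promotable is a hypothetical helper, and the real pass also
checks the UBO index and the direct-access encoding:)

#include <assert.h>
#include <stdbool.h>

/* A uniform read is promotable when the register it would occupy lies
 * at or above the work-register pressure, so promotion never steals a
 * register that live values need. */
static bool promotable(unsigned address, unsigned register_pressure)
{
        unsigned uniform_reg = 23 - address;
        return address <= 16 && register_pressure <= uniform_reg;
}

int main(void)
{
        assert(promotable(0, 16));  /* r23 is always safe */
        assert(promotable(7, 16));  /* r16: last slot above the floor */
        assert(!promotable(8, 16)); /* r15 would steal a work register */
        return 0;
}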
diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c
new file mode 100644
index 00000000000..7206cb70165
--- /dev/null
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors (Collabora):
+ *   Alyssa Rosenzweig
+ */
+
+#include "compiler.h"
+
+/* This pass promotes reads from uniforms from load/store ops to uniform
+ * registers if it is beneficial to do so. Normally, this saves both
+ * instructions and total register pressure, but it does take a toll on the
+ * number of work registers that are available, so this is a balance.
+ *
+ * To cope, we take as an argument the maximum work register pressure in the
+ * program so we allow that many registers through at minimum, to prevent
+ * spilling. If we spill anyway, I mean, it's a lose-lose at that point. */
+
+void
+midgard_promote_uniforms(compiler_context *ctx, unsigned register_pressure)
+{
+        /* For our purposes, pressure is floored at the number of vec4 work
+         * registers, not the (possibly lower) live value count, so that
+         * promotion never causes spills */
+        register_pressure = MAX2(register_pressure, 16);
+
+        mir_foreach_instr_global_safe(ctx, ins) {
+                if (ins->type != TAG_LOAD_STORE_4) continue;
+                if (!OP_IS_UBO_READ(ins->load_store.op)) continue;
+
+                unsigned lo = ins->load_store.varying_parameters >> 7;
+                unsigned hi = ins->load_store.address;
+
+                /* TODO: Combine fields logically */
+                unsigned address = (hi << 3) | lo;
+
+                /* Check this is UBO 0 */
+                if (ins->load_store.unknown & 0xF) continue;
+
+                /* Check we're accessing directly */
+                if (ins->load_store.unknown != 0x1E00) continue;
+
+                /* Check if it's a promotable range */
+                unsigned uniform_reg = 23 - address;
+
+                if (address > 16) continue;
+                if (register_pressure > uniform_reg) continue;
+
+                /* It is, great! Let's promote */
+
+                ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
+
+                unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
+                mir_rewrite_index_src(ctx, ins->ssa_args.dest, promoted);
+
+                mir_remove_instruction(ins);
+        }
+}
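(Aside, not in the patch: with schedule_program passing 8 and the pass
flooring pressure at 16, exactly uniforms 0..7 are promoted, which lines
up with the conservative uniform_cutoff = 8 default set in
midgard_compile_shader_nir. A sketch of that arithmetic:)

#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        /* schedule_program passes 8; the pass floors it to 16 */
        unsigned register_pressure = MAX2(8, 16);

        for (unsigned address = 0; address <= 16; ++address) {
                unsigned uniform_reg = 23 - address;

                if (register_pressure > uniform_reg) continue;

                /* Prints uniform 0 -> r23 through uniform 7 -> r16 */
                printf("uniform %u -> r%u\n", address, uniform_reg);
        }

        return 0;
}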