mesa.git: src/panfrost/midgard/mir_promote_uniforms.c
/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"
#include "util/u_math.h"
#include "util/u_memory.h"

/* This pass promotes uniform reads from load/store ops to uniform registers
 * when it is beneficial to do so. Normally, this saves both instructions and
 * total register pressure, but it does take a toll on the number of work
 * registers that are available, so this is a balance.
 *
 * We use a heuristic, implemented by mir_work_heuristic, to determine the
 * ideal number of work registers to leave available.
 */

static bool
mir_is_promoteable_ubo(midgard_instruction *ins)
{
        /* TODO: promote unaligned access via swizzle? */
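        /* Candidates are reads of the first UBO (a zero arg_1) at a
         * 16-byte-aligned constant offset, with no indirect offset register
         * in use (the 0x1E sentinel in arg_2), whose vec4 slot fits within
         * the at most 16 registers we could ever promote into */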

        return (ins->type == TAG_LOAD_STORE_4) &&
                (OP_IS_UBO_READ(ins->load_store.op)) &&
                !(ins->constants[0] & 0xF) &&
                !(ins->load_store.arg_1) &&
                (ins->load_store.arg_2 == 0x1E) &&
                ((ins->constants[0] / 16) < 16);
}

static unsigned
mir_promoteable_uniform_count(compiler_context *ctx)
{
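        /* Strictly speaking this is the highest promoteable vec4 index seen
         * (a high-water mark) rather than a count, which is close enough
         * for the work heuristic below */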
        unsigned count = 0;

        mir_foreach_instr_global(ctx, ins) {
                if (mir_is_promoteable_ubo(ins))
                        count = MAX2(count, ins->constants[0] / 16);
        }

        return count;
}

static unsigned
mir_count_live(uint16_t *live, unsigned temp_count)
{
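        /* live[] holds one 16-bit byte mask per node, so popcounting the
         * masks yields the number of live bytes at this program point */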
        unsigned count = 0;

        for (unsigned i = 0; i < temp_count; ++i)
                count += util_bitcount(live[i]);

        return count;
}

static unsigned
mir_estimate_pressure(compiler_context *ctx)
{
        mir_invalidate_liveness(ctx);
        mir_compute_liveness(ctx);

        unsigned max_live = 0;
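        /* Walk each block backwards from its live-out set and track the
         * peak number of simultaneously live bytes; 16 bytes make up one
         * 128-bit work register, hence the divide at the end */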

        mir_foreach_block(ctx, block) {
                uint16_t *live = mem_dup(block->live_out, ctx->temp_count * sizeof(uint16_t));

                mir_foreach_instr_in_block_rev(block, ins) {
                        unsigned count = mir_count_live(live, ctx->temp_count);
                        max_live = MAX2(max_live, count);
                        mir_liveness_ins_update(live, ins, ctx->temp_count);
                }

                free(live);
        }

        return DIV_ROUND_UP(max_live, 16);
}

static unsigned
mir_work_heuristic(compiler_context *ctx)
{
        unsigned uniform_count = mir_promoteable_uniform_count(ctx);

        /* If there are 8 or fewer uniforms, it doesn't matter what we do, so
         * allow as many work registers as needed */

        if (uniform_count <= 8)
                return 16;

        /* Otherwise, estimate the register pressure */

        unsigned pressure = mir_estimate_pressure(ctx);

        /* Prioritize not spilling above all else. The relation between the
         * pressure estimate and the actual register pressure is a little
         * murkier than we might like (due to scheduling, pipeline registers,
         * failure to pack vector registers, load/store registers, texture
         * registers...), hence why this is a heuristic parameter */

        if (pressure > 6)
                return 16;

        /* If there's no chance of spilling, prioritize UBOs and thread count */
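        /* (Staying within 8 work registers generally lets the hardware keep
         * more threads in flight, so the smaller work set is free here) */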

        return 8;
}

void
midgard_promote_uniforms(compiler_context *ctx)
{
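        /* Promoted uniforms live at the top of the shared work/uniform
         * register file: vec4 slot i is preloaded into register (23 - i),
         * so registers [work_count, 23] are usable and 24 - work_count
         * slots are available for promotion */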
        unsigned work_count = mir_work_heuristic(ctx);
        unsigned promoted_count = 24 - work_count;

        mir_foreach_instr_global_safe(ctx, ins) {
                if (!mir_is_promoteable_ubo(ins)) continue;

                unsigned off = ins->constants[0];
                unsigned address = off / 16;

                /* Check if it's a promotable range */
                unsigned uniform_reg = 23 - address;

                if (address >= promoted_count) continue;

                /* It is, great! Let's promote */

                ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
                unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);

                /* We do need the move for safety for a non-SSA dest, or if
                 * we're being fed into a special class */

                bool needs_move = ins->dest & IS_REG;
                needs_move |= mir_special_index(ctx, ins->dest);

                if (needs_move) {
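                        /* Match the width of the load: copy its byte mask
                         * onto the mov, and use 64-bit register mode for
                         * 64-bit loads, so the copy writes exactly the
                         * bytes the UBO read would have written */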
                        midgard_instruction mov = v_mov(promoted, ins->dest);

                        if (ins->load_64)
                                mov.alu.reg_mode = midgard_reg_mode_64;

                        mir_set_bytemask(&mov, mir_bytemask(ins));
                        mir_insert_instruction_before(ctx, ins, mov);
                } else {
                        mir_rewrite_index_src(ctx, ins->dest, promoted);
                }

                mir_remove_instruction(ins);
        }
}