mesa.git: src/panfrost/midgard/mir_promote_uniforms.c
/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"
#include "util/u_math.h"
#include "util/u_memory.h"

/* This pass promotes uniform reads from load/store ops to uniform registers
 * when it is beneficial to do so. Normally, this saves both instructions and
 * total register pressure, but it does take a toll on the number of work
 * registers that are available, so this is a balance.
 *
 * We use a heuristic, implemented by mir_work_heuristic, to determine the
 * ideal number of work registers to leave available.
 */

static bool
mir_is_promoteable_ubo(midgard_instruction *ins)
{
        /* TODO: promote unaligned access via swizzle? */
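        /* Candidates are reads of the first UBO (a zero arg_1) at a
         * 16-byte-aligned constant offset, with no indirect offset register
         * in use (the 0x1E sentinel in arg_2), whose vec4 slot fits within
         * the at most 16 registers we could ever promote into */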

        return (ins->type == TAG_LOAD_STORE_4) &&
                (OP_IS_UBO_READ(ins->load_store.op)) &&
                !(ins->constants[0] & 0xF) &&
                !(ins->load_store.arg_1) &&
                (ins->load_store.arg_2 == 0x1E) &&
                ((ins->constants[0] / 16) < 16);
}

static unsigned
mir_promoteable_uniform_count(compiler_context *ctx)
{
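        /* Strictly speaking this is the highest promoteable vec4 index seen
         * (a high-water mark) rather than a count, which is close enough
         * for the work heuristic below */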
        unsigned count = 0;

        mir_foreach_instr_global(ctx, ins) {
                if (mir_is_promoteable_ubo(ins))
                        count = MAX2(count, ins->constants[0] / 16);
        }

        return count;
}

static unsigned
mir_count_live(uint16_t *live, unsigned temp_count)
{
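        /* live[] holds one 16-bit byte mask per node, so popcounting the
         * masks yields the number of live bytes at this program point */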
        unsigned count = 0;

        for (unsigned i = 0; i < temp_count; ++i)
                count += util_bitcount(live[i]);

        return count;
}

static unsigned
mir_estimate_pressure(compiler_context *ctx)
{
        mir_invalidate_liveness(ctx);
        mir_compute_liveness(ctx);

        unsigned max_live = 0;
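        /* Walk each block backwards from its live-out set and track the
         * peak number of simultaneously live bytes; 16 bytes make up one
         * 128-bit work register, hence the divide at the end */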

        mir_foreach_block(ctx, block) {
                uint16_t *live = mem_dup(block->live_out, ctx->temp_count * sizeof(uint16_t));

                mir_foreach_instr_in_block_rev(block, ins) {
                        unsigned count = mir_count_live(live, ctx->temp_count);
                        max_live = MAX2(max_live, count);
                        mir_liveness_ins_update(live, ins, ctx->temp_count);
                }

                free(live);
        }

        return DIV_ROUND_UP(max_live, 16);
}

static unsigned
mir_work_heuristic(compiler_context *ctx)
{
        unsigned uniform_count = mir_promoteable_uniform_count(ctx);

        /* If there are 8 or fewer uniforms, it doesn't matter what we do, so
         * allow as many work registers as needed */

        if (uniform_count <= 8)
                return 16;

        /* Otherwise, estimate the register pressure */

        unsigned pressure = mir_estimate_pressure(ctx);

        /* Prioritize not spilling above all else. The relation between the
         * pressure estimate and the actual register pressure is a little
         * murkier than we might like (due to scheduling, pipeline registers,
         * failure to pack vector registers, load/store registers, texture
         * registers...), hence why this is a heuristic parameter */

        if (pressure > 6)
                return 16;

        /* If there's no chance of spilling, prioritize UBOs and thread count */
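        /* (Staying within 8 work registers generally lets the hardware keep
         * more threads in flight, so the smaller work set is free here) */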

        return 8;
}

void
midgard_promote_uniforms(compiler_context *ctx)
{
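        /* Promoted uniforms live at the top of the shared work/uniform
         * register file: vec4 slot i is preloaded into register (23 - i),
         * so registers [work_count, 23] are usable and 24 - work_count
         * slots are available for promotion */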
        unsigned work_count = mir_work_heuristic(ctx);
        unsigned promoted_count = 24 - work_count;

        mir_foreach_instr_global_safe(ctx, ins) {
                if (!mir_is_promoteable_ubo(ins)) continue;

                unsigned off = ins->constants[0];
                unsigned address = off / 16;

                /* Check if it's a promotable range */
                unsigned uniform_reg = 23 - address;

                if (address >= promoted_count) continue;

                /* It is, great! Let's promote */

                ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
                unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);

                /* We do need the move for safety for a non-SSA dest, or if
                 * we're being fed into a special class */

                bool needs_move = ins->dest & IS_REG;
                needs_move |= mir_special_index(ctx, ins->dest);

                if (needs_move) {
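                        /* Match the width of the load: copy its byte mask
                         * onto the mov, and use 64-bit register mode for
                         * 64-bit loads, so the copy writes exactly the
                         * bytes the UBO read would have written */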
                        midgard_instruction mov = v_mov(promoted, ins->dest);

                        if (ins->load_64)
                                mov.alu.reg_mode = midgard_reg_mode_64;

                        mir_set_bytemask(&mov, mir_bytemask(ins));
                        mir_insert_instruction_before(ctx, ins, mov);
                } else {
                        mir_rewrite_index_src(ctx, ins->dest, promoted);
                }

                mir_remove_instruction(ins);
        }
}