panfrost/midgard: Remove pinning
[mesa.git] / src / gallium / drivers / panfrost / midgard / midgard_ra.c
1 /*
2 * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "compiler.h"
25 #include "util/register_allocate.h"
26
27 /* When we're 'squeezing down' the values in the IR, we maintain a hash
28 * as such */
29
30 static unsigned
31 find_or_allocate_temp(compiler_context *ctx, unsigned hash)
32 {
33 if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM))
34 return hash;
35
36 unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
37
38 if (temp)
39 return temp - 1;
40
41 /* If no temp is find, allocate one */
42 temp = ctx->temp_count++;
43 ctx->max_hash = MAX2(ctx->max_hash, hash);
44
45 _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
46
47 return temp;
48 }
49
50 /* Callback for register allocation selection, trivial default for now */
51
52 static unsigned int
53 midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
54 {
55 /* Choose the first available register to minimise reported register pressure */
56
57 for (int i = 0; i < 16; ++i) {
58 if (BITSET_TEST(regs, i)) {
59 return i;
60 }
61 }
62
63 assert(0);
64 return 0;
65 }
66
67 /* Determine the actual hardware from the index based on the RA results or special values */
68
69 static int
70 dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
71 {
72 if (reg >= SSA_FIXED_MINIMUM)
73 return SSA_REG_FROM_FIXED(reg);
74
75 if (reg >= 0) {
76 assert(reg < maxreg);
77 assert(g);
78 int r = ra_get_node_reg(g, reg);
79 ctx->work_registers = MAX2(ctx->work_registers, r);
80 return r;
81 }
82
83 switch (reg) {
84 case SSA_UNUSED_0:
85 case SSA_UNUSED_1:
86 return REGISTER_UNUSED;
87
88 default:
89 unreachable("Unknown SSA register alias");
90 }
91 }
92
/* This routine performs the actual register allocation. It should be succeeded
 * by install_registers, which rewrites the MIR to use the registers chosen
 * here. Returns the interference graph (queried via ra_get_node_reg), or
 * NULL if the shader has no temporaries to allocate. */

struct ra_graph *
allocate_registers(compiler_context *ctx)
{
        /* First, initialize the RA. 32 physical registers, round-robin
         * assignment within a class */
        struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);

        /* Create a primary (general purpose) class, as well as special purpose
         * pipeline register classes */

        int primary_class = ra_alloc_reg_class(regs);
        int varying_class = ra_alloc_reg_class(regs);

        /* Add the full set of work registers. The arithmetic implies each
         * uniform register past r8 steals one work register from the pool of
         * 16 — NOTE(review): assumed from the expression; confirm against the
         * uniform-handling code */
        int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
        for (int i = 0; i < work_count; ++i)
                ra_class_add_reg(regs, primary_class, i);

        /* Add special registers */
        ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
        ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);

        /* We're done setting up; classes cannot be modified after this */
        ra_set_finalize(regs, NULL);

        /* Transform the MIR into squeezed (dense) index form, so node count
         * equals ctx->temp_count */
        mir_foreach_block(ctx, block) {
                mir_foreach_instr_in_block(block, ins) {
                        if (ins->compact_branch) continue;

                        ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
                        ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
                        ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
                }
        }

        /* No register allocation to do with no SSA */

        if (!ctx->temp_count)
                return NULL;

        /* Let's actually do register allocation */
        int nodes = ctx->temp_count;
        struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);

        /* Set everything to the work register class, unless it has somewhere
         * special to go */

        mir_foreach_block(ctx, block) {
                mir_foreach_instr_in_block(block, ins) {
                        if (ins->compact_branch) continue;

                        /* Negative dests (no destination) and fixed dests
                         * have no graph node to classify */
                        if (ins->ssa_args.dest < 0) continue;

                        if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;

                        int class = primary_class;

                        ra_set_node_class(g, ins->ssa_args.dest, class);
                }
        }

        /* Determine liveness: a simple [start, end) interval per node,
         * indexed by a flat instruction counter across all blocks */

        int *live_start = malloc(nodes * sizeof(int));
        int *live_end = malloc(nodes * sizeof(int));

        /* Initialize as non-existent */

        for (int i = 0; i < nodes; ++i) {
                live_start[i] = live_end[i] = -1;
        }

        /* d: global instruction index, shared across blocks so intervals
         * compare correctly at block boundaries */
        int d = 0;

        mir_foreach_block(ctx, block) {
                mir_foreach_instr_in_block(block, ins) {
                        if (ins->compact_branch) continue;

                        /* Dest is < 0 for st_vary instructions, which break
                         * the usual SSA conventions. Liveness analysis doesn't
                         * make sense on these instructions, so skip them to
                         * avoid memory corruption */

                        if (ins->ssa_args.dest < 0) continue;

                        if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
                                /* If this destination is not yet live, it is now since we just wrote it */

                                int dest = ins->ssa_args.dest;

                                if (live_start[dest] == -1)
                                        live_start[dest] = d;
                        }

                        /* Since we just used a source, the source might be
                         * dead now. Scan the rest of the block for
                         * invocations, and if there are none, the source dies
                         * */

                        int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };

                        for (int src = 0; src < 2; ++src) {
                                int s = sources[src];

                                /* Skip absent and fixed sources — neither
                                 * has a liveness interval */
                                if (s < 0) continue;

                                if (s >= SSA_FIXED_MINIMUM) continue;

                                if (!mir_is_live_after(ctx, block, ins, s)) {
                                        live_end[s] = d;
                                }
                        }

                        ++d;
                }
        }

        /* If a node still hasn't been killed, kill it now (extend its
         * interval to the end of the program) */

        for (int i = 0; i < nodes; ++i) {
                /* live_start == -1 most likely indicates a pinned output */

                if (live_end[i] == -1)
                        live_end[i] = d;
        }

        /* Setup interference between nodes that are live at the same time
         * (i.e. whose [start, end) intervals overlap) */

        for (int i = 0; i < nodes; ++i) {
                for (int j = i + 1; j < nodes; ++j) {
                        if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
                                ra_add_node_interference(g, i, j);
                }
        }

        ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);

        /* No spilling support: allocation failure is fatal */
        if (!ra_allocate(g)) {
                unreachable("Error allocating registers\n");
        }

        /* Cleanup */
        free(live_start);
        free(live_end);

        return g;
}
243
/* Once registers have been decided via register allocation
 * (allocate_registers), we need to rewrite the MIR to use registers instead of
 * SSA. g may be NULL when the shader had no temporaries; dealias_register
 * asserts g only for allocator-assigned (non-negative, non-fixed) indices. */

void
install_registers(compiler_context *ctx, struct ra_graph *g)
{
        mir_foreach_block(ctx, block) {
                mir_foreach_instr_in_block(block, ins) {
                        if (ins->compact_branch) continue;

                        ssa_args args = ins->ssa_args;

                        switch (ins->type) {
                        case TAG_ALU_4:
                                ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);

                                /* src2_imm flags that src2 holds an immediate
                                 * rather than a register */
                                ins->registers.src2_imm = args.inline_constant;

                                if (args.inline_constant) {
                                        /* Encode inline 16-bit constant as a vector by default:
                                         * high bits ride in the src2 register field, the low
                                         * bits are shuffled into the ALU src2 word */

                                        ins->registers.src2_reg = ins->inline_constant >> 11;

                                        /* NOTE(review): the name says 11 but this mask keeps
                                         * the low 12 bits, overlapping the >> 11 above by one
                                         * bit — confirm against the Midgard ISA encoding
                                         * before "fixing" */
                                        int lower_11 = ins->inline_constant & ((1 << 12) - 1);

                                        uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
                                        ins->alu.src2 = imm << 2;
                                } else {
                                        ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
                                }

                                ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);

                                break;

                        case TAG_LOAD_STORE_4: {
                                if (OP_IS_STORE_VARY(ins->load_store.op)) {
                                        /* TODO: use ssa_args for st_vary */
                                        ins->load_store.reg = 0;
                                } else {
                                        /* Loads write a dest; stores (other than
                                         * st_vary) read their value from src0 */
                                        bool has_dest = args.dest >= 0;
                                        int ssa_arg = has_dest ? args.dest : args.src0;

                                        ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
                                }

                                break;
                        }

                        default:
                                break;
                        }
                }
        }

}