v3d: do not report alpha-test as supported
[mesa.git] / src / panfrost / bifrost / bifrost_sched.c
1 /*
2 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
#include "util/ralloc.h"
#include "util/register_allocate.h"
#include "compiler_defines.h"
#include "bifrost_sched.h"
#include "bifrost_compile.h"
#include "bifrost_print.h"
29
/* When defined, the scheduler prints the MIR of every block it processes */
#define BI_DEBUG

/*
 * Layout of the register-allocator's flat register space.
 *
 * The RA node space is partitioned into four classes: scalar ("primary")
 * registers, then vec2/vec3/vec4 registers that alias ranges of the
 * scalars. Each class's registers start at the class base below, and
 * ra_get_phys_reg() maps an allocated node back to a scalar register
 * number using these bases.
 */
const unsigned max_primary_reg = 64; /* Overestimate because of special regs */
const unsigned max_vec2_reg = 32;
const unsigned max_vec3_reg = 16; // XXX: Do we need to align vec3 to vec4 boundary?
const unsigned max_vec4_reg = 16;
const unsigned max_registers = 128; /* Sum of classes */
const unsigned primary_base = 0;
const unsigned vec2_base = 64;
const unsigned vec3_base = 96; /* above base + max_class_reg */
const unsigned vec4_base = 112;
const unsigned vec4_end = 128;
41
42 static unsigned
43 find_or_allocate_temp(compiler_context *ctx, unsigned hash)
44 {
45 if (hash >= SSA_FIXED_MINIMUM)
46 return hash;
47
48 unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
49
50 if (temp)
51 return temp - 1;
52
53 /* If no temp is find, allocate one */
54 temp = ctx->num_temps++;
55 ctx->max_hash = MAX2(ctx->max_hash, hash);
56
57 _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
58
59 return temp;
60 }
61
62 static bool
63 is_live_in_instr(bifrost_instruction *instr, unsigned temp)
64 {
65 if (instr->ssa_args.src0 == temp) return true;
66 if (instr->ssa_args.src1 == temp) return true;
67 if (instr->ssa_args.src2 == temp) return true;
68 if (instr->ssa_args.src3 == temp) return true;
69
70 return false;
71 }
72
/*
 * Return true iff `temp` is read by any instruction after `instr` within
 * this block. NOTE(review): liveness is block-local only — uses in
 * successor blocks are not seen (see XXX below), so cross-block values may
 * be reported dead early. `ctx` is currently unused.
 */
static bool
is_live_after_instr(compiler_context *ctx, bifrost_block *blk, bifrost_instruction *instr, unsigned temp)
{
// Scan forward in the block from this location to see if we are still live.

mir_foreach_instr_in_block_from(blk, ins, mir_next_instr(instr)) {
if (is_live_in_instr(ins, temp))
return true;
}

// XXX: Walk all successor blocks and ensure the value isn't used there

return false;
}
87
88 static uint32_t
89 ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
90 {
91 for (int i = primary_base; i < vec4_end; ++i) {
92 if (BITSET_TEST(regs, i)) {
93 return i;
94 }
95 }
96
97 assert(0);
98 return 0;
99 }
100
101 static uint32_t
102 ra_get_phys_reg(compiler_context *ctx, struct ra_graph *g, unsigned temp, unsigned max_reg)
103 {
104 if (temp == SSA_INVALID_VALUE ||
105 temp >= SSA_FIXED_UREG_MINIMUM ||
106 temp == SSA_FIXED_CONST_0)
107 return temp;
108
109 if (temp >= SSA_FIXED_MINIMUM)
110 return SSA_REG_FROM_FIXED(temp);
111
112 assert(temp < max_reg);
113 uint32_t r = ra_get_node_reg(g, temp);
114 if (r >= vec4_base)
115 return (r - vec4_base) * 4;
116 else if (r >= vec3_base)
117 return (r - vec3_base) * 4;
118 else if (r >= vec2_base)
119 return (r - vec2_base) * 2;
120
121 return r;
122 }
123
124 static void
125 allocate_registers(compiler_context *ctx)
126 {
127 struct ra_regs *regs = ra_alloc_reg_set(NULL, max_registers, true);
128
129 int primary_class = ra_alloc_reg_class(regs);
130 int vec2_class = ra_alloc_reg_class(regs);
131 int vec3_class = ra_alloc_reg_class(regs);
132 int vec4_class = ra_alloc_reg_class(regs);
133
134 // Allocate our register classes and conflicts
135 {
136 unsigned reg = 0;
137 unsigned primary_base = 0;
138
139 // Add all of our primary scalar registers
140 for (unsigned i = 0; i < max_primary_reg; ++i) {
141 ra_class_add_reg(regs, primary_class, reg);
142 reg++;
143 }
144
145 // Add all of our vec2 class registers
146 // These alias with the scalar registers
147 for (unsigned i = 0; i < max_vec2_reg; ++i) {
148 ra_class_add_reg(regs, vec2_class, reg);
149
150 // Tell RA that this conflicts with primary class registers
151 // Make sure to tell the RA utility all conflict slots
152 ra_add_reg_conflict(regs, reg, primary_base + i*2 + 0);
153 ra_add_reg_conflict(regs, reg, primary_base + i*2 + 1);
154
155 reg++;
156 }
157
158 // Add all of our vec3 class registers
159 // These alias with the scalar registers
160 for (unsigned i = 0; i < max_vec3_reg; ++i) {
161 ra_class_add_reg(regs, vec3_class, reg);
162
163 // Tell RA that this conflicts with primary class registers
164 // Make sure to tell the RA utility all conflict slots
165 // These are aligned to vec4 even though they only conflict with a vec3 wide slot
166 ra_add_reg_conflict(regs, reg, primary_base + i*4 + 0);
167 ra_add_reg_conflict(regs, reg, primary_base + i*4 + 1);
168 ra_add_reg_conflict(regs, reg, primary_base + i*4 + 2);
169
170 // State that this class conflicts with the vec2 class
171 ra_add_reg_conflict(regs, reg, vec2_base + i*2 + 0);
172 ra_add_reg_conflict(regs, reg, vec2_base + i*2 + 1);
173
174 reg++;
175 }
176
177 // Add all of our vec4 class registers
178 // These alias with the scalar registers
179 for (unsigned i = 0; i < max_vec4_reg; ++i) {
180 ra_class_add_reg(regs, vec4_class, reg);
181
182 // Tell RA that this conflicts with primary class registers
183 // Make sure to tell the RA utility all conflict slots
184 // These are aligned to vec4 even though they only conflict with a vec3 wide slot
185 ra_add_reg_conflict(regs, reg, primary_base + i*4 + 0);
186 ra_add_reg_conflict(regs, reg, primary_base + i*4 + 1);
187 ra_add_reg_conflict(regs, reg, primary_base + i*4 + 2);
188 ra_add_reg_conflict(regs, reg, primary_base + i*4 + 3);
189
190 // State that this class conflicts with the vec2 class
191 ra_add_reg_conflict(regs, reg, vec2_base + i*2 + 0);
192 ra_add_reg_conflict(regs, reg, vec2_base + i*2 + 1);
193
194 // State that this class conflicts with the vec3 class
195 // They conflict on the exact same location due to alignments
196 ra_add_reg_conflict(regs, reg, vec3_base + i);
197
198 reg++;
199 }
200 }
201
202 ra_set_finalize(regs, NULL);
203 mir_foreach_block(ctx, block) {
204 mir_foreach_instr_in_block(block, instr) {
205 instr->ssa_args.src0 = find_or_allocate_temp(ctx, instr->ssa_args.src0);
206 instr->ssa_args.src1 = find_or_allocate_temp(ctx, instr->ssa_args.src1);
207 instr->ssa_args.src2 = find_or_allocate_temp(ctx, instr->ssa_args.src2);
208 instr->ssa_args.src3 = find_or_allocate_temp(ctx, instr->ssa_args.src3);
209 instr->ssa_args.dest = find_or_allocate_temp(ctx, instr->ssa_args.dest);
210 }
211 }
212
213 uint32_t nodes = ctx->num_temps;
214 struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
215
216 mir_foreach_block(ctx, block) {
217 mir_foreach_instr_in_block(block, instr) {
218 if (instr->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
219 if (instr->dest_components == 4)
220 ra_set_node_class(g, instr->ssa_args.dest, vec4_class);
221 else if (instr->dest_components == 3)
222 ra_set_node_class(g, instr->ssa_args.dest, vec3_class);
223 else if (instr->dest_components == 2)
224 ra_set_node_class(g, instr->ssa_args.dest, vec2_class);
225 else
226 ra_set_node_class(g, instr->ssa_args.dest, primary_class);
227 }
228 }
229
230 uint32_t *live_start = malloc(nodes * sizeof(uint32_t));
231 uint32_t *live_end = malloc(nodes * sizeof(uint32_t));
232
233 memset(live_start, 0xFF, nodes * sizeof(uint32_t));
234 memset(live_end, 0xFF, nodes * sizeof(uint32_t));
235
236 uint32_t location = 0;
237 mir_foreach_block(ctx, block) {
238 mir_foreach_instr_in_block(block, instr) {
239 if (instr->ssa_args.dest < SSA_FIXED_MINIMUM) {
240 // If the destination isn't yet live before this point
241 // then this is the point it becomes live since we wrote to it
242 if (live_start[instr->ssa_args.dest] == ~0U) {
243 live_start[instr->ssa_args.dest] = location;
244 }
245 }
246
247 uint32_t sources[4] = {
248 instr->ssa_args.src0,
249 instr->ssa_args.src1,
250 instr->ssa_args.src2,
251 instr->ssa_args.src3,
252 };
253
254 for (unsigned i = 0; i < 4; ++i) {
255 if (sources[i] >= SSA_FIXED_MINIMUM)
256 continue;
257
258 // If the source is no longer live after this instruction then we can end its liveness
259 if (!is_live_after_instr(ctx, block, instr, sources[i])) {
260 live_end[sources[i]] = location;
261 }
262 }
263 ++location;
264 }
265 }
266
267 // Spin through the nodes quick and ensure they are all killed by the end of the program
268 for (unsigned i = 0; i < nodes; ++i) {
269 if (live_end[i] == ~0U)
270 live_end[i] = location;
271 }
272
273 for (int i = 0; i < nodes; ++i) {
274 for (int j = i + 1; j < nodes; ++j) {
275 if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i])) {
276 ra_add_node_interference(g, i, j);
277 }
278 }
279 }
280
281 ra_set_select_reg_callback(g, ra_select_callback, NULL);
282
283 if (!ra_allocate(g)) {
284 assert(0);
285 }
286
287 free(live_start);
288 free(live_end);
289
290 mir_foreach_block(ctx, block) {
291 mir_foreach_instr_in_block(block, instr) {
292 instr->args.src0 = ra_get_phys_reg(ctx, g, instr->ssa_args.src0, nodes);
293 instr->args.src1 = ra_get_phys_reg(ctx, g, instr->ssa_args.src1, nodes);
294 instr->args.src2 = ra_get_phys_reg(ctx, g, instr->ssa_args.src2, nodes);
295 instr->args.src3 = ra_get_phys_reg(ctx, g, instr->ssa_args.src3, nodes);
296 instr->args.dest = ra_get_phys_reg(ctx, g, instr->ssa_args.dest, nodes);
297 }
298 }
299 }
300
/*
 * TODO: stub — clause bundling is not implemented yet. Eventually this
 * should pack the block's instructions into Bifrost clauses.
 */
static void
bundle_block(compiler_context *ctx, bifrost_block *block)
{
}
305
306 static void
307 remove_create_vectors(compiler_context *ctx, bifrost_block *block)
308 {
309 mir_foreach_instr_in_block_safe(block, instr) {
310 if (instr->op != op_create_vector) continue;
311
312 uint32_t vector_ssa_sources[4] = {
313 instr->ssa_args.src0,
314 instr->ssa_args.src1,
315 instr->ssa_args.src2,
316 instr->ssa_args.src3,
317 };
318
319 mir_foreach_instr_in_block_from_rev(block, next_instr, instr) {
320 // Walk our block backwards and find the creators of this vector creation instruction
321 for (unsigned i = 0; i < instr->dest_components; ++i) {
322 // If this instruction is ther one that writes this register then forward it to the real register
323 if (vector_ssa_sources[i] == next_instr->ssa_args.dest) {
324 next_instr->ssa_args.dest = vector_ssa_sources[i];
325 // Source instruction destination is a vector register of size dest_components
326 // So dest + i gets the components of it
327 next_instr->args.dest = instr->args.dest + i;
328 }
329 }
330 }
331
332 // Remove the instruction now that we have copied over all the sources
333 mir_remove_instr(instr);
334 }
335 }
336
337 static void
338 remove_extract_elements(compiler_context *ctx, bifrost_block *block)
339 {
340 mir_foreach_instr_in_block_safe(block, instr) {
341 if (instr->op != op_extract_element) continue;
342
343 mir_foreach_instr_in_block_from(block, next_instr, instr) {
344 // Walk our block forward to replace uses of this register with a real register
345 // src0 = vector
346 // src1 = index in to vector
347 uint32_t vector_ssa_sources[4] = {
348 next_instr->ssa_args.src0,
349 next_instr->ssa_args.src1,
350 next_instr->ssa_args.src2,
351 next_instr->ssa_args.src3,
352 };
353 uint32_t *vector_sources[4] = {
354 &next_instr->args.src0,
355 &next_instr->args.src1,
356 &next_instr->args.src2,
357 &next_instr->args.src3,
358 };
359
360 for (unsigned i = 0; i < 4; ++i) {
361 if (vector_ssa_sources[i] == instr->ssa_args.dest) {
362 // This source uses this vector extraction
363 // Replace its usage with the real register
364 // src0 is a vector register and src1 is the constant element of the vector
365 *vector_sources[i] = instr->args.src0 + instr->literal_args[0];
366 }
367 }
368
369 }
370
371 // Remove the instruction now that we have copied over all the sources
372 mir_remove_instr(instr);
373 }
374 }
375
376
377 void schedule_program(compiler_context *ctx)
378 {
379 // XXX: we should move instructions together before RA that can feed in to each other and be scheduled in the same clause
380 allocate_registers(ctx);
381
382 mir_foreach_block(ctx, block) {
383 remove_create_vectors(ctx, block);
384 remove_extract_elements(ctx, block);
385 }
386
387 mir_foreach_block(ctx, block) {
388 #ifdef BI_DEBUG
389 print_mir_block(block, true);
390 #endif
391
392 bundle_block(ctx, block);
393 }
394 }
395