/*
 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24 #include "util/register_allocate.h"
25 #include "compiler_defines.h"
26 #include "bifrost_sched.h"
27 #include "bifrost_compile.h"
28 #include "bifrost_print.h"
/* Register-class geometry for the allocator.
 *
 * The scalar, vec2, vec3 and vec4 classes all alias one physical register
 * file; the *_base values index each class's first node in a flat space of
 * max_registers RA nodes, and vec4_end marks one past the last node. */
const unsigned max_primary_reg = 64; /* Overestimate because of special regs */
const unsigned max_vec2_reg = 32;
const unsigned max_vec3_reg = 16; // XXX: Do we need to align vec3 to vec4 boundary?
const unsigned max_vec4_reg = 16;
const unsigned max_registers = 128; /* Sum of classes */
const unsigned primary_base = 0;
const unsigned vec2_base = 64;
const unsigned vec3_base = 96; /* above base + max_class_reg */
const unsigned vec4_base = 112;
const unsigned vec4_end = 128;
43 find_or_allocate_temp(compiler_context
*ctx
, unsigned hash
)
45 if (hash
>= SSA_FIXED_MINIMUM
)
48 unsigned temp
= (uintptr_t) _mesa_hash_table_u64_search(ctx
->hash_to_temp
, hash
+ 1);
53 /* If no temp is find, allocate one */
54 temp
= ctx
->num_temps
++;
55 ctx
->max_hash
= MAX2(ctx
->max_hash
, hash
);
57 _mesa_hash_table_u64_insert(ctx
->hash_to_temp
, hash
+ 1, (void *) ((uintptr_t) temp
+ 1));
63 is_live_in_instr(bifrost_instruction
*instr
, unsigned temp
)
65 if (instr
->ssa_args
.src0
== temp
) return true;
66 if (instr
->ssa_args
.src1
== temp
) return true;
67 if (instr
->ssa_args
.src2
== temp
) return true;
68 if (instr
->ssa_args
.src3
== temp
) return true;
74 is_live_after_instr(compiler_context
*ctx
, bifrost_block
*blk
, bifrost_instruction
*instr
, unsigned temp
)
76 // Scan forward in the block from this location to see if we are still live.
78 mir_foreach_instr_in_block_from(blk
, ins
, mir_next_instr(instr
)) {
79 if (is_live_in_instr(ins
, temp
))
83 // XXX: Walk all successor blocks and ensure the value isn't used there
89 ra_select_callback(struct ra_graph
*g
, BITSET_WORD
*regs
, void *data
)
91 for (int i
= primary_base
; i
< vec4_end
; ++i
) {
92 if (BITSET_TEST(regs
, i
)) {
102 ra_get_phys_reg(compiler_context
*ctx
, struct ra_graph
*g
, unsigned temp
, unsigned max_reg
)
104 if (temp
== SSA_INVALID_VALUE
||
105 temp
>= SSA_FIXED_UREG_MINIMUM
||
106 temp
== SSA_FIXED_CONST_0
)
109 if (temp
>= SSA_FIXED_MINIMUM
)
110 return SSA_REG_FROM_FIXED(temp
);
112 assert(temp
< max_reg
);
113 uint32_t r
= ra_get_node_reg(g
, temp
);
115 return (r
- vec4_base
) * 4;
116 else if (r
>= vec3_base
)
117 return (r
- vec3_base
) * 4;
118 else if (r
>= vec2_base
)
119 return (r
- vec2_base
) * 2;
125 allocate_registers(compiler_context
*ctx
)
127 struct ra_regs
*regs
= ra_alloc_reg_set(NULL
, max_registers
, true);
129 int primary_class
= ra_alloc_reg_class(regs
);
130 int vec2_class
= ra_alloc_reg_class(regs
);
131 int vec3_class
= ra_alloc_reg_class(regs
);
132 int vec4_class
= ra_alloc_reg_class(regs
);
134 // Allocate our register classes and conflicts
137 unsigned primary_base
= 0;
139 // Add all of our primary scalar registers
140 for (unsigned i
= 0; i
< max_primary_reg
; ++i
) {
141 ra_class_add_reg(regs
, primary_class
, reg
);
145 // Add all of our vec2 class registers
146 // These alias with the scalar registers
147 for (unsigned i
= 0; i
< max_vec2_reg
; ++i
) {
148 ra_class_add_reg(regs
, vec2_class
, reg
);
150 // Tell RA that this conflicts with primary class registers
151 // Make sure to tell the RA utility all conflict slots
152 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*2 + 0);
153 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*2 + 1);
158 // Add all of our vec3 class registers
159 // These alias with the scalar registers
160 for (unsigned i
= 0; i
< max_vec3_reg
; ++i
) {
161 ra_class_add_reg(regs
, vec3_class
, reg
);
163 // Tell RA that this conflicts with primary class registers
164 // Make sure to tell the RA utility all conflict slots
165 // These are aligned to vec4 even though they only conflict with a vec3 wide slot
166 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*4 + 0);
167 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*4 + 1);
168 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*4 + 2);
170 // State that this class conflicts with the vec2 class
171 ra_add_reg_conflict(regs
, reg
, vec2_base
+ i
*2 + 0);
172 ra_add_reg_conflict(regs
, reg
, vec2_base
+ i
*2 + 1);
177 // Add all of our vec4 class registers
178 // These alias with the scalar registers
179 for (unsigned i
= 0; i
< max_vec4_reg
; ++i
) {
180 ra_class_add_reg(regs
, vec4_class
, reg
);
182 // Tell RA that this conflicts with primary class registers
183 // Make sure to tell the RA utility all conflict slots
184 // These are aligned to vec4 even though they only conflict with a vec3 wide slot
185 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*4 + 0);
186 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*4 + 1);
187 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*4 + 2);
188 ra_add_reg_conflict(regs
, reg
, primary_base
+ i
*4 + 3);
190 // State that this class conflicts with the vec2 class
191 ra_add_reg_conflict(regs
, reg
, vec2_base
+ i
*2 + 0);
192 ra_add_reg_conflict(regs
, reg
, vec2_base
+ i
*2 + 1);
194 // State that this class conflicts with the vec3 class
195 // They conflict on the exact same location due to alignments
196 ra_add_reg_conflict(regs
, reg
, vec3_base
+ i
);
202 ra_set_finalize(regs
, NULL
);
203 mir_foreach_block(ctx
, block
) {
204 mir_foreach_instr_in_block(block
, instr
) {
205 instr
->ssa_args
.src0
= find_or_allocate_temp(ctx
, instr
->ssa_args
.src0
);
206 instr
->ssa_args
.src1
= find_or_allocate_temp(ctx
, instr
->ssa_args
.src1
);
207 instr
->ssa_args
.src2
= find_or_allocate_temp(ctx
, instr
->ssa_args
.src2
);
208 instr
->ssa_args
.src3
= find_or_allocate_temp(ctx
, instr
->ssa_args
.src3
);
209 instr
->ssa_args
.dest
= find_or_allocate_temp(ctx
, instr
->ssa_args
.dest
);
213 uint32_t nodes
= ctx
->num_temps
;
214 struct ra_graph
*g
= ra_alloc_interference_graph(regs
, nodes
);
216 mir_foreach_block(ctx
, block
) {
217 mir_foreach_instr_in_block(block
, instr
) {
218 if (instr
->ssa_args
.dest
>= SSA_FIXED_MINIMUM
) continue;
219 if (instr
->dest_components
== 4)
220 ra_set_node_class(g
, instr
->ssa_args
.dest
, vec4_class
);
221 else if (instr
->dest_components
== 3)
222 ra_set_node_class(g
, instr
->ssa_args
.dest
, vec3_class
);
223 else if (instr
->dest_components
== 2)
224 ra_set_node_class(g
, instr
->ssa_args
.dest
, vec2_class
);
226 ra_set_node_class(g
, instr
->ssa_args
.dest
, primary_class
);
230 uint32_t *live_start
= malloc(nodes
* sizeof(uint32_t));
231 uint32_t *live_end
= malloc(nodes
* sizeof(uint32_t));
233 memset(live_start
, 0xFF, nodes
* sizeof(uint32_t));
234 memset(live_end
, 0xFF, nodes
* sizeof(uint32_t));
236 uint32_t location
= 0;
237 mir_foreach_block(ctx
, block
) {
238 mir_foreach_instr_in_block(block
, instr
) {
239 if (instr
->ssa_args
.dest
< SSA_FIXED_MINIMUM
) {
240 // If the destination isn't yet live before this point
241 // then this is the point it becomes live since we wrote to it
242 if (live_start
[instr
->ssa_args
.dest
] == ~0U) {
243 live_start
[instr
->ssa_args
.dest
] = location
;
247 uint32_t sources
[4] = {
248 instr
->ssa_args
.src0
,
249 instr
->ssa_args
.src1
,
250 instr
->ssa_args
.src2
,
251 instr
->ssa_args
.src3
,
254 for (unsigned i
= 0; i
< 4; ++i
) {
255 if (sources
[i
] >= SSA_FIXED_MINIMUM
)
258 // If the source is no longer live after this instruction then we can end its liveness
259 if (!is_live_after_instr(ctx
, block
, instr
, sources
[i
])) {
260 live_end
[sources
[i
]] = location
;
267 // Spin through the nodes quick and ensure they are all killed by the end of the program
268 for (unsigned i
= 0; i
< nodes
; ++i
) {
269 if (live_end
[i
] == ~0U)
270 live_end
[i
] = location
;
273 for (int i
= 0; i
< nodes
; ++i
) {
274 for (int j
= i
+ 1; j
< nodes
; ++j
) {
275 if (!(live_start
[i
] >= live_end
[j
] || live_start
[j
] >= live_end
[i
])) {
276 ra_add_node_interference(g
, i
, j
);
281 ra_set_select_reg_callback(g
, ra_select_callback
, NULL
);
283 if (!ra_allocate(g
)) {
290 mir_foreach_block(ctx
, block
) {
291 mir_foreach_instr_in_block(block
, instr
) {
292 instr
->args
.src0
= ra_get_phys_reg(ctx
, g
, instr
->ssa_args
.src0
, nodes
);
293 instr
->args
.src1
= ra_get_phys_reg(ctx
, g
, instr
->ssa_args
.src1
, nodes
);
294 instr
->args
.src2
= ra_get_phys_reg(ctx
, g
, instr
->ssa_args
.src2
, nodes
);
295 instr
->args
.src3
= ra_get_phys_reg(ctx
, g
, instr
->ssa_args
.src3
, nodes
);
296 instr
->args
.dest
= ra_get_phys_reg(ctx
, g
, instr
->ssa_args
.dest
, nodes
);
302 bundle_block(compiler_context
*ctx
, bifrost_block
*block
)
307 remove_create_vectors(compiler_context
*ctx
, bifrost_block
*block
)
309 mir_foreach_instr_in_block_safe(block
, instr
) {
310 if (instr
->op
!= op_create_vector
) continue;
312 uint32_t vector_ssa_sources
[4] = {
313 instr
->ssa_args
.src0
,
314 instr
->ssa_args
.src1
,
315 instr
->ssa_args
.src2
,
316 instr
->ssa_args
.src3
,
319 mir_foreach_instr_in_block_from_rev(block
, next_instr
, instr
) {
320 // Walk our block backwards and find the creators of this vector creation instruction
321 for (unsigned i
= 0; i
< instr
->dest_components
; ++i
) {
322 // If this instruction is ther one that writes this register then forward it to the real register
323 if (vector_ssa_sources
[i
] == next_instr
->ssa_args
.dest
) {
324 next_instr
->ssa_args
.dest
= vector_ssa_sources
[i
];
325 // Source instruction destination is a vector register of size dest_components
326 // So dest + i gets the components of it
327 next_instr
->args
.dest
= instr
->args
.dest
+ i
;
332 // Remove the instruction now that we have copied over all the sources
333 mir_remove_instr(instr
);
338 remove_extract_elements(compiler_context
*ctx
, bifrost_block
*block
)
340 mir_foreach_instr_in_block_safe(block
, instr
) {
341 if (instr
->op
!= op_extract_element
) continue;
343 mir_foreach_instr_in_block_from(block
, next_instr
, instr
) {
344 // Walk our block forward to replace uses of this register with a real register
346 // src1 = index in to vector
347 uint32_t vector_ssa_sources
[4] = {
348 next_instr
->ssa_args
.src0
,
349 next_instr
->ssa_args
.src1
,
350 next_instr
->ssa_args
.src2
,
351 next_instr
->ssa_args
.src3
,
353 uint32_t *vector_sources
[4] = {
354 &next_instr
->args
.src0
,
355 &next_instr
->args
.src1
,
356 &next_instr
->args
.src2
,
357 &next_instr
->args
.src3
,
360 for (unsigned i
= 0; i
< 4; ++i
) {
361 if (vector_ssa_sources
[i
] == instr
->ssa_args
.dest
) {
362 // This source uses this vector extraction
363 // Replace its usage with the real register
364 // src0 is a vector register and src1 is the constant element of the vector
365 *vector_sources
[i
] = instr
->args
.src0
+ instr
->literal_args
[0];
371 // Remove the instruction now that we have copied over all the sources
372 mir_remove_instr(instr
);
377 void schedule_program(compiler_context
*ctx
)
379 // XXX: we should move instructions together before RA that can feed in to each other and be scheduled in the same clause
380 allocate_registers(ctx
);
382 mir_foreach_block(ctx
, block
) {
383 remove_create_vectors(ctx
, block
);
384 remove_extract_elements(ctx
, block
);
387 mir_foreach_block(ctx
, block
) {
389 print_mir_block(block
, true);
392 bundle_block(ctx
, block
);