/*
 * Copyright © 2012 Intel Corporation
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* Initial value for every temp_start[] entry: larger than any real
 * instruction index, so the first MIN2() against an actual ip replaces it.
 */
#define MAX_INSTRUCTION (1 << 30)

27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
/* Per-temp bookkeeping used while scanning one block for writes that only
 * partially (conditionally or per-channel) update a register.
 */
struct partial_update_state {
        /* Last instruction recorded as writing each of the four channels. */
        struct qinst *insts[4];
        /* Bitmask of channels known to be written; 0xf means all four. */
        uint8_t channels;
};

/* Hash callback for a table keyed by pointers to int: hashes the
 * sizeof(int) bytes that @key points at.
 */
static uint32_t
int_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(int));
}

/* Key-equality callback for a table keyed by pointers to int: two keys
 * match when the ints they point at are equal.
 */
static bool
int_compare(const void *key1, const void *key2)
{
        const int *lhs = key1;
        const int *rhs = key2;

        return *lhs == *rhs;
}

49 vir_reg_to_var(struct qreg reg
)
51 if (reg
.file
== QFILE_TEMP
)
58 vir_setup_use(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
61 int var
= vir_reg_to_var(src
);
65 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
66 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
68 /* The use[] bitset marks when the block makes
69 * use of a variable without having completely
70 * defined that variable within the block.
72 if (!BITSET_TEST(block
->def
, var
))
73 BITSET_SET(block
->use
, var
);
76 static struct partial_update_state
*
77 get_partial_update_state(struct hash_table
*partial_update_ht
,
80 struct hash_entry
*entry
=
81 _mesa_hash_table_search(partial_update_ht
,
86 struct partial_update_state
*state
=
87 rzalloc(partial_update_ht
, struct partial_update_state
);
89 _mesa_hash_table_insert(partial_update_ht
, &inst
->dst
.index
, state
);
95 vir_setup_def(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
96 struct hash_table
*partial_update_ht
, struct qinst
*inst
)
98 if (inst
->qpu
.type
!= V3D_QPU_INSTR_TYPE_ALU
)
101 /* The def[] bitset marks when an initialization in a
102 * block completely screens off previous updates of
105 int var
= vir_reg_to_var(inst
->dst
);
109 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
110 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
112 /* Mark the block as having a (partial) def of the var. */
113 BITSET_SET(block
->defout
, var
);
115 /* If we've already tracked this as a def that screens off previous
116 * uses, or already used it within the block, there's nothing to do.
118 if (BITSET_TEST(block
->use
, var
) || BITSET_TEST(block
->def
, var
))
121 /* Easy, common case: unconditional full register update.
123 * We treat conditioning on the exec mask as the same as not being
124 * conditional. This makes sure that if the register gets set on
125 * either side of an if, it is treated as being screened off before
126 * the if. Otherwise, if there was no intervening def, its live
127 * interval doesn't extend back to the start of he program, and if too
128 * many registers did that we'd fail to register allocate.
130 if (((inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
131 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) ||
132 inst
->cond_is_exec_mask
) &&
133 inst
->qpu
.alu
.add
.output_pack
== V3D_QPU_PACK_NONE
&&
134 inst
->qpu
.alu
.mul
.output_pack
== V3D_QPU_PACK_NONE
) {
135 BITSET_SET(block
->def
, var
);
139 /* Finally, look at the condition code and packing and mark it as a
140 * def. We need to make sure that we understand sequences
153 * as defining the temp within the block, because otherwise dst's live
154 * range will get extended up the control flow to the top of the
157 struct partial_update_state
*state
=
158 get_partial_update_state(partial_update_ht
, inst
);
159 uint8_t mask
= 0xf; /* XXX vir_channels_written(inst); */
161 if (inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
162 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) {
163 state
->channels
|= mask
;
165 for (int i
= 0; i
< 4; i
++) {
166 if (!(mask
& (1 << i
)))
169 /* XXXif (state->insts[i] &&
170 state->insts[i]->cond ==
171 qpu_cond_complement(inst->cond))
172 state->channels |= 1 << i;
175 state
->insts
[i
] = inst
;
179 if (state
->channels
== 0xf)
180 BITSET_SET(block
->def
, var
);
184 sf_state_clear(struct hash_table
*partial_update_ht
)
186 hash_table_foreach(partial_update_ht
, entry
) {
187 struct partial_update_state
*state
= entry
->data
;
189 for (int i
= 0; i
< 4; i
++) {
190 if (state
->insts
[i
] &&
191 (state
->insts
[i
]->qpu
.flags
.ac
!= V3D_QPU_COND_NONE
||
192 state
->insts
[i
]->qpu
.flags
.mc
!= V3D_QPU_COND_NONE
))
193 state
->insts
[i
] = NULL
;
198 /* Sets up the def/use arrays for when variables are used-before-defined or
199 * defined-before-used in the block.
201 * Also initializes the temp_start/temp_end to cover just the instruction IPs
202 * where the variable is used, which will be extended later in
203 * vir_compute_start_end().
206 vir_setup_def_use(struct v3d_compile
*c
)
208 struct hash_table
*partial_update_ht
=
209 _mesa_hash_table_create(c
, int_hash
, int_compare
);
212 vir_for_each_block(block
, c
) {
213 block
->start_ip
= ip
;
215 _mesa_hash_table_clear(partial_update_ht
, NULL
);
217 vir_for_each_inst(inst
, block
) {
218 for (int i
= 0; i
< vir_get_nsrc(inst
); i
++)
219 vir_setup_use(c
, block
, ip
, inst
->src
[i
]);
221 vir_setup_def(c
, block
, ip
, partial_update_ht
, inst
);
223 if (false /* XXX inst->uf */)
224 sf_state_clear(partial_update_ht
);
226 /* Payload registers: r0/1/2 contain W, centroid W,
227 * and Z at program start. Register allocation will
228 * force their nodes to R0/1/2.
230 if (inst
->src
[0].file
== QFILE_REG
) {
231 switch (inst
->src
[0].index
) {
235 c
->temp_start
[inst
->dst
.index
] = 0;
245 _mesa_hash_table_destroy(partial_update_ht
, NULL
);
249 vir_live_variables_dataflow(struct v3d_compile
*c
, int bitset_words
)
253 vir_for_each_block_rev(block
, c
) {
254 /* Update live_out: Any successor using the variable
255 * on entrance needs us to have the variable live on
258 vir_for_each_successor(succ
, block
) {
259 for (int i
= 0; i
< bitset_words
; i
++) {
260 BITSET_WORD new_live_out
= (succ
->live_in
[i
] &
261 ~block
->live_out
[i
]);
263 block
->live_out
[i
] |= new_live_out
;
270 for (int i
= 0; i
< bitset_words
; i
++) {
271 BITSET_WORD new_live_in
= (block
->use
[i
] |
272 (block
->live_out
[i
] &
274 if (new_live_in
& ~block
->live_in
[i
]) {
275 block
->live_in
[i
] |= new_live_in
;
285 vir_live_variables_defin_defout_dataflow(struct v3d_compile
*c
, int bitset_words
)
289 vir_for_each_block_rev(block
, c
) {
290 /* Propagate defin/defout down the successors to produce the
291 * union of blocks with a reachable (partial) definition of
294 * This keeps a conditional first write to a reg from
295 * extending its lifetime back to the start of the program.
297 vir_for_each_successor(succ
, block
) {
298 for (int i
= 0; i
< bitset_words
; i
++) {
299 BITSET_WORD new_def
= (block
->defout
[i
] &
301 succ
->defin
[i
] |= new_def
;
302 succ
->defout
[i
] |= new_def
;
312 * Extend the start/end ranges for each variable to account for the
313 * new information calculated from control flow.
316 vir_compute_start_end(struct v3d_compile
*c
, int num_vars
)
318 vir_for_each_block(block
, c
) {
319 for (int i
= 0; i
< num_vars
; i
++) {
320 if (BITSET_TEST(block
->live_in
, i
) &&
321 BITSET_TEST(block
->defin
, i
)) {
322 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
324 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
328 if (BITSET_TEST(block
->live_out
, i
) &&
329 BITSET_TEST(block
->defout
, i
)) {
330 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
332 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
340 vir_calculate_live_intervals(struct v3d_compile
*c
)
342 int bitset_words
= BITSET_WORDS(c
->num_temps
);
344 /* We may be called more than once if we've rearranged the program to
345 * try to get register allocation to succeed.
348 ralloc_free(c
->temp_start
);
349 ralloc_free(c
->temp_end
);
351 vir_for_each_block(block
, c
) {
352 ralloc_free(block
->def
);
353 ralloc_free(block
->use
);
354 ralloc_free(block
->live_in
);
355 ralloc_free(block
->live_out
);
359 c
->temp_start
= rzalloc_array(c
, int, c
->num_temps
);
360 c
->temp_end
= rzalloc_array(c
, int, c
->num_temps
);
362 for (int i
= 0; i
< c
->num_temps
; i
++) {
363 c
->temp_start
[i
] = MAX_INSTRUCTION
;
367 vir_for_each_block(block
, c
) {
368 block
->def
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
369 block
->defin
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
370 block
->defout
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
371 block
->use
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
372 block
->live_in
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
373 block
->live_out
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
376 vir_setup_def_use(c
);
378 while (vir_live_variables_dataflow(c
, bitset_words
))
381 while (vir_live_variables_defin_defout_dataflow(c
, bitset_words
))
384 vir_compute_start_end(c
, c
->num_temps
);
386 c
->live_intervals_valid
= true;