/*
 * Copyright © 2012 Intel Corporation
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* Initial value of every temp_start[] slot, so that the first MIN2() against
 * a real instruction IP replaces it.  Assumed to be larger than any IP a
 * program can reach.
 */
#define MAX_INSTRUCTION (1 << 30)

27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
/* Per-destination record of the writes seen so far in the current block that
 * did not, on their own, fully define the destination temp.
 */
struct partial_update_state {
        /* Most recent conditional instruction that wrote each channel. */
        struct qinst *insts[4];
        /* Bitmask of channels written unconditionally so far; 0xf means the
         * temp counts as fully defined within the block.
         */
        uint8_t channels;
};

37 vir_reg_to_var(struct qreg reg
)
39 if (reg
.file
== QFILE_TEMP
)
46 vir_setup_use(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
49 int var
= vir_reg_to_var(src
);
53 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
54 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
56 /* The use[] bitset marks when the block makes
57 * use of a variable without having completely
58 * defined that variable within the block.
60 if (!BITSET_TEST(block
->def
, var
))
61 BITSET_SET(block
->use
, var
);
64 static struct partial_update_state
*
65 get_partial_update_state(struct hash_table
*partial_update_ht
,
68 struct hash_entry
*entry
=
69 _mesa_hash_table_search(partial_update_ht
,
74 struct partial_update_state
*state
=
75 rzalloc(partial_update_ht
, struct partial_update_state
);
77 _mesa_hash_table_insert(partial_update_ht
, &inst
->dst
.index
, state
);
83 vir_setup_def(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
84 struct hash_table
*partial_update_ht
, struct qinst
*inst
)
86 if (inst
->qpu
.type
!= V3D_QPU_INSTR_TYPE_ALU
)
89 /* The def[] bitset marks when an initialization in a
90 * block completely screens off previous updates of
93 int var
= vir_reg_to_var(inst
->dst
);
97 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
98 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
100 /* Mark the block as having a (partial) def of the var. */
101 BITSET_SET(block
->defout
, var
);
103 /* If we've already tracked this as a def that screens off previous
104 * uses, or already used it within the block, there's nothing to do.
106 if (BITSET_TEST(block
->use
, var
) || BITSET_TEST(block
->def
, var
))
109 /* Easy, common case: unconditional full register update.*/
110 if ((inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
111 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) &&
112 inst
->qpu
.alu
.add
.output_pack
== V3D_QPU_PACK_NONE
&&
113 inst
->qpu
.alu
.mul
.output_pack
== V3D_QPU_PACK_NONE
) {
114 BITSET_SET(block
->def
, var
);
118 /* Finally, look at the condition code and packing and mark it as a
119 * def. We need to make sure that we understand sequences
132 * as defining the temp within the block, because otherwise dst's live
133 * range will get extended up the control flow to the top of the
136 struct partial_update_state
*state
=
137 get_partial_update_state(partial_update_ht
, inst
);
138 uint8_t mask
= 0xf; /* XXX vir_channels_written(inst); */
140 if (inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
141 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) {
142 state
->channels
|= mask
;
144 for (int i
= 0; i
< 4; i
++) {
145 if (!(mask
& (1 << i
)))
148 /* XXXif (state->insts[i] &&
149 state->insts[i]->cond ==
150 qpu_cond_complement(inst->cond))
151 state->channels |= 1 << i;
154 state
->insts
[i
] = inst
;
158 if (state
->channels
== 0xf)
159 BITSET_SET(block
->def
, var
);
163 sf_state_clear(struct hash_table
*partial_update_ht
)
165 hash_table_foreach(partial_update_ht
, entry
) {
166 struct partial_update_state
*state
= entry
->data
;
168 for (int i
= 0; i
< 4; i
++) {
169 if (state
->insts
[i
] &&
170 (state
->insts
[i
]->qpu
.flags
.ac
!= V3D_QPU_COND_NONE
||
171 state
->insts
[i
]->qpu
.flags
.mc
!= V3D_QPU_COND_NONE
))
172 state
->insts
[i
] = NULL
;
177 /* Sets up the def/use arrays for when variables are used-before-defined or
178 * defined-before-used in the block.
180 * Also initializes the temp_start/temp_end to cover just the instruction IPs
181 * where the variable is used, which will be extended later in
182 * vir_compute_start_end().
185 vir_setup_def_use(struct v3d_compile
*c
)
187 struct hash_table
*partial_update_ht
=
188 _mesa_hash_table_create(c
, _mesa_hash_int
, _mesa_key_int_equal
);
191 vir_for_each_block(block
, c
) {
192 block
->start_ip
= ip
;
194 _mesa_hash_table_clear(partial_update_ht
, NULL
);
196 vir_for_each_inst(inst
, block
) {
197 for (int i
= 0; i
< vir_get_nsrc(inst
); i
++)
198 vir_setup_use(c
, block
, ip
, inst
->src
[i
]);
200 vir_setup_def(c
, block
, ip
, partial_update_ht
, inst
);
202 if (false /* XXX inst->uf */)
203 sf_state_clear(partial_update_ht
);
205 /* Payload registers: r0/1/2 contain W, centroid W,
206 * and Z at program start. Register allocation will
207 * force their nodes to R0/1/2.
209 if (inst
->src
[0].file
== QFILE_REG
) {
210 switch (inst
->src
[0].index
) {
214 c
->temp_start
[inst
->dst
.index
] = 0;
224 _mesa_hash_table_destroy(partial_update_ht
, NULL
);
228 vir_live_variables_dataflow(struct v3d_compile
*c
, int bitset_words
)
232 vir_for_each_block_rev(block
, c
) {
233 /* Update live_out: Any successor using the variable
234 * on entrance needs us to have the variable live on
237 vir_for_each_successor(succ
, block
) {
238 for (int i
= 0; i
< bitset_words
; i
++) {
239 BITSET_WORD new_live_out
= (succ
->live_in
[i
] &
240 ~block
->live_out
[i
]);
242 block
->live_out
[i
] |= new_live_out
;
249 for (int i
= 0; i
< bitset_words
; i
++) {
250 BITSET_WORD new_live_in
= (block
->use
[i
] |
251 (block
->live_out
[i
] &
253 if (new_live_in
& ~block
->live_in
[i
]) {
254 block
->live_in
[i
] |= new_live_in
;
264 vir_live_variables_defin_defout_dataflow(struct v3d_compile
*c
, int bitset_words
)
268 vir_for_each_block_rev(block
, c
) {
269 /* Propagate defin/defout down the successors to produce the
270 * union of blocks with a reachable (partial) definition of
273 * This keeps a conditional first write to a reg from
274 * extending its lifetime back to the start of the program.
276 vir_for_each_successor(succ
, block
) {
277 for (int i
= 0; i
< bitset_words
; i
++) {
278 BITSET_WORD new_def
= (block
->defout
[i
] &
280 succ
->defin
[i
] |= new_def
;
281 succ
->defout
[i
] |= new_def
;
291 * Extend the start/end ranges for each variable to account for the
292 * new information calculated from control flow.
295 vir_compute_start_end(struct v3d_compile
*c
, int num_vars
)
297 vir_for_each_block(block
, c
) {
298 for (int i
= 0; i
< num_vars
; i
++) {
299 if (BITSET_TEST(block
->live_in
, i
) &&
300 BITSET_TEST(block
->defin
, i
)) {
301 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
303 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
307 if (BITSET_TEST(block
->live_out
, i
) &&
308 BITSET_TEST(block
->defout
, i
)) {
309 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
311 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
319 vir_calculate_live_intervals(struct v3d_compile
*c
)
321 int bitset_words
= BITSET_WORDS(c
->num_temps
);
323 /* We may be called more than once if we've rearranged the program to
324 * try to get register allocation to succeed.
327 ralloc_free(c
->temp_start
);
328 ralloc_free(c
->temp_end
);
330 vir_for_each_block(block
, c
) {
331 ralloc_free(block
->def
);
332 ralloc_free(block
->use
);
333 ralloc_free(block
->live_in
);
334 ralloc_free(block
->live_out
);
338 c
->temp_start
= rzalloc_array(c
, int, c
->num_temps
);
339 c
->temp_end
= rzalloc_array(c
, int, c
->num_temps
);
341 for (int i
= 0; i
< c
->num_temps
; i
++) {
342 c
->temp_start
[i
] = MAX_INSTRUCTION
;
346 vir_for_each_block(block
, c
) {
347 block
->def
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
348 block
->defin
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
349 block
->defout
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
350 block
->use
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
351 block
->live_in
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
352 block
->live_out
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
355 vir_setup_def_use(c
);
357 while (vir_live_variables_dataflow(c
, bitset_words
))
360 while (vir_live_variables_defin_defout_dataflow(c
, bitset_words
))
363 vir_compute_start_end(c
, c
->num_temps
);
365 c
->live_intervals_valid
= true;