2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #define MAX_INSTRUCTION (1 << 30)
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
31 struct partial_update_state
{
32 struct qinst
*insts
[4];
37 int_hash(const void *key
)
39 return _mesa_hash_data(key
, sizeof(int));
43 int_compare(const void *key1
, const void *key2
)
45 return *(const int *)key1
== *(const int *)key2
;
49 vir_reg_to_var(struct qreg reg
)
51 if (reg
.file
== QFILE_TEMP
)
58 vir_setup_use(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
61 int var
= vir_reg_to_var(src
);
65 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
66 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
68 /* The use[] bitset marks when the block makes
69 * use of a variable without having completely
70 * defined that variable within the block.
72 if (!BITSET_TEST(block
->def
, var
))
73 BITSET_SET(block
->use
, var
);
76 static struct partial_update_state
*
77 get_partial_update_state(struct hash_table
*partial_update_ht
,
80 struct hash_entry
*entry
=
81 _mesa_hash_table_search(partial_update_ht
,
86 struct partial_update_state
*state
=
87 rzalloc(partial_update_ht
, struct partial_update_state
);
89 _mesa_hash_table_insert(partial_update_ht
, &inst
->dst
.index
, state
);
95 vir_setup_def(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
96 struct hash_table
*partial_update_ht
, struct qinst
*inst
)
98 if (inst
->qpu
.type
!= V3D_QPU_INSTR_TYPE_ALU
)
101 /* The def[] bitset marks when an initialization in a
102 * block completely screens off previous updates of
105 int var
= vir_reg_to_var(inst
->dst
);
109 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
110 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
112 /* Mark the block as having a (partial) def of the var. */
113 BITSET_SET(block
->defout
, var
);
115 /* If we've already tracked this as a def that screens off previous
116 * uses, or already used it within the block, there's nothing to do.
118 if (BITSET_TEST(block
->use
, var
) || BITSET_TEST(block
->def
, var
))
121 /* Easy, common case: unconditional full register update.*/
122 if ((inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
123 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) &&
124 inst
->qpu
.alu
.add
.output_pack
== V3D_QPU_PACK_NONE
&&
125 inst
->qpu
.alu
.mul
.output_pack
== V3D_QPU_PACK_NONE
) {
126 BITSET_SET(block
->def
, var
);
130 /* Finally, look at the condition code and packing and mark it as a
131 * def. We need to make sure that we understand sequences
144 * as defining the temp within the block, because otherwise dst's live
145 * range will get extended up the control flow to the top of the
148 struct partial_update_state
*state
=
149 get_partial_update_state(partial_update_ht
, inst
);
150 uint8_t mask
= 0xf; /* XXX vir_channels_written(inst); */
152 if (inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
153 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) {
154 state
->channels
|= mask
;
156 for (int i
= 0; i
< 4; i
++) {
157 if (!(mask
& (1 << i
)))
160 /* XXXif (state->insts[i] &&
161 state->insts[i]->cond ==
162 qpu_cond_complement(inst->cond))
163 state->channels |= 1 << i;
166 state
->insts
[i
] = inst
;
170 if (state
->channels
== 0xf)
171 BITSET_SET(block
->def
, var
);
175 sf_state_clear(struct hash_table
*partial_update_ht
)
177 hash_table_foreach(partial_update_ht
, entry
) {
178 struct partial_update_state
*state
= entry
->data
;
180 for (int i
= 0; i
< 4; i
++) {
181 if (state
->insts
[i
] &&
182 (state
->insts
[i
]->qpu
.flags
.ac
!= V3D_QPU_COND_NONE
||
183 state
->insts
[i
]->qpu
.flags
.mc
!= V3D_QPU_COND_NONE
))
184 state
->insts
[i
] = NULL
;
189 /* Sets up the def/use arrays for when variables are used-before-defined or
190 * defined-before-used in the block.
192 * Also initializes the temp_start/temp_end to cover just the instruction IPs
193 * where the variable is used, which will be extended later in
194 * vir_compute_start_end().
197 vir_setup_def_use(struct v3d_compile
*c
)
199 struct hash_table
*partial_update_ht
=
200 _mesa_hash_table_create(c
, int_hash
, int_compare
);
203 vir_for_each_block(block
, c
) {
204 block
->start_ip
= ip
;
206 _mesa_hash_table_clear(partial_update_ht
, NULL
);
208 vir_for_each_inst(inst
, block
) {
209 for (int i
= 0; i
< vir_get_nsrc(inst
); i
++)
210 vir_setup_use(c
, block
, ip
, inst
->src
[i
]);
212 vir_setup_def(c
, block
, ip
, partial_update_ht
, inst
);
214 if (false /* XXX inst->uf */)
215 sf_state_clear(partial_update_ht
);
217 /* Payload registers: r0/1/2 contain W, centroid W,
218 * and Z at program start. Register allocation will
219 * force their nodes to R0/1/2.
221 if (inst
->src
[0].file
== QFILE_REG
) {
222 switch (inst
->src
[0].index
) {
226 c
->temp_start
[inst
->dst
.index
] = 0;
236 _mesa_hash_table_destroy(partial_update_ht
, NULL
);
240 vir_live_variables_dataflow(struct v3d_compile
*c
, int bitset_words
)
244 vir_for_each_block_rev(block
, c
) {
245 /* Update live_out: Any successor using the variable
246 * on entrance needs us to have the variable live on
249 vir_for_each_successor(succ
, block
) {
250 for (int i
= 0; i
< bitset_words
; i
++) {
251 BITSET_WORD new_live_out
= (succ
->live_in
[i
] &
252 ~block
->live_out
[i
]);
254 block
->live_out
[i
] |= new_live_out
;
261 for (int i
= 0; i
< bitset_words
; i
++) {
262 BITSET_WORD new_live_in
= (block
->use
[i
] |
263 (block
->live_out
[i
] &
265 if (new_live_in
& ~block
->live_in
[i
]) {
266 block
->live_in
[i
] |= new_live_in
;
276 vir_live_variables_defin_defout_dataflow(struct v3d_compile
*c
, int bitset_words
)
280 vir_for_each_block_rev(block
, c
) {
281 /* Propagate defin/defout down the successors to produce the
282 * union of blocks with a reachable (partial) definition of
285 * This keeps a conditional first write to a reg from
286 * extending its lifetime back to the start of the program.
288 vir_for_each_successor(succ
, block
) {
289 for (int i
= 0; i
< bitset_words
; i
++) {
290 BITSET_WORD new_def
= (block
->defout
[i
] &
292 succ
->defin
[i
] |= new_def
;
293 succ
->defout
[i
] |= new_def
;
303 * Extend the start/end ranges for each variable to account for the
304 * new information calculated from control flow.
307 vir_compute_start_end(struct v3d_compile
*c
, int num_vars
)
309 vir_for_each_block(block
, c
) {
310 for (int i
= 0; i
< num_vars
; i
++) {
311 if (BITSET_TEST(block
->live_in
, i
) &&
312 BITSET_TEST(block
->defin
, i
)) {
313 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
315 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
319 if (BITSET_TEST(block
->live_out
, i
) &&
320 BITSET_TEST(block
->defout
, i
)) {
321 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
323 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
331 vir_calculate_live_intervals(struct v3d_compile
*c
)
333 int bitset_words
= BITSET_WORDS(c
->num_temps
);
335 /* We may be called more than once if we've rearranged the program to
336 * try to get register allocation to succeed.
339 ralloc_free(c
->temp_start
);
340 ralloc_free(c
->temp_end
);
342 vir_for_each_block(block
, c
) {
343 ralloc_free(block
->def
);
344 ralloc_free(block
->use
);
345 ralloc_free(block
->live_in
);
346 ralloc_free(block
->live_out
);
350 c
->temp_start
= rzalloc_array(c
, int, c
->num_temps
);
351 c
->temp_end
= rzalloc_array(c
, int, c
->num_temps
);
353 for (int i
= 0; i
< c
->num_temps
; i
++) {
354 c
->temp_start
[i
] = MAX_INSTRUCTION
;
358 vir_for_each_block(block
, c
) {
359 block
->def
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
360 block
->defin
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
361 block
->defout
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
362 block
->use
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
363 block
->live_in
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
364 block
->live_out
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
367 vir_setup_def_use(c
);
369 while (vir_live_variables_dataflow(c
, bitset_words
))
372 while (vir_live_variables_defin_defout_dataflow(c
, bitset_words
))
375 vir_compute_start_end(c
, c
->num_temps
);
377 c
->live_intervals_valid
= true;