2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #define MAX_INSTRUCTION (1 << 30)
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
31 struct partial_update_state
{
32 struct qinst
*insts
[4];
37 int_hash(const void *key
)
39 return _mesa_hash_data(key
, sizeof(int));
43 int_compare(const void *key1
, const void *key2
)
45 return *(const int *)key1
== *(const int *)key2
;
49 vir_reg_to_var(struct qreg reg
)
51 if (reg
.file
== QFILE_TEMP
)
58 vir_setup_use(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
61 int var
= vir_reg_to_var(src
);
65 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
66 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
68 /* The use[] bitset marks when the block makes
69 * use of a variable without having completely
70 * defined that variable within the block.
72 if (!BITSET_TEST(block
->def
, var
))
73 BITSET_SET(block
->use
, var
);
76 static struct partial_update_state
*
77 get_partial_update_state(struct hash_table
*partial_update_ht
,
80 struct hash_entry
*entry
=
81 _mesa_hash_table_search(partial_update_ht
,
86 struct partial_update_state
*state
=
87 rzalloc(partial_update_ht
, struct partial_update_state
);
89 _mesa_hash_table_insert(partial_update_ht
, &inst
->dst
.index
, state
);
95 vir_setup_def(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
96 struct hash_table
*partial_update_ht
, struct qinst
*inst
)
98 if (inst
->qpu
.type
!= V3D_QPU_INSTR_TYPE_ALU
)
101 /* The def[] bitset marks when an initialization in a
102 * block completely screens off previous updates of
105 int var
= vir_reg_to_var(inst
->dst
);
109 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
110 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
112 /* If we've already tracked this as a def, or already used it within
113 * the block, there's nothing to do.
115 if (BITSET_TEST(block
->use
, var
) || BITSET_TEST(block
->def
, var
))
118 /* Easy, common case: unconditional full register update.
120 * We treat conditioning on the exec mask as the same as not being
121 * conditional. This makes sure that if the register gets set on
122 * either side of an if, it is treated as being screened off before
123 * the if. Otherwise, if there was no intervening def, its live
124 * interval doesn't extend back to the start of he program, and if too
125 * many registers did that we'd fail to register allocate.
127 if (((inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
128 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) ||
129 inst
->cond_is_exec_mask
) &&
130 inst
->qpu
.alu
.add
.output_pack
== V3D_QPU_PACK_NONE
&&
131 inst
->qpu
.alu
.mul
.output_pack
== V3D_QPU_PACK_NONE
) {
132 BITSET_SET(block
->def
, var
);
136 /* Finally, look at the condition code and packing and mark it as a
137 * def. We need to make sure that we understand sequences
150 * as defining the temp within the block, because otherwise dst's live
151 * range will get extended up the control flow to the top of the
154 struct partial_update_state
*state
=
155 get_partial_update_state(partial_update_ht
, inst
);
156 uint8_t mask
= 0xf; /* XXX vir_channels_written(inst); */
158 if (inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
159 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) {
160 state
->channels
|= mask
;
162 for (int i
= 0; i
< 4; i
++) {
163 if (!(mask
& (1 << i
)))
166 /* XXXif (state->insts[i] &&
167 state->insts[i]->cond ==
168 qpu_cond_complement(inst->cond))
169 state->channels |= 1 << i;
172 state
->insts
[i
] = inst
;
176 if (state
->channels
== 0xf)
177 BITSET_SET(block
->def
, var
);
181 sf_state_clear(struct hash_table
*partial_update_ht
)
183 struct hash_entry
*entry
;
185 hash_table_foreach(partial_update_ht
, entry
) {
186 struct partial_update_state
*state
= entry
->data
;
188 for (int i
= 0; i
< 4; i
++) {
189 if (state
->insts
[i
] &&
190 (state
->insts
[i
]->qpu
.flags
.ac
!= V3D_QPU_COND_NONE
||
191 state
->insts
[i
]->qpu
.flags
.mc
!= V3D_QPU_COND_NONE
))
192 state
->insts
[i
] = NULL
;
197 /* Sets up the def/use arrays for when variables are used-before-defined or
198 * defined-before-used in the block.
200 * Also initializes the temp_start/temp_end to cover just the instruction IPs
201 * where the variable is used, which will be extended later in
202 * vir_compute_start_end().
205 vir_setup_def_use(struct v3d_compile
*c
)
207 struct hash_table
*partial_update_ht
=
208 _mesa_hash_table_create(c
, int_hash
, int_compare
);
211 vir_for_each_block(block
, c
) {
212 block
->start_ip
= ip
;
214 _mesa_hash_table_clear(partial_update_ht
, NULL
);
216 vir_for_each_inst(inst
, block
) {
217 for (int i
= 0; i
< vir_get_nsrc(inst
); i
++)
218 vir_setup_use(c
, block
, ip
, inst
->src
[i
]);
220 vir_setup_def(c
, block
, ip
, partial_update_ht
, inst
);
222 if (false /* XXX inst->uf */)
223 sf_state_clear(partial_update_ht
);
225 /* Payload registers: r0/1/2 contain W, centroid W,
226 * and Z at program start. Register allocation will
227 * force their nodes to R0/1/2.
229 if (inst
->src
[0].file
== QFILE_REG
) {
230 switch (inst
->src
[0].index
) {
234 c
->temp_start
[inst
->dst
.index
] = 0;
244 _mesa_hash_table_destroy(partial_update_ht
, NULL
);
248 vir_live_variables_dataflow(struct v3d_compile
*c
, int bitset_words
)
252 vir_for_each_block_rev(block
, c
) {
253 /* Update live_out: Any successor using the variable
254 * on entrance needs us to have the variable live on
257 vir_for_each_successor(succ
, block
) {
258 for (int i
= 0; i
< bitset_words
; i
++) {
259 BITSET_WORD new_live_out
= (succ
->live_in
[i
] &
260 ~block
->live_out
[i
]);
262 block
->live_out
[i
] |= new_live_out
;
269 for (int i
= 0; i
< bitset_words
; i
++) {
270 BITSET_WORD new_live_in
= (block
->use
[i
] |
271 (block
->live_out
[i
] &
273 if (new_live_in
& ~block
->live_in
[i
]) {
274 block
->live_in
[i
] |= new_live_in
;
284 * Extend the start/end ranges for each variable to account for the
285 * new information calculated from control flow.
288 vir_compute_start_end(struct v3d_compile
*c
, int num_vars
)
290 vir_for_each_block(block
, c
) {
291 for (int i
= 0; i
< num_vars
; i
++) {
292 if (BITSET_TEST(block
->live_in
, i
)) {
293 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
295 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
299 if (BITSET_TEST(block
->live_out
, i
)) {
300 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
302 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
310 vir_calculate_live_intervals(struct v3d_compile
*c
)
312 int bitset_words
= BITSET_WORDS(c
->num_temps
);
314 /* We may be called more than once if we've rearranged the program to
315 * try to get register allocation to succeed.
318 ralloc_free(c
->temp_start
);
319 ralloc_free(c
->temp_end
);
321 vir_for_each_block(block
, c
) {
322 ralloc_free(block
->def
);
323 ralloc_free(block
->use
);
324 ralloc_free(block
->live_in
);
325 ralloc_free(block
->live_out
);
329 c
->temp_start
= rzalloc_array(c
, int, c
->num_temps
);
330 c
->temp_end
= rzalloc_array(c
, int, c
->num_temps
);
332 for (int i
= 0; i
< c
->num_temps
; i
++) {
333 c
->temp_start
[i
] = MAX_INSTRUCTION
;
337 vir_for_each_block(block
, c
) {
338 block
->def
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
339 block
->use
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
340 block
->live_in
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
341 block
->live_out
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
344 vir_setup_def_use(c
);
346 while (vir_live_variables_dataflow(c
, bitset_words
))
349 vir_compute_start_end(c
, c
->num_temps
);
351 c
->live_intervals_valid
= true;