2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
/* Initial value for every c->temp_start[] entry (assigned in
 * vir_calculate_live_intervals()).  Start IPs are only ever lowered with
 * MIN2(), so seeding with a large value lets the first recorded IP win.
 */
25 #define MAX_INSTRUCTION (1 << 30)
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
/* Bookkeeping for a destination that is written piecewise.  Instances are
 * stored as the data of partial_update_ht entries (see
 * get_partial_update_state()).  vir_setup_def() stores the writing
 * instruction into insts[i] for each of the 4 channels.
 *
 * NOTE(review): code in this file also reads and writes state->channels, but
 * that member's declaration and the struct's closing brace are not visible
 * in this listing.
 */
31 struct partial_update_state
{
32 struct qinst
*insts
[4];
/* Hash callback handed to _mesa_hash_table_create() in vir_setup_def_use():
 * hashes sizeof(int) bytes starting at key using _mesa_hash_data().
 *
 * NOTE(review): the return type and braces are not visible in this listing.
 */
37 int_hash(const void *key
)
39 return _mesa_hash_data(key
, sizeof(int));
/* Key-equality callback handed to _mesa_hash_table_create() in
 * vir_setup_def_use(): reads both keys as const int and compares the values.
 *
 * NOTE(review): the return type and braces are not visible in this listing.
 */
43 int_compare(const void *key1
, const void *key2
)
45 return *(const int *)key1
== *(const int *)key2
;
/* Converts a qreg into the variable number that callers use to index
 * c->temp_start[], c->temp_end[] and the per-block bitsets.  The only visible
 * logic is a test for reg.file == QFILE_TEMP.
 *
 * NOTE(review): the values returned for the QFILE_TEMP and non-TEMP cases
 * are not visible in this listing; confirm against the full file.
 */
49 vir_reg_to_var(struct qreg reg
)
51 if (reg
.file
== QFILE_TEMP
)
/* Records a read of a source register by the instruction at ip.
 *
 * The register is mapped to a variable with vir_reg_to_var(); the variable's
 * [temp_start, temp_end] range in c is widened to include ip, and the
 * variable's bit is set in block->use unless its bit in block->def is
 * already set.
 *
 * NOTE(review): the declaration of the src parameter and any check on var
 * between the lookup and the array accesses are not visible in this listing.
 */
58 vir_setup_use(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
61 int var
= vir_reg_to_var(src
);
65 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
66 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
68 /* The use[] bitset marks when the block makes
69 * use of a variable without having completely
70 * defined that variable within the block.
72 if (!BITSET_TEST(block
->def
, var
))
73 BITSET_SET(block
->use
, var
);
/* Finds or creates the partial_update_state associated with an
 * instruction's destination.
 *
 * The table is searched first; a new state is allocated with rzalloc() using
 * the hash table as its ralloc parent and inserted under the key
 * &inst->dst.index.  That key is a pointer into the instruction itself, so
 * the entry is only usable while inst is alive.
 *
 * NOTE(review): the remaining parameter(s), the key passed to the search,
 * the handling of a hit and the return statements are not visible in this
 * listing; presumably an existing entry's data is returned on a hit.
 */
76 static struct partial_update_state
*
77 get_partial_update_state(struct hash_table
*partial_update_ht
,
80 struct hash_entry
*entry
=
81 _mesa_hash_table_search(partial_update_ht
,
86 struct partial_update_state
*state
=
87 rzalloc(partial_update_ht
, struct partial_update_state
);
89 _mesa_hash_table_insert(partial_update_ht
, &inst
->dst
.index
, state
);
/* Records the destination write performed by inst at ip.
 *
 * Visible steps, in order:
 *  - inst->qpu.type is compared against V3D_QPU_INSTR_TYPE_ALU (what happens
 *    for other types is not visible in this listing).
 *  - The destination is mapped to a variable and its
 *    [temp_start, temp_end] range in c is widened to include ip.
 *  - The block's use and def bits for the variable are tested; per the
 *    existing comment nothing further is needed when either is set.
 *  - A write with no add/mul condition (or with cond_is_exec_mask set) and
 *    with no output packing on either ALU sets the block's def bit.
 *  - Otherwise per-destination partial-update state is fetched from
 *    partial_update_ht; unconditional writes OR mask into state->channels,
 *    conditional ones record inst in state->insts[] per channel, and the def
 *    bit is set once state->channels == 0xf.
 *
 * NOTE(review): mask is hard-coded to 0xf and the complement-condition
 * merge is commented out (see the XXX markers), and several control-flow
 * lines (returns, else, closing braces) are not visible in this listing.
 */
95 vir_setup_def(struct v3d_compile
*c
, struct qblock
*block
, int ip
,
96 struct hash_table
*partial_update_ht
, struct qinst
*inst
)
98 if (inst
->qpu
.type
!= V3D_QPU_INSTR_TYPE_ALU
)
101 /* The def[] bitset marks when an initialization in a
102 * block completely screens off previous updates of
105 int var
= vir_reg_to_var(inst
->dst
);
109 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
110 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
112 /* If we've already tracked this as a def, or already used it within
113 * the block, there's nothing to do.
115 if (BITSET_TEST(block
->use
, var
) || BITSET_TEST(block
->def
, var
))
118 /* Easy, common case: unconditional full register update.
120 * We treat conditioning on the exec mask as the same as not being
121 * conditional. This makes sure that if the register gets set on
122 * either side of an if, it is treated as being screened off before
123 * the if. Otherwise, if there was no intervening def, its live
124 * interval doesn't extend back to the start of the program, and if too
125 * many registers did that we'd fail to register allocate.
127 if (((inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
128 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) ||
129 inst
->cond_is_exec_mask
) &&
130 inst
->qpu
.alu
.add
.output_pack
== V3D_QPU_PACK_NONE
&&
131 inst
->qpu
.alu
.mul
.output_pack
== V3D_QPU_PACK_NONE
) {
132 BITSET_SET(block
->def
, var
);
136 /* Finally, look at the condition code and packing and mark it as a
137 * def. We need to make sure that we understand sequences
150 * as defining the temp within the block, because otherwise dst's live
151 * range will get extended up the control flow to the top of the
154 struct partial_update_state
*state
=
155 get_partial_update_state(partial_update_ht
, inst
);
156 uint8_t mask
= 0xf; /* XXX vir_channels_written(inst); */
158 if (inst
->qpu
.flags
.ac
== V3D_QPU_COND_NONE
&&
159 inst
->qpu
.flags
.mc
== V3D_QPU_COND_NONE
) {
160 state
->channels
|= mask
;
162 for (int i
= 0; i
< 4; i
++) {
163 if (!(mask
& (1 << i
)))
166 /* XXXif (state->insts[i] &&
167 state->insts[i]->cond ==
168 qpu_cond_complement(inst->cond))
169 state->channels |= 1 << i;
172 state
->insts
[i
] = inst
;
176 if (state
->channels
== 0xf)
177 BITSET_SET(block
->def
, var
);
/* Drops remembered conditional writes from every partial-update entry.
 *
 * For each entry in partial_update_ht, each of the 4 insts[] slots is reset
 * to NULL when it holds an instruction whose add condition (flags.ac) or mul
 * condition (flags.mc) is not V3D_QPU_COND_NONE.  Slots holding
 * unconditional instructions, and state->channels, are left untouched.
 *
 * The only visible call site, in vir_setup_def_use(), is guarded by
 * if (false), so this is currently never invoked from there.
 */
181 sf_state_clear(struct hash_table
*partial_update_ht
)
183 hash_table_foreach(partial_update_ht
, entry
) {
184 struct partial_update_state
*state
= entry
->data
;
186 for (int i
= 0; i
< 4; i
++) {
187 if (state
->insts
[i
] &&
188 (state
->insts
[i
]->qpu
.flags
.ac
!= V3D_QPU_COND_NONE
||
189 state
->insts
[i
]->qpu
.flags
.mc
!= V3D_QPU_COND_NONE
))
190 state
->insts
[i
] = NULL
;
/* Review summary of vir_setup_def_use(c):
 *
 *  - Creates a scratch hash table (ralloc parent c, int_hash/int_compare
 *    callbacks) and destroys it before returning.
 *  - For each block: stores the current ip in block->start_ip and empties
 *    the hash table, so partial-update state does not carry across blocks.
 *  - For each instruction: calls vir_setup_use() for every source
 *    (vir_get_nsrc(inst) of them), then vir_setup_def() for the destination.
 *  - When src[0] is a QFILE_REG with one of the indices handled by the
 *    switch, forces c->temp_start[inst->dst.index] to 0.
 *
 * NOTE(review): the declaration and increment of ip, the switch case labels
 * and any end-of-block bookkeeping are not visible in this listing.  The
 * sf_state_clear() call is dead code while its condition is literally false.
 */
195 /* Sets up the def/use arrays for when variables are used-before-defined or
196 * defined-before-used in the block.
198 * Also initializes the temp_start/temp_end to cover just the instruction IPs
199 * where the variable is used, which will be extended later in
200 * vir_compute_start_end().
203 vir_setup_def_use(struct v3d_compile
*c
)
205 struct hash_table
*partial_update_ht
=
206 _mesa_hash_table_create(c
, int_hash
, int_compare
);
209 vir_for_each_block(block
, c
) {
210 block
->start_ip
= ip
;
212 _mesa_hash_table_clear(partial_update_ht
, NULL
);
214 vir_for_each_inst(inst
, block
) {
215 for (int i
= 0; i
< vir_get_nsrc(inst
); i
++)
216 vir_setup_use(c
, block
, ip
, inst
->src
[i
]);
218 vir_setup_def(c
, block
, ip
, partial_update_ht
, inst
);
220 if (false /* XXX inst->uf */)
221 sf_state_clear(partial_update_ht
);
223 /* Payload registers: r0/1/2 contain W, centroid W,
224 * and Z at program start. Register allocation will
225 * force their nodes to R0/1/2.
227 if (inst
->src
[0].file
== QFILE_REG
) {
228 switch (inst
->src
[0].index
) {
232 c
->temp_start
[inst
->dst
.index
] = 0;
242 _mesa_hash_table_destroy(partial_update_ht
, NULL
);
/* Performs one pass of the liveness dataflow over all blocks, visited in
 * reverse order.
 *
 * Per block, word by word over bitset_words words:
 *  - live_out gains every bit set in a successor's live_in;
 *  - live_in gains the bits of use plus bits derived from live_out.
 *
 * vir_calculate_live_intervals() calls this in a loop until it returns a
 * false value.
 *
 * NOTE(review): the right-hand operand of the live_out mask in the live_in
 * computation (presumably the complement of block->def), the progress flag
 * and the return statement are not visible in this listing.
 */
246 vir_live_variables_dataflow(struct v3d_compile
*c
, int bitset_words
)
250 vir_for_each_block_rev(block
, c
) {
251 /* Update live_out: Any successor using the variable
252 * on entrance needs us to have the variable live on
255 vir_for_each_successor(succ
, block
) {
256 for (int i
= 0; i
< bitset_words
; i
++) {
257 BITSET_WORD new_live_out
= (succ
->live_in
[i
] &
258 ~block
->live_out
[i
]);
260 block
->live_out
[i
] |= new_live_out
;
267 for (int i
= 0; i
< bitset_words
; i
++) {
268 BITSET_WORD new_live_in
= (block
->use
[i
] |
269 (block
->live_out
[i
] &
271 if (new_live_in
& ~block
->live_in
[i
]) {
272 block
->live_in
[i
] |= new_live_in
;
/* Review summary of vir_compute_start_end(c, num_vars):
 *
 * For every block and every variable index below num_vars, a set live_in bit
 * and a set live_out bit each cause c->temp_start[i] to be lowered with
 * MIN2() and c->temp_end[i] to be raised with MAX2().
 *
 * NOTE(review): the second arguments of those MIN2()/MAX2() calls
 * (presumably the block's start and end IPs) are not visible in this
 * listing.
 */
282 * Extend the start/end ranges for each variable to account for the
283 * new information calculated from control flow.
286 vir_compute_start_end(struct v3d_compile
*c
, int num_vars
)
288 vir_for_each_block(block
, c
) {
289 for (int i
= 0; i
< num_vars
; i
++) {
290 if (BITSET_TEST(block
->live_in
, i
)) {
291 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
293 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
297 if (BITSET_TEST(block
->live_out
, i
)) {
298 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
300 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
/* Entry point: (re)computes live intervals for all c->num_temps temporaries.
 *
 * Sequence visible here:
 *  1. Free the previous temp_start/temp_end arrays and each block's
 *     def/use/live_in/live_out bitsets (the function may run more than once).
 *  2. Allocate fresh arrays of c->num_temps ints with c as ralloc parent and
 *     seed every temp_start entry with MAX_INSTRUCTION.
 *  3. Allocate the four per-block bitsets, BITSET_WORDS(c->num_temps) words
 *     each.
 *  4. Run vir_setup_def_use(), repeat vir_live_variables_dataflow() until it
 *     returns a false value, then vir_compute_start_end().
 *  5. Set c->live_intervals_valid to true.
 *
 * NOTE(review): any condition guarding the frees in step 1 and any explicit
 * initial value for temp_end in step 2 are not visible in this listing.
 */
308 vir_calculate_live_intervals(struct v3d_compile
*c
)
310 int bitset_words
= BITSET_WORDS(c
->num_temps
);
312 /* We may be called more than once if we've rearranged the program to
313 * try to get register allocation to succeed.
316 ralloc_free(c
->temp_start
);
317 ralloc_free(c
->temp_end
);
319 vir_for_each_block(block
, c
) {
320 ralloc_free(block
->def
);
321 ralloc_free(block
->use
);
322 ralloc_free(block
->live_in
);
323 ralloc_free(block
->live_out
);
327 c
->temp_start
= rzalloc_array(c
, int, c
->num_temps
);
328 c
->temp_end
= rzalloc_array(c
, int, c
->num_temps
);
330 for (int i
= 0; i
< c
->num_temps
; i
++) {
331 c
->temp_start
[i
] = MAX_INSTRUCTION
;
335 vir_for_each_block(block
, c
) {
336 block
->def
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
337 block
->use
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
338 block
->live_in
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
339 block
->live_out
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
342 vir_setup_def_use(c
);
344 while (vir_live_variables_dataflow(c
, bitset_words
))
347 vir_compute_start_end(c
, c
->num_temps
);
349 c
->live_intervals_valid
= true;