2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
/* Initial value stored into every temp_start[] slot by
 * qir_calculate_live_intervals() before any instruction has been seen; later
 * MIN2() updates against real instruction IPs replace it.
 * NOTE(review): presumably chosen to be larger than any real IP -- confirm.
 */
25 #define MAX_INSTRUCTION (1 << 30)
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "vc4_context.h"
/* Bookkeeping for writes that only partially update a temp, stored in the
 * partial_update_ht hash table keyed by the destination temp's index.
 *
 * insts[i] records an instruction that wrote channel i; it is filled in by
 * qir_setup_def() and cleared again by sf_state_clear().
 * NOTE(review): lines appear to be missing from this extract; a "channels"
 * member is used by qir_setup_def() but its declaration, and the closing
 * brace of the struct, are not visible here.
 */
32 struct partial_update_state
{
33 struct qinst
*insts
[4];
/* Hash callback handed to _mesa_hash_table_create() for partial_update_ht:
 * hashes the sizeof(int) bytes that key points at with _mesa_hash_data().
 * NOTE(review): the return-type line and braces are missing from this
 * extract.
 */
38 int_hash(const void *key
)
40 return _mesa_hash_data(key
, sizeof(int));
/* Key-equality callback handed to _mesa_hash_table_create() for
 * partial_update_ht: both keys are read as pointers to int and the pointed-to
 * values are compared.
 * NOTE(review): the return-type line and braces are missing from this
 * extract.
 */
44 int_compare(const void *key1
, const void *key2
)
46 return *(const int *)key1
== *(const int *)key2
;
/* Maps a QIR register to the variable index used by the liveness code.
 *
 * Callers use the result to index temp_start[]/temp_end[] and the per-block
 * def/use bitsets.  Only the QFILE_TEMP test is visible here.
 * NOTE(review): both return statements are missing from this extract --
 * presumably the temp's index for QFILE_TEMP and a "not a variable" sentinel
 * otherwise; confirm against the full source.
 */
50 qir_reg_to_var(struct qreg reg
)
52 if (reg
.file
== QFILE_TEMP
)
/* Records a read of a source register at instruction ip inside block.
 *
 * Widens the variable's [temp_start, temp_end] range so that it includes ip
 * and, when the block has not already fully defined the variable (its def bit
 * is clear), sets the variable's bit in block->use.
 * NOTE(review): the parameter line declaring src and any early-out for
 * registers that are not variables are missing from this extract.
 */
59 qir_setup_use(struct vc4_compile
*c
, struct qblock
*block
, int ip
,
62 int var
= qir_reg_to_var(src
);
66 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
67 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
69 /* The use[] bitset marks when the block makes
70 * use of a variable without having completely
71 * defined that variable within the block.
73 if (!BITSET_TEST(block
->def
, var
))
74 BITSET_SET(block
->use
, var
);
/* Fetches the partial_update_state associated with inst's destination from
 * partial_update_ht, allocating a new one when needed.
 *
 * A new state is zero-initialised by rzalloc() with the hash table as its
 * ralloc context, and is inserted under the key &inst->dst.index -- a pointer
 * into inst itself rather than a copy of the index.
 * NOTE(review): the search key, the hit/miss test and the return statements
 * are missing from this extract; the lookup-or-create reading is inferred
 * from the visible search followed by rzalloc + insert.
 */
77 static struct partial_update_state
*
78 get_partial_update_state(struct hash_table
*partial_update_ht
,
81 struct hash_entry
*entry
=
82 _mesa_hash_table_search(partial_update_ht
,
87 struct partial_update_state
*state
=
88 rzalloc(partial_update_ht
, struct partial_update_state
);
90 _mesa_hash_table_insert(partial_update_ht
, &inst
->dst
.index
, state
);
/* Records the write performed by inst at instruction ip inside block.
 *
 * The destination variable's [temp_start, temp_end] range is widened to
 * include ip.  The variable's bit in block->def is then set when the write is
 * known to cover the whole register:
 *   - directly, for a write with no dst.pack that is either unconditional or
 *     conditional only on the exec mask; or
 *   - once the partial_update_state for the temp has accumulated all four
 *     channels (channels == 0xf).  An unconditional write adds every channel
 *     it writes; a conditional write adds a channel when the instruction
 *     previously recorded for that channel used the complementary condition.
 * Per the in-body comment, nothing more is done once the variable is already
 * in block->use or block->def.
 * NOTE(review): this extract has dropped lines (early returns, the else
 * branch, the loop's skip of unwritten channels, comment terminators); the
 * control flow described above is read from the visible statements and the
 * original comments and should be confirmed against the full source.
 */
96 qir_setup_def(struct vc4_compile
*c
, struct qblock
*block
, int ip
,
97 struct hash_table
*partial_update_ht
, struct qinst
*inst
)
99 /* The def[] bitset marks when an initialization in a
100 * block completely screens off previous updates of
103 int var
= qir_reg_to_var(inst
->dst
);
107 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
108 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
110 /* If we've already tracked this as a def, or already used it within
111 * the block, there's nothing to do.
113 if (BITSET_TEST(block
->use
, var
) || BITSET_TEST(block
->def
, var
))
116 /* Easy, common case: unconditional full register update.
118 * We treat conditioning on the exec mask as the same as not being
119 * conditional. This makes sure that if the register gets set on
120 * either side of an if, it is treated as being screened off before
121 * the if. Otherwise, if there was no intervening def, its live
122 * interval doesn't extend back to the start of he program, and if too
123 * many registers did that we'd fail to register allocate.
125 if ((inst
->cond
== QPU_COND_ALWAYS
||
126 inst
->cond_is_exec_mask
) && !inst
->dst
.pack
) {
127 BITSET_SET(block
->def
, var
);
131 /* Finally, look at the condition code and packing and mark it as a
132 * def. We need to make sure that we understand sequences
145 * as defining the temp within the block, because otherwise dst's live
146 * range will get extended up the control flow to the top of the
149 struct partial_update_state
*state
=
150 get_partial_update_state(partial_update_ht
, inst
);
151 uint8_t mask
= qir_channels_written(inst
);
153 if (inst
->cond
== QPU_COND_ALWAYS
) {
154 state
->channels
|= mask
;
156 for (int i
= 0; i
< 4; i
++) {
157 if (!(mask
& (1 << i
)))
160 if (state
->insts
[i
] &&
161 state
->insts
[i
]->cond
==
162 qpu_cond_complement(inst
->cond
))
163 state
->channels
|= 1 << i
;
165 state
->insts
[i
] = inst
;
169 if (state
->channels
== 0xf)
170 BITSET_SET(block
->def
, var
);
/* Drops the recorded conditional writes: for every state stored in
 * partial_update_ht, resets to NULL each insts[i] whose instruction has a
 * non-zero cond.  The accumulated channels value is not touched here.
 *
 * Invoked from the per-instruction loop of qir_setup_def_use().
 * NOTE(review): the condition under which the caller invokes this is not
 * visible in this extract (the name suggests it relates to flag updates);
 * confirm against the full source.
 */
174 sf_state_clear(struct hash_table
*partial_update_ht
)
176 struct hash_entry
*entry
;
178 hash_table_foreach(partial_update_ht
, entry
) {
179 struct partial_update_state
*state
= entry
->data
;
181 for (int i
= 0; i
< 4; i
++) {
182 if (state
->insts
[i
] && state
->insts
[i
]->cond
)
183 state
->insts
[i
] = NULL
;
/* Summary of the visible body: walks every block and every instruction once,
 * records each block's start_ip, passes each of the instruction's
 * qir_get_op_nsrc() sources to qir_setup_use() and the instruction itself to
 * qir_setup_def().  The partial-update hash table is created with c as its
 * memory context, emptied at the start of each block and destroyed at the
 * end of the function.
 * NOTE(review): this extract has dropped lines -- the declaration and
 * increment of the ip counter, and the conditions guarding the
 * sf_state_clear() call and the temp_start = 0 override for payload
 * registers are not visible; confirm against the full source.
 */
188 /* Sets up the def/use arrays for when variables are used-before-defined or
189 * defined-before-used in the block.
191 * Also initializes the temp_start/temp_end to cover just the instruction IPs
192 * where the variable is used, which will be extended later in
193 * qir_compute_start_end().
196 qir_setup_def_use(struct vc4_compile
*c
)
198 struct hash_table
*partial_update_ht
=
199 _mesa_hash_table_create(c
, int_hash
, int_compare
);
202 qir_for_each_block(block
, c
) {
203 block
->start_ip
= ip
;
205 _mesa_hash_table_clear(partial_update_ht
, NULL
);
207 qir_for_each_inst(inst
, block
) {
208 for (int i
= 0; i
< qir_get_op_nsrc(inst
->op
); i
++)
209 qir_setup_use(c
, block
, ip
, inst
->src
[i
]);
211 qir_setup_def(c
, block
, ip
, partial_update_ht
, inst
);
214 sf_state_clear(partial_update_ht
);
219 /* The payload registers have values
220 * implicitly loaded at the start of the
223 if (inst
->dst
.file
== QFILE_TEMP
)
224 c
->temp_start
[inst
->dst
.index
] = 0;
234 _mesa_hash_table_destroy(partial_update_ht
, NULL
);
/* Performs one pass of the live-variable dataflow over all blocks, visiting
 * them in reverse order.
 *
 * For each block, every successor's live_in bits that are not yet in the
 * block's live_out are ORed into live_out; then live_in gains the bits of
 * use | (live_out & ...).  Both steps work one BITSET_WORD at a time over
 * bitset_words words.
 * qir_calculate_live_intervals() keeps calling this while it returns
 * non-zero.
 * NOTE(review): the second operand of the live_out mask (presumably ~def),
 * the progress flag and the return statement are missing from this extract;
 * confirm against the full source.
 */
238 qir_live_variables_dataflow(struct vc4_compile
*c
, int bitset_words
)
242 qir_for_each_block_rev(block
, c
) {
243 /* Update live_out: Any successor using the variable
244 * on entrance needs us to have the variable live on
247 qir_for_each_successor(succ
, block
) {
248 for (int i
= 0; i
< bitset_words
; i
++) {
249 BITSET_WORD new_live_out
= (succ
->live_in
[i
] &
250 ~block
->live_out
[i
]);
252 block
->live_out
[i
] |= new_live_out
;
259 for (int i
= 0; i
< bitset_words
; i
++) {
260 BITSET_WORD new_live_in
= (block
->use
[i
] |
261 (block
->live_out
[i
] &
263 if (new_live_in
& ~block
->live_in
[i
]) {
264 block
->live_in
[i
] |= new_live_in
;
/* Summary of the visible body: for every block and every variable index
 * below num_vars, temp_start[] is lowered with MIN2() and temp_end[] raised
 * with MAX2() whenever the variable is in the block's live_in set, and again
 * whenever it is in the block's live_out set.
 * NOTE(review): the second operands of the MIN2()/MAX2() calls are missing
 * from this extract (presumably the block's start and end IPs); confirm
 * against the full source.
 */
274 * Extend the start/end ranges for each variable to account for the
275 * new information calculated from control flow.
278 qir_compute_start_end(struct vc4_compile
*c
, int num_vars
)
280 qir_for_each_block(block
, c
) {
281 for (int i
= 0; i
< num_vars
; i
++) {
282 if (BITSET_TEST(block
->live_in
, i
)) {
283 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
285 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
289 if (BITSET_TEST(block
->live_out
, i
)) {
290 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
292 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
/* Top-level driver of this file: computes temp_start[]/temp_end[] for all
 * c->num_temps temporaries.
 *
 * Steps visible here: resize the two per-temp arrays and each block's
 * def/use/live_in/live_out bitsets (BITSET_WORDS(num_temps) words each) with
 * reralloc() under context c, seed every temp_start[] with MAX_INSTRUCTION,
 * run qir_setup_def_use(), repeat qir_live_variables_dataflow() until it
 * returns zero, then call qir_compute_start_end().
 * NOTE(review): temp_end[]'s initial value and any clearing of the
 * reralloc'ed bitsets are not visible in this extract; confirm against the
 * full source, since the dataflow ORs into those bitsets.
 */
300 qir_calculate_live_intervals(struct vc4_compile
*c
)
302 int bitset_words
= BITSET_WORDS(c
->num_temps
);
304 c
->temp_start
= reralloc(c
, c
->temp_start
, int, c
->num_temps
);
305 c
->temp_end
= reralloc(c
, c
->temp_end
, int, c
->num_temps
);
307 for (int i
= 0; i
< c
->num_temps
; i
++) {
308 c
->temp_start
[i
] = MAX_INSTRUCTION
;
312 qir_for_each_block(block
, c
) {
313 block
->def
= reralloc(c
, block
->def
, BITSET_WORD
, bitset_words
);
314 block
->use
= reralloc(c
, block
->use
, BITSET_WORD
, bitset_words
);
315 block
->live_in
= reralloc(c
, block
->live_in
, BITSET_WORD
, bitset_words
);
316 block
->live_out
= reralloc(c
, block
->live_out
, BITSET_WORD
, bitset_words
);
319 qir_setup_def_use(c
);
321 while (qir_live_variables_dataflow(c
, bitset_words
))
324 qir_compute_start_end(c
, c
->num_temps
);