/*
 * Copyright © 2012 Intel Corporation
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/* Initial value stored in every temp_start[] slot: large enough that the
 * first MIN2() against a real instruction index replaces it.
 */
#define MAX_INSTRUCTION (1 << 30)
#include <stdbool.h>
#include <stdint.h>

#include "util/ralloc.h"
#include "util/register_allocate.h"
#include "vc4_context.h"
/* Per-temp bookkeeping for writes that only partially define a register
 * (conditional and/or packed writes) within one basic block.
 */
struct partial_update_state {
        /* For each of the 4 channels, the last conditional write recorded
         * that is still waiting for a write under the complementary
         * condition.  NULL when nothing is pending.
         */
        struct qinst *insts[4];

        /* Bitmask of channels known to be completely written in this block;
         * the temp is treated as defined once this reaches 0xf.
         */
        uint8_t channels;
};
38 qir_reg_to_var(struct qreg reg
)
40 if (reg
.file
== QFILE_TEMP
)
47 qir_setup_use(struct vc4_compile
*c
, struct qblock
*block
, int ip
,
50 int var
= qir_reg_to_var(src
);
54 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
55 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
57 /* The use[] bitset marks when the block makes
58 * use of a variable without having completely
59 * defined that variable within the block.
61 if (!BITSET_TEST(block
->def
, var
))
62 BITSET_SET(block
->use
, var
);
65 static struct partial_update_state
*
66 get_partial_update_state(struct hash_table
*partial_update_ht
,
69 struct hash_entry
*entry
=
70 _mesa_hash_table_search(partial_update_ht
,
75 struct partial_update_state
*state
=
76 rzalloc(partial_update_ht
, struct partial_update_state
);
78 _mesa_hash_table_insert(partial_update_ht
, &inst
->dst
.index
, state
);
84 qir_setup_def(struct vc4_compile
*c
, struct qblock
*block
, int ip
,
85 struct hash_table
*partial_update_ht
, struct qinst
*inst
)
87 /* The def[] bitset marks when an initialization in a
88 * block completely screens off previous updates of
91 int var
= qir_reg_to_var(inst
->dst
);
95 c
->temp_start
[var
] = MIN2(c
->temp_start
[var
], ip
);
96 c
->temp_end
[var
] = MAX2(c
->temp_end
[var
], ip
);
98 /* If we've already tracked this as a def, or already used it within
99 * the block, there's nothing to do.
101 if (BITSET_TEST(block
->use
, var
) || BITSET_TEST(block
->def
, var
))
104 /* Easy, common case: unconditional full register update.
106 * We treat conditioning on the exec mask as the same as not being
107 * conditional. This makes sure that if the register gets set on
108 * either side of an if, it is treated as being screened off before
109 * the if. Otherwise, if there was no intervening def, its live
110 * interval doesn't extend back to the start of he program, and if too
111 * many registers did that we'd fail to register allocate.
113 if ((inst
->cond
== QPU_COND_ALWAYS
||
114 inst
->cond_is_exec_mask
) && !inst
->dst
.pack
) {
115 BITSET_SET(block
->def
, var
);
119 /* Finally, look at the condition code and packing and mark it as a
120 * def. We need to make sure that we understand sequences
133 * as defining the temp within the block, because otherwise dst's live
134 * range will get extended up the control flow to the top of the
137 struct partial_update_state
*state
=
138 get_partial_update_state(partial_update_ht
, inst
);
139 uint8_t mask
= qir_channels_written(inst
);
141 if (inst
->cond
== QPU_COND_ALWAYS
) {
142 state
->channels
|= mask
;
144 for (int i
= 0; i
< 4; i
++) {
145 if (!(mask
& (1 << i
)))
148 if (state
->insts
[i
] &&
149 state
->insts
[i
]->cond
==
150 qpu_cond_complement(inst
->cond
))
151 state
->channels
|= 1 << i
;
153 state
->insts
[i
] = inst
;
157 if (state
->channels
== 0xf)
158 BITSET_SET(block
->def
, var
);
162 sf_state_clear(struct hash_table
*partial_update_ht
)
164 hash_table_foreach(partial_update_ht
, entry
) {
165 struct partial_update_state
*state
= entry
->data
;
167 for (int i
= 0; i
< 4; i
++) {
168 if (state
->insts
[i
] && state
->insts
[i
]->cond
)
169 state
->insts
[i
] = NULL
;
/* Sets up the def/use arrays for when variables are used-before-defined or
 * defined-before-used in the block.
 *
 * Also initializes the temp_start/temp_end to cover just the instruction IPs
 * where the variable is used, which will be extended later in
 * qir_compute_start_end().
 */
/* Walks every block and every instruction in it, calling qir_setup_use() for
 * each source and qir_setup_def() for the destination, and stores the current
 * ip in block->start_ip on entry to each block.  The partial-update hash table
 * is created here, cleared at the start of every block and destroyed before
 * returning.
 *
 * NOTE(review): several statements of this function are absent from this copy
 * of the file -- the declaration and stepping of `ip`, whatever guards the
 * sf_state_clear() call, and the condition selecting which instructions get
 * their destination's temp_start forced to 0 -- so the text below does not
 * compile as-is.  Restore them from the upstream revision rather than
 * guessing.
 */
182 qir_setup_def_use(struct vc4_compile
*c
)
184 struct hash_table
*partial_update_ht
=
185 _mesa_hash_table_create(c
, _mesa_hash_int
, _mesa_key_int_equal
);
188 qir_for_each_block(block
, c
) {
189 block
->start_ip
= ip
;
191 _mesa_hash_table_clear(partial_update_ht
, NULL
);
193 qir_for_each_inst(inst
, block
) {
194 for (int i
= 0; i
< qir_get_nsrc(inst
); i
++)
195 qir_setup_use(c
, block
, ip
, inst
->src
[i
]);
197 qir_setup_def(c
, block
, ip
, partial_update_ht
, inst
);
200 sf_state_clear(partial_update_ht
);
205 /* The payload registers have values
206 * implicitly loaded at the start of the
209 if (inst
->dst
.file
== QFILE_TEMP
)
210 c
->temp_start
[inst
->dst
.index
] = 0;
220 _mesa_hash_table_destroy(partial_update_ht
, NULL
);
224 qir_live_variables_dataflow(struct vc4_compile
*c
, int bitset_words
)
228 qir_for_each_block_rev(block
, c
) {
229 /* Update live_out: Any successor using the variable
230 * on entrance needs us to have the variable live on
233 qir_for_each_successor(succ
, block
) {
234 for (int i
= 0; i
< bitset_words
; i
++) {
235 BITSET_WORD new_live_out
= (succ
->live_in
[i
] &
236 ~block
->live_out
[i
]);
238 block
->live_out
[i
] |= new_live_out
;
245 for (int i
= 0; i
< bitset_words
; i
++) {
246 BITSET_WORD new_live_in
= (block
->use
[i
] |
247 (block
->live_out
[i
] &
249 if (new_live_in
& ~block
->live_in
[i
]) {
250 block
->live_in
[i
] |= new_live_in
;
/**
 * Extend the start/end ranges for each variable to account for the
 * new information calculated from control flow.
 */
/* For every block and every variable index below num_vars, widens
 * c->temp_start[] (through MIN2) and c->temp_end[] (through MAX2) when the
 * variable is in the block's live_in set, and again when it is in the block's
 * live_out set.
 *
 * NOTE(review): the second operand of each MIN2()/MAX2() call is missing from
 * this copy of the file, so the text below does not compile as-is.
 * Presumably the operands are the block's first IP for the live_in case and
 * its last IP for the live_out case -- confirm against the upstream revision.
 */
264 qir_compute_start_end(struct vc4_compile
*c
, int num_vars
)
266 qir_for_each_block(block
, c
) {
267 for (int i
= 0; i
< num_vars
; i
++) {
268 if (BITSET_TEST(block
->live_in
, i
)) {
269 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
271 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
275 if (BITSET_TEST(block
->live_out
, i
)) {
276 c
->temp_start
[i
] = MIN2(c
->temp_start
[i
],
278 c
->temp_end
[i
] = MAX2(c
->temp_end
[i
],
286 qir_calculate_live_intervals(struct vc4_compile
*c
)
288 int bitset_words
= BITSET_WORDS(c
->num_temps
);
290 /* If we called this function more than once, then we should be
291 * freeing the previous arrays.
293 assert(!c
->temp_start
);
295 c
->temp_start
= rzalloc_array(c
, int, c
->num_temps
);
296 c
->temp_end
= rzalloc_array(c
, int, c
->num_temps
);
298 for (int i
= 0; i
< c
->num_temps
; i
++) {
299 c
->temp_start
[i
] = MAX_INSTRUCTION
;
303 qir_for_each_block(block
, c
) {
304 block
->def
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
305 block
->use
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
306 block
->live_in
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
307 block
->live_out
= rzalloc_array(c
, BITSET_WORD
, bitset_words
);
310 qir_setup_def_use(c
);
312 while (qir_live_variables_dataflow(c
, bitset_words
))
315 qir_compute_start_end(c
, c
->num_temps
);
317 if (vc4_debug
& VC4_DEBUG_SHADERDB
) {
319 for (int i
= 0; i
< c
->num_temps
; i
++)
320 last_ip
= MAX2(last_ip
, c
->temp_end
[i
]);
322 int reg_pressure
= 0;
323 int max_reg_pressure
= 0;
324 for (int i
= 0; i
< last_ip
; i
++) {
325 for (int j
= 0; j
< c
->num_temps
; j
++) {
326 if (c
->temp_start
[j
] == i
)
328 if (c
->temp_end
[j
] == i
)
331 max_reg_pressure
= MAX2(max_reg_pressure
, reg_pressure
);
334 fprintf(stderr
, "SHADER-DB: %s prog %d/%d: %d max temps\n",
335 qir_get_stage_name(c
->stage
),
336 c
->program_id
, c
->variant_id
,