v3d: Restrict live intervals to the blocks reachable from any def.
[mesa.git] / src / broadcom / compiler / vir_live_variables.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #define MAX_INSTRUCTION (1 << 30)
26
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
30
31 struct partial_update_state {
32 struct qinst *insts[4];
33 uint8_t channels;
34 };
35
36 static uint32_t
37 int_hash(const void *key)
38 {
39 return _mesa_hash_data(key, sizeof(int));
40 }
41
42 static bool
43 int_compare(const void *key1, const void *key2)
44 {
45 return *(const int *)key1 == *(const int *)key2;
46 }
47
48 static int
49 vir_reg_to_var(struct qreg reg)
50 {
51 if (reg.file == QFILE_TEMP)
52 return reg.index;
53
54 return -1;
55 }
56
57 static void
58 vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
59 struct qreg src)
60 {
61 int var = vir_reg_to_var(src);
62 if (var == -1)
63 return;
64
65 c->temp_start[var] = MIN2(c->temp_start[var], ip);
66 c->temp_end[var] = MAX2(c->temp_end[var], ip);
67
68 /* The use[] bitset marks when the block makes
69 * use of a variable without having completely
70 * defined that variable within the block.
71 */
72 if (!BITSET_TEST(block->def, var))
73 BITSET_SET(block->use, var);
74 }
75
76 static struct partial_update_state *
77 get_partial_update_state(struct hash_table *partial_update_ht,
78 struct qinst *inst)
79 {
80 struct hash_entry *entry =
81 _mesa_hash_table_search(partial_update_ht,
82 &inst->dst.index);
83 if (entry)
84 return entry->data;
85
86 struct partial_update_state *state =
87 rzalloc(partial_update_ht, struct partial_update_state);
88
89 _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
90
91 return state;
92 }
93
94 static void
95 vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
96 struct hash_table *partial_update_ht, struct qinst *inst)
97 {
98 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
99 return;
100
101 /* The def[] bitset marks when an initialization in a
102 * block completely screens off previous updates of
103 * that variable.
104 */
105 int var = vir_reg_to_var(inst->dst);
106 if (var == -1)
107 return;
108
109 c->temp_start[var] = MIN2(c->temp_start[var], ip);
110 c->temp_end[var] = MAX2(c->temp_end[var], ip);
111
112 /* Mark the block as having a (partial) def of the var. */
113 BITSET_SET(block->defout, var);
114
115 /* If we've already tracked this as a def that screens off previous
116 * uses, or already used it within the block, there's nothing to do.
117 */
118 if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
119 return;
120
121 /* Easy, common case: unconditional full register update.
122 *
123 * We treat conditioning on the exec mask as the same as not being
124 * conditional. This makes sure that if the register gets set on
125 * either side of an if, it is treated as being screened off before
126 * the if. Otherwise, if there was no intervening def, its live
127 * interval doesn't extend back to the start of he program, and if too
128 * many registers did that we'd fail to register allocate.
129 */
130 if (((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
131 inst->qpu.flags.mc == V3D_QPU_COND_NONE) ||
132 inst->cond_is_exec_mask) &&
133 inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
134 inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
135 BITSET_SET(block->def, var);
136 return;
137 }
138
139 /* Finally, look at the condition code and packing and mark it as a
140 * def. We need to make sure that we understand sequences
141 * instructions like:
142 *
143 * mov.zs t0, t1
144 * mov.zc t0, t2
145 *
146 * or:
147 *
148 * mmov t0.8a, t1
149 * mmov t0.8b, t2
150 * mmov t0.8c, t3
151 * mmov t0.8d, t4
152 *
153 * as defining the temp within the block, because otherwise dst's live
154 * range will get extended up the control flow to the top of the
155 * program.
156 */
157 struct partial_update_state *state =
158 get_partial_update_state(partial_update_ht, inst);
159 uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */
160
161 if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
162 inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
163 state->channels |= mask;
164 } else {
165 for (int i = 0; i < 4; i++) {
166 if (!(mask & (1 << i)))
167 continue;
168
169 /* XXXif (state->insts[i] &&
170 state->insts[i]->cond ==
171 qpu_cond_complement(inst->cond))
172 state->channels |= 1 << i;
173 else
174 */
175 state->insts[i] = inst;
176 }
177 }
178
179 if (state->channels == 0xf)
180 BITSET_SET(block->def, var);
181 }
182
183 static void
184 sf_state_clear(struct hash_table *partial_update_ht)
185 {
186 hash_table_foreach(partial_update_ht, entry) {
187 struct partial_update_state *state = entry->data;
188
189 for (int i = 0; i < 4; i++) {
190 if (state->insts[i] &&
191 (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
192 state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
193 state->insts[i] = NULL;
194 }
195 }
196 }
197
198 /* Sets up the def/use arrays for when variables are used-before-defined or
199 * defined-before-used in the block.
200 *
201 * Also initializes the temp_start/temp_end to cover just the instruction IPs
202 * where the variable is used, which will be extended later in
203 * vir_compute_start_end().
204 */
205 static void
206 vir_setup_def_use(struct v3d_compile *c)
207 {
208 struct hash_table *partial_update_ht =
209 _mesa_hash_table_create(c, int_hash, int_compare);
210 int ip = 0;
211
212 vir_for_each_block(block, c) {
213 block->start_ip = ip;
214
215 _mesa_hash_table_clear(partial_update_ht, NULL);
216
217 vir_for_each_inst(inst, block) {
218 for (int i = 0; i < vir_get_nsrc(inst); i++)
219 vir_setup_use(c, block, ip, inst->src[i]);
220
221 vir_setup_def(c, block, ip, partial_update_ht, inst);
222
223 if (false /* XXX inst->uf */)
224 sf_state_clear(partial_update_ht);
225
226 /* Payload registers: r0/1/2 contain W, centroid W,
227 * and Z at program start. Register allocation will
228 * force their nodes to R0/1/2.
229 */
230 if (inst->src[0].file == QFILE_REG) {
231 switch (inst->src[0].index) {
232 case 0:
233 case 1:
234 case 2:
235 c->temp_start[inst->dst.index] = 0;
236 break;
237 }
238 }
239
240 ip++;
241 }
242 block->end_ip = ip;
243 }
244
245 _mesa_hash_table_destroy(partial_update_ht, NULL);
246 }
247
248 static bool
249 vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
250 {
251 bool cont = false;
252
253 vir_for_each_block_rev(block, c) {
254 /* Update live_out: Any successor using the variable
255 * on entrance needs us to have the variable live on
256 * exit.
257 */
258 vir_for_each_successor(succ, block) {
259 for (int i = 0; i < bitset_words; i++) {
260 BITSET_WORD new_live_out = (succ->live_in[i] &
261 ~block->live_out[i]);
262 if (new_live_out) {
263 block->live_out[i] |= new_live_out;
264 cont = true;
265 }
266 }
267 }
268
269 /* Update live_in */
270 for (int i = 0; i < bitset_words; i++) {
271 BITSET_WORD new_live_in = (block->use[i] |
272 (block->live_out[i] &
273 ~block->def[i]));
274 if (new_live_in & ~block->live_in[i]) {
275 block->live_in[i] |= new_live_in;
276 cont = true;
277 }
278 }
279 }
280
281 return cont;
282 }
283
284 static bool
285 vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
286 {
287 bool cont = false;
288
289 vir_for_each_block_rev(block, c) {
290 /* Propagate defin/defout down the successors to produce the
291 * union of blocks with a reachable (partial) definition of
292 * the var.
293 *
294 * This keeps a conditional first write to a reg from
295 * extending its lifetime back to the start of the program.
296 */
297 vir_for_each_successor(succ, block) {
298 for (int i = 0; i < bitset_words; i++) {
299 BITSET_WORD new_def = (block->defout[i] &
300 ~succ->defin[i]);
301 succ->defin[i] |= new_def;
302 succ->defout[i] |= new_def;
303 cont |= new_def;
304 }
305 }
306 }
307
308 return cont;
309 }
310
311 /**
312 * Extend the start/end ranges for each variable to account for the
313 * new information calculated from control flow.
314 */
315 static void
316 vir_compute_start_end(struct v3d_compile *c, int num_vars)
317 {
318 vir_for_each_block(block, c) {
319 for (int i = 0; i < num_vars; i++) {
320 if (BITSET_TEST(block->live_in, i) &&
321 BITSET_TEST(block->defin, i)) {
322 c->temp_start[i] = MIN2(c->temp_start[i],
323 block->start_ip);
324 c->temp_end[i] = MAX2(c->temp_end[i],
325 block->start_ip);
326 }
327
328 if (BITSET_TEST(block->live_out, i) &&
329 BITSET_TEST(block->defout, i)) {
330 c->temp_start[i] = MIN2(c->temp_start[i],
331 block->end_ip);
332 c->temp_end[i] = MAX2(c->temp_end[i],
333 block->end_ip);
334 }
335 }
336 }
337 }
338
339 void
340 vir_calculate_live_intervals(struct v3d_compile *c)
341 {
342 int bitset_words = BITSET_WORDS(c->num_temps);
343
344 /* We may be called more than once if we've rearranged the program to
345 * try to get register allocation to succeed.
346 */
347 if (c->temp_start) {
348 ralloc_free(c->temp_start);
349 ralloc_free(c->temp_end);
350
351 vir_for_each_block(block, c) {
352 ralloc_free(block->def);
353 ralloc_free(block->use);
354 ralloc_free(block->live_in);
355 ralloc_free(block->live_out);
356 }
357 }
358
359 c->temp_start = rzalloc_array(c, int, c->num_temps);
360 c->temp_end = rzalloc_array(c, int, c->num_temps);
361
362 for (int i = 0; i < c->num_temps; i++) {
363 c->temp_start[i] = MAX_INSTRUCTION;
364 c->temp_end[i] = -1;
365 }
366
367 vir_for_each_block(block, c) {
368 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
369 block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
370 block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
371 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
372 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
373 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
374 }
375
376 vir_setup_def_use(c);
377
378 while (vir_live_variables_dataflow(c, bitset_words))
379 ;
380
381 while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
382 ;
383
384 vir_compute_start_end(c, c->num_temps);
385
386 c->live_intervals_valid = true;
387 }