vc4: Use the intrinsic's first_component for vattr VPM index.
[mesa.git] / src / gallium / drivers / vc4 / vc4_qir_live_variables.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
/* Initial value for every temp_start[] entry before any def/use has been
 * seen; each real instruction IP is then folded in with MIN2().
 * NOTE(review): assumed to be larger than any real instruction IP.
 */
#define MAX_INSTRUCTION (1 << 30)
26
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "vc4_context.h"
30 #include "vc4_qir.h"
31
/* Per-temp bookkeeping used to recognize a temp that becomes fully defined
 * within a block through several conditional and/or packed writes.
 */
struct partial_update_state {
        /* Per channel: the most recent conditional write to that channel
         * that has not yet been paired with a write under the complementary
         * condition.
         */
        struct qinst *insts[4];

        /* Bitmask of the channels known to be completely written; 0xf means
         * the whole temp is defined.
         */
        uint8_t channels;
};
36
/* Hash callback for the partial-update table: hashes the sizeof(int) bytes
 * that the key pointer refers to.
 */
static uint32_t
int_hash(const void *key)
{
        uint32_t hash = _mesa_hash_data(key, sizeof(int));

        return hash;
}
42
/* Key-equality callback for the partial-update table: both keys are treated
 * as pointers to int and the pointed-to values are compared.
 */
static bool
int_compare(const void *key1, const void *key2)
{
        const int *lhs = key1;
        const int *rhs = key2;

        return *lhs == *rhs;
}
48
49 static int
50 qir_reg_to_var(struct qreg reg)
51 {
52 if (reg.file == QFILE_TEMP)
53 return reg.index;
54
55 return -1;
56 }
57
58 static void
59 qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip,
60 struct qreg src)
61 {
62 int var = qir_reg_to_var(src);
63 if (var == -1)
64 return;
65
66 c->temp_start[var] = MIN2(c->temp_start[var], ip);
67 c->temp_end[var] = MAX2(c->temp_end[var], ip);
68
69 /* The use[] bitset marks when the block makes
70 * use of a variable without having completely
71 * defined that variable within the block.
72 */
73 if (!BITSET_TEST(block->def, var))
74 BITSET_SET(block->use, var);
75 }
76
77 static struct partial_update_state *
78 get_partial_update_state(struct hash_table *partial_update_ht,
79 struct qinst *inst)
80 {
81 struct hash_entry *entry =
82 _mesa_hash_table_search(partial_update_ht,
83 &inst->dst.index);
84 if (entry)
85 return entry->data;
86
87 struct partial_update_state *state =
88 rzalloc(partial_update_ht, struct partial_update_state);
89
90 _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
91
92 return state;
93 }
94
95 static void
96 qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
97 struct hash_table *partial_update_ht, struct qinst *inst)
98 {
99 /* The def[] bitset marks when an initialization in a
100 * block completely screens off previous updates of
101 * that variable.
102 */
103 int var = qir_reg_to_var(inst->dst);
104 if (var == -1)
105 return;
106
107 c->temp_start[var] = MIN2(c->temp_start[var], ip);
108 c->temp_end[var] = MAX2(c->temp_end[var], ip);
109
110 /* If we've already tracked this as a def, or already used it within
111 * the block, there's nothing to do.
112 */
113 if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
114 return;
115
116 /* Easy, common case: unconditional full register update. */
117 if (inst->cond == QPU_COND_ALWAYS && !inst->dst.pack) {
118 BITSET_SET(block->def, var);
119 return;
120 }
121
122 /* Finally, look at the condition code and packing and mark it as a
123 * def. We need to make sure that we understand sequences
124 * instructions like:
125 *
126 * mov.zs t0, t1
127 * mov.zc t0, t2
128 *
129 * or:
130 *
131 * mmov t0.8a, t1
132 * mmov t0.8b, t2
133 * mmov t0.8c, t3
134 * mmov t0.8d, t4
135 *
136 * as defining the temp within the block, because otherwise dst's live
137 * range will get extended up the control flow to the top of the
138 * program.
139 */
140 struct partial_update_state *state =
141 get_partial_update_state(partial_update_ht, inst);
142 uint8_t mask = qir_channels_written(inst);
143
144 if (inst->cond == QPU_COND_ALWAYS) {
145 state->channels |= mask;
146 } else {
147 for (int i = 0; i < 4; i++) {
148 if (!(mask & (1 << i)))
149 continue;
150
151 if (state->insts[i] &&
152 state->insts[i]->cond ==
153 qpu_cond_complement(inst->cond))
154 state->channels |= 1 << i;
155 else
156 state->insts[i] = inst;
157 }
158 }
159
160 if (state->channels == 0xf)
161 BITSET_SET(block->def, var);
162 }
163
164 static void
165 sf_state_clear(struct hash_table *partial_update_ht)
166 {
167 struct hash_entry *entry;
168
169 hash_table_foreach(partial_update_ht, entry) {
170 struct partial_update_state *state = entry->data;
171
172 for (int i = 0; i < 4; i++) {
173 if (state->insts[i] && state->insts[i]->cond)
174 state->insts[i] = NULL;
175 }
176 }
177 }
178
179 /* Sets up the def/use arrays for when variables are used-before-defined or
180 * defined-before-used in the block.
181 *
182 * Also initializes the temp_start/temp_end to cover just the instruction IPs
183 * where the variable is used, which will be extended later in
184 * qir_compute_start_end().
185 */
186 static void
187 qir_setup_def_use(struct vc4_compile *c)
188 {
189 struct hash_table *partial_update_ht =
190 _mesa_hash_table_create(c, int_hash, int_compare);
191 int ip = 0;
192
193 qir_for_each_block(block, c) {
194 block->start_ip = ip;
195
196 _mesa_hash_table_clear(partial_update_ht, NULL);
197
198 qir_for_each_inst(inst, block) {
199 for (int i = 0; i < qir_get_op_nsrc(inst->op); i++)
200 qir_setup_use(c, block, ip, inst->src[i]);
201
202 qir_setup_def(c, block, ip, partial_update_ht, inst);
203
204 if (inst->sf)
205 sf_state_clear(partial_update_ht);
206
207 switch (inst->op) {
208 case QOP_FRAG_Z:
209 case QOP_FRAG_W:
210 /* The payload registers have values
211 * implicitly loaded at the start of the
212 * program.
213 */
214 if (inst->dst.file == QFILE_TEMP)
215 c->temp_start[inst->dst.index] = 0;
216 break;
217 default:
218 break;
219 }
220 ip++;
221 }
222 block->end_ip = ip;
223 }
224
225 _mesa_hash_table_destroy(partial_update_ht, NULL);
226 }
227
228 static bool
229 qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words)
230 {
231 bool cont = false;
232
233 qir_for_each_block_rev(block, c) {
234 /* Update live_out: Any successor using the variable
235 * on entrance needs us to have the variable live on
236 * exit.
237 */
238 qir_for_each_successor(succ, block) {
239 for (int i = 0; i < bitset_words; i++) {
240 BITSET_WORD new_live_out = (succ->live_in[i] &
241 ~block->live_out[i]);
242 if (new_live_out) {
243 block->live_out[i] |= new_live_out;
244 cont = true;
245 }
246 }
247 }
248
249 /* Update live_in */
250 for (int i = 0; i < bitset_words; i++) {
251 BITSET_WORD new_live_in = (block->use[i] |
252 (block->live_out[i] &
253 ~block->def[i]));
254 if (new_live_in & ~block->live_in[i]) {
255 block->live_in[i] |= new_live_in;
256 cont = true;
257 }
258 }
259 }
260
261 return cont;
262 }
263
264 /**
265 * Extend the start/end ranges for each variable to account for the
266 * new information calculated from control flow.
267 */
268 static void
269 qir_compute_start_end(struct vc4_compile *c, int num_vars)
270 {
271 qir_for_each_block(block, c) {
272 for (int i = 0; i < num_vars; i++) {
273 if (BITSET_TEST(block->live_in, i)) {
274 c->temp_start[i] = MIN2(c->temp_start[i],
275 block->start_ip);
276 c->temp_end[i] = MAX2(c->temp_end[i],
277 block->start_ip);
278 }
279
280 if (BITSET_TEST(block->live_out, i)) {
281 c->temp_start[i] = MIN2(c->temp_start[i],
282 block->end_ip);
283 c->temp_end[i] = MAX2(c->temp_end[i],
284 block->end_ip);
285 }
286 }
287 }
288 }
289
290 void
291 qir_calculate_live_intervals(struct vc4_compile *c)
292 {
293 int bitset_words = BITSET_WORDS(c->num_temps);
294
295 c->temp_start = reralloc(c, c->temp_start, int, c->num_temps);
296 c->temp_end = reralloc(c, c->temp_end, int, c->num_temps);
297
298 for (int i = 0; i < c->num_temps; i++) {
299 c->temp_start[i] = MAX_INSTRUCTION;
300 c->temp_end[i] = -1;
301 }
302
303 qir_for_each_block(block, c) {
304 block->def = reralloc(c, block->def, BITSET_WORD, bitset_words);
305 block->use = reralloc(c, block->use, BITSET_WORD, bitset_words);
306 block->live_in = reralloc(c, block->live_in, BITSET_WORD, bitset_words);
307 block->live_out = reralloc(c, block->live_out, BITSET_WORD, bitset_words);
308 }
309
310 qir_setup_def_use(c);
311
312 while (qir_live_variables_dataflow(c, bitset_words))
313 ;
314
315 qir_compute_start_end(c, c->num_temps);
316 }