00fa3538cc040b672b8f47175a707ae758b4ee61
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_cp.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "freedreno_util.h"
30
31 #include "ir3.h"
32
33 /*
34 * Copy Propagate:
35 */
36
37 /* is it a type preserving mov, with ok flags? */
38 static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
39 {
40 if (is_same_type_mov(instr)) {
41 struct ir3_register *dst = instr->regs[0];
42 struct ir3_register *src = instr->regs[1];
43 struct ir3_instruction *src_instr = ssa(src);
44
45 /* only if mov src is SSA (not const/immed): */
46 if (!src_instr)
47 return false;
48
49 /* no indirect: */
50 if (dst->flags & IR3_REG_RELATIV)
51 return false;
52 if (src->flags & IR3_REG_RELATIV)
53 return false;
54
55 if (!allow_flags)
56 if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
57 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
58 return false;
59
60 /* TODO: remove this hack: */
61 if (src_instr->opc == OPC_META_FO)
62 return false;
63 /* TODO: we currently don't handle left/right neighbors
64 * very well when inserting parallel-copies into phi..
65 * to avoid problems don't eliminate a mov coming out
66 * of phi..
67 */
68 if (src_instr->opc == OPC_META_PHI)
69 return false;
70 return true;
71 }
72 return false;
73 }
74
75 static unsigned cp_flags(unsigned flags)
76 {
77 /* only considering these flags (at least for now): */
78 flags &= (IR3_REG_CONST | IR3_REG_IMMED |
79 IR3_REG_FNEG | IR3_REG_FABS |
80 IR3_REG_SNEG | IR3_REG_SABS |
81 IR3_REG_BNOT | IR3_REG_RELATIV);
82 return flags;
83 }
84
85 static bool valid_flags(struct ir3_instruction *instr, unsigned n,
86 unsigned flags)
87 {
88 unsigned valid_flags;
89 flags = cp_flags(flags);
90
91 /* If destination is indirect, then source cannot be.. at least
92 * I don't think so..
93 */
94 if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
95 (flags & IR3_REG_RELATIV))
96 return false;
97
98 /* clear flags that are 'ok' */
99 switch (opc_cat(instr->opc)) {
100 case 1:
101 valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
102 if (flags & ~valid_flags)
103 return false;
104 break;
105 case 5:
106 /* no flags allowed */
107 if (flags)
108 return false;
109 break;
110 case 6:
111 valid_flags = IR3_REG_IMMED;
112 if (flags & ~valid_flags)
113 return false;
114
115 if (flags & IR3_REG_IMMED) {
116 /* doesn't seem like we can have immediate src for store
117 * instructions:
118 *
119 * TODO this restriction could also apply to load instructions,
120 * but for load instructions this arg is the address (and not
121 * really sure any good way to test a hard-coded immed addr src)
122 */
123 if (is_store(instr) && (n == 1))
124 return false;
125 }
126
127 break;
128 case 2:
129 valid_flags = ir3_cat2_absneg(instr->opc) |
130 IR3_REG_CONST | IR3_REG_RELATIV;
131
132 if (ir3_cat2_int(instr->opc))
133 valid_flags |= IR3_REG_IMMED;
134
135 if (flags & ~valid_flags)
136 return false;
137
138 if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
139 unsigned m = (n ^ 1) + 1;
140 /* cannot deal w/ const in both srcs:
141 * (note that some cat2 actually only have a single src)
142 */
143 if (m < instr->regs_count) {
144 struct ir3_register *reg = instr->regs[m];
145 if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
146 return false;
147 if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
148 return false;
149 }
150 /* cannot be const + ABS|NEG: */
151 if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
152 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
153 return false;
154 }
155 break;
156 case 3:
157 valid_flags = ir3_cat3_absneg(instr->opc) |
158 IR3_REG_CONST | IR3_REG_RELATIV;
159
160 if (flags & ~valid_flags)
161 return false;
162
163 if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
164 /* cannot deal w/ const/relativ in 2nd src: */
165 if (n == 1)
166 return false;
167 }
168
169 if (flags & IR3_REG_CONST) {
170 /* cannot be const + ABS|NEG: */
171 if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
172 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
173 return false;
174 }
175 break;
176 case 4:
177 /* seems like blob compiler avoids const as src.. */
178 /* TODO double check if this is still the case on a4xx */
179 if (flags & IR3_REG_CONST)
180 return false;
181 if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
182 return false;
183 break;
184 }
185
186 return true;
187 }
188
189 /* propagate register flags from src to dst.. negates need special
190 * handling to cancel each other out.
191 */
192 static void combine_flags(unsigned *dstflags, unsigned srcflags)
193 {
194 /* if what we are combining into already has (abs) flags,
195 * we can drop (neg) from src:
196 */
197 if (*dstflags & IR3_REG_FABS)
198 srcflags &= ~IR3_REG_FNEG;
199 if (*dstflags & IR3_REG_SABS)
200 srcflags &= ~IR3_REG_SNEG;
201
202 if (srcflags & IR3_REG_FABS)
203 *dstflags |= IR3_REG_FABS;
204 if (srcflags & IR3_REG_SABS)
205 *dstflags |= IR3_REG_SABS;
206 if (srcflags & IR3_REG_FNEG)
207 *dstflags ^= IR3_REG_FNEG;
208 if (srcflags & IR3_REG_SNEG)
209 *dstflags ^= IR3_REG_SNEG;
210 if (srcflags & IR3_REG_BNOT)
211 *dstflags ^= IR3_REG_BNOT;
212
213 *dstflags &= ~IR3_REG_SSA;
214 *dstflags |= srcflags & IR3_REG_SSA;
215 *dstflags |= srcflags & IR3_REG_CONST;
216 *dstflags |= srcflags & IR3_REG_IMMED;
217 *dstflags |= srcflags & IR3_REG_RELATIV;
218 *dstflags |= srcflags & IR3_REG_ARRAY;
219 }
220
221 /**
222 * Handle cp for a given src register. This additionally handles
223 * the cases of collapsing immedate/const (which replace the src
224 * register with a non-ssa src) or collapsing mov's from relative
225 * src (which needs to also fixup the address src reference by the
226 * instruction).
227 */
228 static void
229 reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
230 {
231 struct ir3_instruction *src = ssa(reg);
232
233 if (is_eligible_mov(src, true)) {
234 /* simple case, no immed/const/relativ, only mov's w/ ssa src: */
235 struct ir3_register *src_reg = src->regs[1];
236 unsigned new_flags = reg->flags;
237
238 combine_flags(&new_flags, src_reg->flags);
239
240 if (valid_flags(instr, n, new_flags)) {
241 if (new_flags & IR3_REG_ARRAY) {
242 debug_assert(!(reg->flags & IR3_REG_ARRAY));
243 reg->array = src_reg->array;
244 }
245 reg->flags = new_flags;
246 reg->instr = ssa(src_reg);
247 }
248
249 src = ssa(reg); /* could be null for IR3_REG_ARRAY case */
250 if (!src)
251 return;
252 } else if (is_same_type_mov(src) &&
253 /* cannot collapse const/immed/etc into meta instrs: */
254 !is_meta(instr)) {
255 /* immed/const/etc cases, which require some special handling: */
256 struct ir3_register *src_reg = src->regs[1];
257 unsigned new_flags = reg->flags;
258
259 combine_flags(&new_flags, src_reg->flags);
260
261 if (!valid_flags(instr, n, new_flags)) {
262 /* special case for "normal" mad instructions, we can
263 * try swapping the first two args if that fits better.
264 *
265 * the "plain" MAD's (ie. the ones that don't shift first
266 * src prior to multiply) can swap their first two srcs if
267 * src[0] is !CONST and src[1] is CONST:
268 */
269 if ((n == 1) && is_mad(instr->opc) &&
270 !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
271 valid_flags(instr, 0, new_flags)) {
272 /* swap src[0] and src[1]: */
273 struct ir3_register *tmp;
274 tmp = instr->regs[0 + 1];
275 instr->regs[0 + 1] = instr->regs[1 + 1];
276 instr->regs[1 + 1] = tmp;
277 n = 0;
278 } else {
279 return;
280 }
281 }
282
283 /* Here we handle the special case of mov from
284 * CONST and/or RELATIV. These need to be handled
285 * specially, because in the case of move from CONST
286 * there is no src ir3_instruction so we need to
287 * replace the ir3_register. And in the case of
288 * RELATIV we need to handle the address register
289 * dependency.
290 */
291 if (src_reg->flags & IR3_REG_CONST) {
292 /* an instruction cannot reference two different
293 * address registers:
294 */
295 if ((src_reg->flags & IR3_REG_RELATIV) &&
296 conflicts(instr->address, reg->instr->address))
297 return;
298
299 /* This seems to be a hw bug, or something where the timings
300 * just somehow don't work out. This restriction may only
301 * apply if the first src is also CONST.
302 */
303 if ((opc_cat(instr->opc) == 3) && (n == 2) &&
304 (src_reg->flags & IR3_REG_RELATIV) &&
305 (src_reg->array.offset == 0))
306 return;
307
308 src_reg = ir3_reg_clone(instr->block->shader, src_reg);
309 src_reg->flags = new_flags;
310 instr->regs[n+1] = src_reg;
311
312 if (src_reg->flags & IR3_REG_RELATIV)
313 ir3_instr_set_address(instr, reg->instr->address);
314
315 return;
316 }
317
318 if ((src_reg->flags & IR3_REG_RELATIV) &&
319 !conflicts(instr->address, reg->instr->address)) {
320 src_reg = ir3_reg_clone(instr->block->shader, src_reg);
321 src_reg->flags = new_flags;
322 instr->regs[n+1] = src_reg;
323 ir3_instr_set_address(instr, reg->instr->address);
324
325 return;
326 }
327
328 /* NOTE: seems we can only do immed integers, so don't
329 * need to care about float. But we do need to handle
330 * abs/neg *before* checking that the immediate requires
331 * few enough bits to encode:
332 *
333 * TODO: do we need to do something to avoid accidentally
334 * catching a float immed?
335 */
336 if (src_reg->flags & IR3_REG_IMMED) {
337 int32_t iim_val = src_reg->iim_val;
338
339 debug_assert((opc_cat(instr->opc) == 1) ||
340 (opc_cat(instr->opc) == 6) ||
341 ir3_cat2_int(instr->opc));
342
343 if (new_flags & IR3_REG_SABS)
344 iim_val = abs(iim_val);
345
346 if (new_flags & IR3_REG_SNEG)
347 iim_val = -iim_val;
348
349 if (new_flags & IR3_REG_BNOT)
350 iim_val = ~iim_val;
351
352 /* other than category 1 (mov) we can only encode up to 10 bits: */
353 if ((instr->opc == OPC_MOV) || !(iim_val & ~0x3ff)) {
354 new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
355 src_reg = ir3_reg_clone(instr->block->shader, src_reg);
356 src_reg->flags = new_flags;
357 src_reg->iim_val = iim_val;
358 instr->regs[n+1] = src_reg;
359 }
360
361 return;
362 }
363 }
364 }
365
366 /* Handle special case of eliminating output mov, and similar cases where
367 * there isn't a normal "consuming" instruction. In this case we cannot
368 * collapse flags (ie. output mov from const, or w/ abs/neg flags, cannot
369 * be eliminated)
370 */
371 static struct ir3_instruction *
372 eliminate_output_mov(struct ir3_instruction *instr)
373 {
374 if (is_eligible_mov(instr, false)) {
375 struct ir3_register *reg = instr->regs[1];
376 if (!(reg->flags & IR3_REG_ARRAY)) {
377 struct ir3_instruction *src_instr = ssa(reg);
378 debug_assert(src_instr);
379 return src_instr;
380 }
381 }
382 return instr;
383 }
384
385 /**
386 * Find instruction src's which are mov's that can be collapsed, replacing
387 * the mov dst with the mov src
388 */
389 static void
390 instr_cp(struct ir3_instruction *instr)
391 {
392 struct ir3_register *reg;
393
394 if (instr->regs_count == 0)
395 return;
396
397 if (ir3_instr_check_mark(instr))
398 return;
399
400 /* walk down the graph from each src: */
401 foreach_src_n(reg, n, instr) {
402 struct ir3_instruction *src = ssa(reg);
403
404 if (!src)
405 continue;
406
407 instr_cp(src);
408
409 /* TODO non-indirect access we could figure out which register
410 * we actually want and allow cp..
411 */
412 if (reg->flags & IR3_REG_ARRAY)
413 continue;
414
415 reg_cp(instr, reg, n);
416 }
417
418 if (instr->regs[0]->flags & IR3_REG_ARRAY) {
419 struct ir3_instruction *src = ssa(instr->regs[0]);
420 if (src)
421 instr_cp(src);
422 }
423
424 if (instr->address) {
425 instr_cp(instr->address);
426 ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
427 }
428 }
429
430 void
431 ir3_cp(struct ir3 *ir)
432 {
433 ir3_clear_mark(ir);
434
435 for (unsigned i = 0; i < ir->noutputs; i++) {
436 if (ir->outputs[i]) {
437 instr_cp(ir->outputs[i]);
438 ir->outputs[i] = eliminate_output_mov(ir->outputs[i]);
439 }
440 }
441
442 for (unsigned i = 0; i < ir->keeps_count; i++) {
443 instr_cp(ir->keeps[i]);
444 ir->keeps[i] = eliminate_output_mov(ir->keeps[i]);
445 }
446
447 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
448 if (block->condition) {
449 instr_cp(block->condition);
450 block->condition = eliminate_output_mov(block->condition);
451 }
452 }
453 }