1cc211a766384fdbd8e9f766caf71a5e2d870fbd
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_cp.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "freedreno_util.h"
30
31 #include "ir3.h"
32
33 /*
34 * Copy Propagate:
35 */
36
37 /* is it a type preserving mov, with ok flags? */
38 static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
39 {
40 if (is_same_type_mov(instr)) {
41 struct ir3_register *dst = instr->regs[0];
42 struct ir3_register *src = instr->regs[1];
43 struct ir3_instruction *src_instr = ssa(src);
44
45 /* only if mov src is SSA (not const/immed): */
46 if (!src_instr)
47 return false;
48
49 /* no indirect: */
50 if (dst->flags & IR3_REG_RELATIV)
51 return false;
52 if (src->flags & IR3_REG_RELATIV)
53 return false;
54
55 if (!allow_flags)
56 if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
57 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
58 return false;
59
60 /* TODO: remove this hack: */
61 if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO))
62 return false;
63 /* TODO: we currently don't handle left/right neighbors
64 * very well when inserting parallel-copies into phi..
65 * to avoid problems don't eliminate a mov coming out
66 * of phi..
67 */
68 if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI))
69 return false;
70 return true;
71 }
72 return false;
73 }
74
75 static unsigned cp_flags(unsigned flags)
76 {
77 /* only considering these flags (at least for now): */
78 flags &= (IR3_REG_CONST | IR3_REG_IMMED |
79 IR3_REG_FNEG | IR3_REG_FABS |
80 IR3_REG_SNEG | IR3_REG_SABS |
81 IR3_REG_BNOT | IR3_REG_RELATIV);
82 return flags;
83 }
84
85 static bool valid_flags(struct ir3_instruction *instr, unsigned n,
86 unsigned flags)
87 {
88 unsigned valid_flags;
89 flags = cp_flags(flags);
90
91 /* If destination is indirect, then source cannot be.. at least
92 * I don't think so..
93 */
94 if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
95 (flags & IR3_REG_RELATIV))
96 return false;
97
98 /* clear flags that are 'ok' */
99 switch (instr->category) {
100 case 1:
101 valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
102 if (flags & ~valid_flags)
103 return false;
104 break;
105 case 5:
106 /* no flags allowed */
107 if (flags)
108 return false;
109 break;
110 case 6:
111 valid_flags = IR3_REG_IMMED;
112 if (flags & ~valid_flags)
113 return false;
114 break;
115 case 2:
116 valid_flags = ir3_cat2_absneg(instr->opc) |
117 IR3_REG_CONST | IR3_REG_RELATIV;
118
119 if (ir3_cat2_int(instr->opc))
120 valid_flags |= IR3_REG_IMMED;
121
122 if (flags & ~valid_flags)
123 return false;
124
125 if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
126 unsigned m = (n ^ 1) + 1;
127 /* cannot deal w/ const in both srcs:
128 * (note that some cat2 actually only have a single src)
129 */
130 if (m < instr->regs_count) {
131 struct ir3_register *reg = instr->regs[m];
132 if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
133 return false;
134 if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
135 return false;
136 }
137 /* cannot be const + ABS|NEG: */
138 if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
139 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
140 return false;
141 }
142 break;
143 case 3:
144 valid_flags = ir3_cat3_absneg(instr->opc) |
145 IR3_REG_CONST | IR3_REG_RELATIV;
146
147 if (flags & ~valid_flags)
148 return false;
149
150 if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
151 /* cannot deal w/ const/relativ in 2nd src: */
152 if (n == 1)
153 return false;
154 }
155
156 if (flags & IR3_REG_CONST) {
157 /* cannot be const + ABS|NEG: */
158 if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
159 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
160 return false;
161 }
162 break;
163 case 4:
164 /* seems like blob compiler avoids const as src.. */
165 /* TODO double check if this is still the case on a4xx */
166 if (flags & IR3_REG_CONST)
167 return false;
168 if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
169 return false;
170 break;
171 }
172
173 return true;
174 }
175
176 /* propagate register flags from src to dst.. negates need special
177 * handling to cancel each other out.
178 */
179 static void combine_flags(unsigned *dstflags, unsigned srcflags)
180 {
181 /* if what we are combining into already has (abs) flags,
182 * we can drop (neg) from src:
183 */
184 if (*dstflags & IR3_REG_FABS)
185 srcflags &= ~IR3_REG_FNEG;
186 if (*dstflags & IR3_REG_SABS)
187 srcflags &= ~IR3_REG_SNEG;
188
189 if (srcflags & IR3_REG_FABS)
190 *dstflags |= IR3_REG_FABS;
191 if (srcflags & IR3_REG_SABS)
192 *dstflags |= IR3_REG_SABS;
193 if (srcflags & IR3_REG_FNEG)
194 *dstflags ^= IR3_REG_FNEG;
195 if (srcflags & IR3_REG_SNEG)
196 *dstflags ^= IR3_REG_SNEG;
197 if (srcflags & IR3_REG_BNOT)
198 *dstflags ^= IR3_REG_BNOT;
199
200 *dstflags &= ~IR3_REG_SSA;
201 *dstflags |= srcflags & IR3_REG_SSA;
202 *dstflags |= srcflags & IR3_REG_CONST;
203 *dstflags |= srcflags & IR3_REG_IMMED;
204 *dstflags |= srcflags & IR3_REG_RELATIV;
205 *dstflags |= srcflags & IR3_REG_ARRAY;
206 }
207
208 /* the "plain" MAD's (ie. the ones that don't shift first src prior to
209 * multiply) can swap their first two srcs if src[0] is !CONST and
210 * src[1] is CONST:
211 */
212 static bool is_valid_mad(struct ir3_instruction *instr)
213 {
214 return (instr->category == 3) && is_mad(instr->opc);
215 }
216
217 /**
218 * Handle cp for a given src register. This additionally handles
219 * the cases of collapsing immedate/const (which replace the src
220 * register with a non-ssa src) or collapsing mov's from relative
221 * src (which needs to also fixup the address src reference by the
222 * instruction).
223 */
224 static void
225 reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
226 {
227 struct ir3_instruction *src = ssa(reg);
228
229 if (is_eligible_mov(src, true)) {
230 /* simple case, no immed/const/relativ, only mov's w/ ssa src: */
231 struct ir3_register *src_reg = src->regs[1];
232 unsigned new_flags = reg->flags;
233
234 combine_flags(&new_flags, src_reg->flags);
235
236 if (valid_flags(instr, n, new_flags)) {
237 if (new_flags & IR3_REG_ARRAY) {
238 debug_assert(!(reg->flags & IR3_REG_ARRAY));
239 reg->array = src_reg->array;
240 }
241 reg->flags = new_flags;
242 reg->instr = ssa(src_reg);
243 }
244
245 src = ssa(reg); /* could be null for IR3_REG_ARRAY case */
246 if (!src)
247 return;
248 } else if (is_same_type_mov(src) &&
249 /* cannot collapse const/immed/etc into meta instrs: */
250 !is_meta(instr)) {
251 /* immed/const/etc cases, which require some special handling: */
252 struct ir3_register *src_reg = src->regs[1];
253 unsigned new_flags = reg->flags;
254
255 combine_flags(&new_flags, src_reg->flags);
256
257 if (!valid_flags(instr, n, new_flags)) {
258 /* special case for "normal" mad instructions, we can
259 * try swapping the first two args if that fits better.
260 */
261 if ((n == 1) && is_valid_mad(instr) &&
262 !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
263 valid_flags(instr, 0, new_flags)) {
264 /* swap src[0] and src[1]: */
265 struct ir3_register *tmp;
266 tmp = instr->regs[0 + 1];
267 instr->regs[0 + 1] = instr->regs[1 + 1];
268 instr->regs[1 + 1] = tmp;
269 n = 0;
270 } else {
271 return;
272 }
273 }
274
275 /* Here we handle the special case of mov from
276 * CONST and/or RELATIV. These need to be handled
277 * specially, because in the case of move from CONST
278 * there is no src ir3_instruction so we need to
279 * replace the ir3_register. And in the case of
280 * RELATIV we need to handle the address register
281 * dependency.
282 */
283 if (src_reg->flags & IR3_REG_CONST) {
284 /* an instruction cannot reference two different
285 * address registers:
286 */
287 if ((src_reg->flags & IR3_REG_RELATIV) &&
288 conflicts(instr->address, reg->instr->address))
289 return;
290
291 /* This seems to be a hw bug, or something where the timings
292 * just somehow don't work out. This restriction may only
293 * apply if the first src is also CONST.
294 */
295 if ((instr->category == 3) && (n == 2) &&
296 (src_reg->flags & IR3_REG_RELATIV) &&
297 (src_reg->array.offset == 0))
298 return;
299
300 src_reg = ir3_reg_clone(instr->block->shader, src_reg);
301 src_reg->flags = new_flags;
302 instr->regs[n+1] = src_reg;
303
304 if (src_reg->flags & IR3_REG_RELATIV)
305 ir3_instr_set_address(instr, reg->instr->address);
306
307 return;
308 }
309
310 if ((src_reg->flags & IR3_REG_RELATIV) &&
311 !conflicts(instr->address, reg->instr->address)) {
312 src_reg = ir3_reg_clone(instr->block->shader, src_reg);
313 src_reg->flags = new_flags;
314 instr->regs[n+1] = src_reg;
315 ir3_instr_set_address(instr, reg->instr->address);
316
317 return;
318 }
319
320 /* NOTE: seems we can only do immed integers, so don't
321 * need to care about float. But we do need to handle
322 * abs/neg *before* checking that the immediate requires
323 * few enough bits to encode:
324 *
325 * TODO: do we need to do something to avoid accidentally
326 * catching a float immed?
327 */
328 if (src_reg->flags & IR3_REG_IMMED) {
329 int32_t iim_val = src_reg->iim_val;
330
331 debug_assert((instr->category == 1) ||
332 (instr->category == 6) ||
333 ((instr->category == 2) &&
334 ir3_cat2_int(instr->opc)));
335
336 if (new_flags & IR3_REG_SABS)
337 iim_val = abs(iim_val);
338
339 if (new_flags & IR3_REG_SNEG)
340 iim_val = -iim_val;
341
342 if (new_flags & IR3_REG_BNOT)
343 iim_val = ~iim_val;
344
345 /* other than category 1 (mov) we can only encode up to 10 bits: */
346 if ((instr->category == 1) || !(iim_val & ~0x3ff)) {
347 new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
348 src_reg = ir3_reg_clone(instr->block->shader, src_reg);
349 src_reg->flags = new_flags;
350 src_reg->iim_val = iim_val;
351 instr->regs[n+1] = src_reg;
352 }
353
354 return;
355 }
356 }
357 }
358
359 /* Handle special case of eliminating output mov, and similar cases where
360 * there isn't a normal "consuming" instruction. In this case we cannot
361 * collapse flags (ie. output mov from const, or w/ abs/neg flags, cannot
362 * be eliminated)
363 */
364 static struct ir3_instruction *
365 eliminate_output_mov(struct ir3_instruction *instr)
366 {
367 if (is_eligible_mov(instr, false)) {
368 struct ir3_register *reg = instr->regs[1];
369 if (!(reg->flags & IR3_REG_ARRAY)) {
370 struct ir3_instruction *src_instr = ssa(reg);
371 debug_assert(src_instr);
372 return src_instr;
373 }
374 }
375 return instr;
376 }
377
378 /**
379 * Find instruction src's which are mov's that can be collapsed, replacing
380 * the mov dst with the mov src
381 */
382 static void
383 instr_cp(struct ir3_instruction *instr)
384 {
385 struct ir3_register *reg;
386
387 if (instr->regs_count == 0)
388 return;
389
390 if (ir3_instr_check_mark(instr))
391 return;
392
393 /* walk down the graph from each src: */
394 foreach_src_n(reg, n, instr) {
395 struct ir3_instruction *src = ssa(reg);
396
397 if (!src)
398 continue;
399
400 instr_cp(src);
401
402 /* TODO non-indirect access we could figure out which register
403 * we actually want and allow cp..
404 */
405 if (reg->flags & IR3_REG_ARRAY)
406 continue;
407
408 reg_cp(instr, reg, n);
409 }
410
411 if (instr->regs[0]->flags & IR3_REG_ARRAY) {
412 struct ir3_instruction *src = ssa(instr->regs[0]);
413 if (src)
414 instr_cp(src);
415 }
416
417 if (instr->address) {
418 instr_cp(instr->address);
419 ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
420 }
421 }
422
423 void
424 ir3_cp(struct ir3 *ir)
425 {
426 ir3_clear_mark(ir);
427
428 for (unsigned i = 0; i < ir->noutputs; i++) {
429 if (ir->outputs[i]) {
430 instr_cp(ir->outputs[i]);
431 ir->outputs[i] = eliminate_output_mov(ir->outputs[i]);
432 }
433 }
434
435 for (unsigned i = 0; i < ir->keeps_count; i++) {
436 instr_cp(ir->keeps[i]);
437 ir->keeps[i] = eliminate_output_mov(ir->keeps[i]);
438 }
439
440 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
441 if (block->condition) {
442 instr_cp(block->condition);
443 block->condition = eliminate_output_mov(block->condition);
444 }
445 }
446 }