Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_cp.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "freedreno_util.h"
30
31 #include "ir3.h"
32
33 /*
34 * Copy Propagate:
35 */
36
37 /* is it a type preserving mov, with ok flags? */
38 static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
39 {
40 if (is_same_type_mov(instr)) {
41 struct ir3_register *dst = instr->regs[0];
42 struct ir3_register *src = instr->regs[1];
43 struct ir3_instruction *src_instr = ssa(src);
44 if (dst->flags & IR3_REG_RELATIV)
45 return false;
46 if (src->flags & IR3_REG_RELATIV)
47 return false;
48 if (!allow_flags)
49 if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
50 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
51 return false;
52 if (!src_instr)
53 return false;
54 /* TODO: remove this hack: */
55 if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO))
56 return false;
57 /* TODO: we currently don't handle left/right neighbors
58 * very well when inserting parallel-copies into phi..
59 * to avoid problems don't eliminate a mov coming out
60 * of phi..
61 */
62 if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI))
63 return false;
64 return true;
65 }
66 return false;
67 }
68
69 static unsigned cp_flags(unsigned flags)
70 {
71 /* only considering these flags (at least for now): */
72 flags &= (IR3_REG_CONST | IR3_REG_IMMED |
73 IR3_REG_FNEG | IR3_REG_FABS |
74 IR3_REG_SNEG | IR3_REG_SABS |
75 IR3_REG_BNOT | IR3_REG_RELATIV);
76 return flags;
77 }
78
79 static bool valid_flags(struct ir3_instruction *instr, unsigned n,
80 unsigned flags)
81 {
82 unsigned valid_flags;
83 flags = cp_flags(flags);
84
85 /* clear flags that are 'ok' */
86 switch (instr->category) {
87 case 1:
88 valid_flags = IR3_REG_IMMED | IR3_REG_RELATIV;
89 if (flags & ~valid_flags)
90 return false;
91 break;
92 case 5:
93 /* no flags allowed */
94 if (flags)
95 return false;
96 break;
97 case 6:
98 valid_flags = IR3_REG_IMMED;
99 if (flags & ~valid_flags)
100 return false;
101 break;
102 case 2:
103 valid_flags = ir3_cat2_absneg(instr->opc) |
104 IR3_REG_CONST | IR3_REG_RELATIV;
105
106 if (ir3_cat2_int(instr->opc))
107 valid_flags |= IR3_REG_IMMED;
108
109 if (flags & ~valid_flags)
110 return false;
111
112 if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
113 unsigned m = (n ^ 1) + 1;
114 /* cannot deal w/ const in both srcs:
115 * (note that some cat2 actually only have a single src)
116 */
117 if (m < instr->regs_count) {
118 struct ir3_register *reg = instr->regs[m];
119 if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
120 return false;
121 if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
122 return false;
123 }
124 /* cannot be const + ABS|NEG: */
125 if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
126 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
127 return false;
128 }
129 break;
130 case 3:
131 valid_flags = ir3_cat3_absneg(instr->opc) |
132 IR3_REG_CONST | IR3_REG_RELATIV;
133
134 if (flags & ~valid_flags)
135 return false;
136
137 if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
138 /* cannot deal w/ const/relativ in 2nd src: */
139 if (n == 1)
140 return false;
141 }
142
143 if (flags & IR3_REG_CONST) {
144 /* cannot be const + ABS|NEG: */
145 if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
146 IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
147 return false;
148 }
149 break;
150 case 4:
151 /* seems like blob compiler avoids const as src.. */
152 /* TODO double check if this is still the case on a4xx */
153 if (flags & IR3_REG_CONST)
154 return false;
155 if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
156 return false;
157 break;
158 }
159
160 return true;
161 }
162
163 /* propagate register flags from src to dst.. negates need special
164 * handling to cancel each other out.
165 */
166 static void combine_flags(unsigned *dstflags, unsigned srcflags)
167 {
168 /* if what we are combining into already has (abs) flags,
169 * we can drop (neg) from src:
170 */
171 if (*dstflags & IR3_REG_FABS)
172 srcflags &= ~IR3_REG_FNEG;
173 if (*dstflags & IR3_REG_SABS)
174 srcflags &= ~IR3_REG_SNEG;
175
176 if (srcflags & IR3_REG_FABS)
177 *dstflags |= IR3_REG_FABS;
178 if (srcflags & IR3_REG_SABS)
179 *dstflags |= IR3_REG_SABS;
180 if (srcflags & IR3_REG_FNEG)
181 *dstflags ^= IR3_REG_FNEG;
182 if (srcflags & IR3_REG_SNEG)
183 *dstflags ^= IR3_REG_SNEG;
184 if (srcflags & IR3_REG_BNOT)
185 *dstflags ^= IR3_REG_BNOT;
186 }
187
188 static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags);
189
190 /* the "plain" MAD's (ie. the ones that don't shift first src prior to
191 * multiply) can swap their first two srcs if src[0] is !CONST and
192 * src[1] is CONST:
193 */
194 static bool is_valid_mad(struct ir3_instruction *instr)
195 {
196 return (instr->category == 3) && is_mad(instr->opc);
197 }
198
199 /**
200 * Handle cp for a given src register. This additionally handles
201 * the cases of collapsing immedate/const (which replace the src
202 * register with a non-ssa src) or collapsing mov's from relative
203 * src (which needs to also fixup the address src reference by the
204 * instruction).
205 */
206 static void
207 reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
208 {
209 unsigned src_flags = 0, new_flags;
210 struct ir3_instruction *src_instr;
211
212 if (is_meta(instr)) {
213 /* meta instructions cannot fold up register
214 * flags.. they are usually src for texture
215 * fetch, etc, where we cannot specify abs/neg
216 */
217 reg->instr = instr_cp(reg->instr, NULL);
218 return;
219 }
220
221 src_instr = instr_cp(reg->instr, &src_flags);
222
223 new_flags = reg->flags;
224 combine_flags(&new_flags, src_flags);
225
226 reg->flags = new_flags;
227 reg->instr = src_instr;
228
229 if (!valid_flags(instr, n, reg->flags)) {
230 /* insert an absneg.f */
231 if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)) {
232 debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS)));
233 reg->instr = ir3_ABSNEG_S(instr->block,
234 reg->instr, cp_flags(src_flags));
235 } else {
236 debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)));
237 reg->instr = ir3_ABSNEG_F(instr->block,
238 reg->instr, cp_flags(src_flags));
239 }
240 reg->flags &= ~cp_flags(src_flags);
241 debug_assert(valid_flags(instr, n, reg->flags));
242 /* send it through instr_cp() again since
243 * the absneg src might be a mov from const
244 * that could be cleaned up:
245 */
246 reg->instr = instr_cp(reg->instr, NULL);
247 return;
248 }
249
250 if (is_same_type_mov(reg->instr)) {
251 struct ir3_register *src_reg = reg->instr->regs[1];
252 unsigned new_flags = src_reg->flags;
253
254 combine_flags(&new_flags, reg->flags);
255
256 if (!valid_flags(instr, n, new_flags)) {
257 /* special case for "normal" mad instructions, we can
258 * try swapping the first two args if that fits better.
259 */
260 if ((n == 1) && is_valid_mad(instr) &&
261 !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
262 valid_flags(instr, 0, new_flags)) {
263 /* swap src[0] and src[1]: */
264 struct ir3_register *tmp;
265 tmp = instr->regs[0 + 1];
266 instr->regs[0 + 1] = instr->regs[1 + 1];
267 instr->regs[1 + 1] = tmp;
268 n = 0;
269 } else {
270 return;
271 }
272 }
273
274 /* Here we handle the special case of mov from
275 * CONST and/or RELATIV. These need to be handled
276 * specially, because in the case of move from CONST
277 * there is no src ir3_instruction so we need to
278 * replace the ir3_register. And in the case of
279 * RELATIV we need to handle the address register
280 * dependency.
281 */
282 if (src_reg->flags & IR3_REG_CONST) {
283 /* an instruction cannot reference two different
284 * address registers:
285 */
286 if ((src_reg->flags & IR3_REG_RELATIV) &&
287 conflicts(instr->address, reg->instr->address))
288 return;
289
290 src_reg->flags = new_flags;
291 instr->regs[n+1] = src_reg;
292
293 if (src_reg->flags & IR3_REG_RELATIV)
294 ir3_instr_set_address(instr, reg->instr->address);
295
296 return;
297 }
298
299 if ((src_reg->flags & IR3_REG_RELATIV) &&
300 !conflicts(instr->address, reg->instr->address)) {
301 src_reg->flags = new_flags;
302 instr->regs[n+1] = src_reg;
303 ir3_instr_set_address(instr, reg->instr->address);
304
305 return;
306 }
307
308 /* NOTE: seems we can only do immed integers, so don't
309 * need to care about float. But we do need to handle
310 * abs/neg *before* checking that the immediate requires
311 * few enough bits to encode:
312 *
313 * TODO: do we need to do something to avoid accidentally
314 * catching a float immed?
315 */
316 if (src_reg->flags & IR3_REG_IMMED) {
317 int32_t iim_val = src_reg->iim_val;
318
319 debug_assert((instr->category == 1) ||
320 (instr->category == 6) ||
321 ((instr->category == 2) &&
322 ir3_cat2_int(instr->opc)));
323
324 if (new_flags & IR3_REG_SABS)
325 iim_val = abs(iim_val);
326
327 if (new_flags & IR3_REG_SNEG)
328 iim_val = -iim_val;
329
330 if (new_flags & IR3_REG_BNOT)
331 iim_val = ~iim_val;
332
333 if (!(iim_val & ~0x3ff)) {
334 new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
335 src_reg->flags = new_flags;
336 src_reg->iim_val = iim_val;
337 instr->regs[n+1] = src_reg;
338 }
339
340 return;
341 }
342 }
343 }
344
345 /**
346 * Given an SSA src (instruction), return the one with extraneous
347 * mov's removed, ie, for (to copy NIR syntax):
348 *
349 * vec1 ssa1 = fadd <something>, <somethingelse>
350 * vec1 ssa2 = fabs ssa1
351 * vec1 ssa3 = fneg ssa1
352 *
353 * then calling instr_cp(ssa3, &flags) would return ssa1 with
354 * (IR3_REG_ABS | IR3_REG_NEGATE) in flags. If flags is NULL,
355 * then disallow eliminating copies which would require flag
356 * propagation (for example, we cannot propagate abs/neg into
357 * an output).
358 */
359 static struct ir3_instruction *
360 instr_cp(struct ir3_instruction *instr, unsigned *flags)
361 {
362 struct ir3_register *reg;
363
364 if (is_eligible_mov(instr, !!flags)) {
365 struct ir3_register *reg = instr->regs[1];
366 struct ir3_instruction *src_instr = ssa(reg);
367 if (flags)
368 combine_flags(flags, reg->flags);
369 return instr_cp(src_instr, flags);
370 }
371
372 /* Check termination condition before walking children (rather
373 * than before checking eligible-mov). A mov instruction may
374 * appear as ssa-src for multiple other instructions, and we
375 * want to consider it for removal for each, rather than just
376 * the first one. (But regardless of how many places it shows
377 * up as a src, we only need to recursively walk the children
378 * once.)
379 */
380 if (ir3_instr_check_mark(instr))
381 return instr;
382
383 /* walk down the graph from each src: */
384 foreach_src_n(reg, n, instr) {
385 if (!(reg->flags & IR3_REG_SSA))
386 continue;
387
388 reg_cp(instr, reg, n);
389 }
390
391 if (instr->address)
392 ir3_instr_set_address(instr, instr_cp(instr->address, NULL));
393
394 return instr;
395 }
396
397 void
398 ir3_cp(struct ir3 *ir)
399 {
400 ir3_clear_mark(ir);
401
402 for (unsigned i = 0; i < ir->noutputs; i++) {
403 if (ir->outputs[i]) {
404 struct ir3_instruction *out =
405 instr_cp(ir->outputs[i], NULL);
406
407 ir->outputs[i] = out;
408 }
409 }
410
411 for (unsigned i = 0; i < ir->keeps_count; i++) {
412 ir->keeps[i] = instr_cp(ir->keeps[i], NULL);
413 }
414
415 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
416 if (block->condition)
417 block->condition = instr_cp(block->condition, NULL);
418 }
419 }