/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
29 #include "freedreno_util.h"
37 /* is it a type preserving mov, with ok flags? */
38 static bool is_eligible_mov(struct ir3_instruction
*instr
, bool allow_flags
)
40 if (is_same_type_mov(instr
)) {
41 struct ir3_register
*dst
= instr
->regs
[0];
42 struct ir3_register
*src
= instr
->regs
[1];
43 struct ir3_instruction
*src_instr
= ssa(src
);
44 if (dst
->flags
& IR3_REG_RELATIV
)
46 if (src
->flags
& IR3_REG_RELATIV
)
49 if (src
->flags
& (IR3_REG_FABS
| IR3_REG_FNEG
|
50 IR3_REG_SABS
| IR3_REG_SNEG
| IR3_REG_BNOT
))
54 /* TODO: remove this hack: */
55 if (is_meta(src_instr
) && (src_instr
->opc
== OPC_META_FO
))
57 /* TODO: we currently don't handle left/right neighbors
58 * very well when inserting parallel-copies into phi..
59 * to avoid problems don't eliminate a mov coming out
62 if (is_meta(src_instr
) && (src_instr
->opc
== OPC_META_PHI
))
69 static unsigned cp_flags(unsigned flags
)
71 /* only considering these flags (at least for now): */
72 flags
&= (IR3_REG_CONST
| IR3_REG_IMMED
|
73 IR3_REG_FNEG
| IR3_REG_FABS
|
74 IR3_REG_SNEG
| IR3_REG_SABS
|
75 IR3_REG_BNOT
| IR3_REG_RELATIV
);
79 static bool valid_flags(struct ir3_instruction
*instr
, unsigned n
,
83 flags
= cp_flags(flags
);
85 /* clear flags that are 'ok' */
86 switch (instr
->category
) {
88 valid_flags
= IR3_REG_IMMED
| IR3_REG_RELATIV
;
89 if (flags
& ~valid_flags
)
93 /* no flags allowed */
98 valid_flags
= IR3_REG_IMMED
;
99 if (flags
& ~valid_flags
)
103 valid_flags
= ir3_cat2_absneg(instr
->opc
) |
104 IR3_REG_CONST
| IR3_REG_RELATIV
;
106 if (ir3_cat2_int(instr
->opc
))
107 valid_flags
|= IR3_REG_IMMED
;
109 if (flags
& ~valid_flags
)
112 if (flags
& (IR3_REG_CONST
| IR3_REG_IMMED
)) {
113 unsigned m
= (n
^ 1) + 1;
114 /* cannot deal w/ const in both srcs:
115 * (note that some cat2 actually only have a single src)
117 if (m
< instr
->regs_count
) {
118 struct ir3_register
*reg
= instr
->regs
[m
];
119 if ((flags
& IR3_REG_CONST
) && (reg
->flags
& IR3_REG_CONST
))
121 if ((flags
& IR3_REG_IMMED
) && (reg
->flags
& IR3_REG_IMMED
))
124 /* cannot be const + ABS|NEG: */
125 if (flags
& (IR3_REG_FABS
| IR3_REG_FNEG
|
126 IR3_REG_SABS
| IR3_REG_SNEG
| IR3_REG_BNOT
))
131 valid_flags
= ir3_cat3_absneg(instr
->opc
) |
132 IR3_REG_CONST
| IR3_REG_RELATIV
;
134 if (flags
& ~valid_flags
)
137 if (flags
& (IR3_REG_CONST
| IR3_REG_RELATIV
)) {
138 /* cannot deal w/ const/relativ in 2nd src: */
143 if (flags
& IR3_REG_CONST
) {
144 /* cannot be const + ABS|NEG: */
145 if (flags
& (IR3_REG_FABS
| IR3_REG_FNEG
|
146 IR3_REG_SABS
| IR3_REG_SNEG
| IR3_REG_BNOT
))
151 /* seems like blob compiler avoids const as src.. */
152 /* TODO double check if this is still the case on a4xx */
153 if (flags
& IR3_REG_CONST
)
155 if (flags
& (IR3_REG_SABS
| IR3_REG_SNEG
))
163 /* propagate register flags from src to dst.. negates need special
164 * handling to cancel each other out.
166 static void combine_flags(unsigned *dstflags
, unsigned srcflags
)
168 /* if what we are combining into already has (abs) flags,
169 * we can drop (neg) from src:
171 if (*dstflags
& IR3_REG_FABS
)
172 srcflags
&= ~IR3_REG_FNEG
;
173 if (*dstflags
& IR3_REG_SABS
)
174 srcflags
&= ~IR3_REG_SNEG
;
176 if (srcflags
& IR3_REG_FABS
)
177 *dstflags
|= IR3_REG_FABS
;
178 if (srcflags
& IR3_REG_SABS
)
179 *dstflags
|= IR3_REG_SABS
;
180 if (srcflags
& IR3_REG_FNEG
)
181 *dstflags
^= IR3_REG_FNEG
;
182 if (srcflags
& IR3_REG_SNEG
)
183 *dstflags
^= IR3_REG_SNEG
;
184 if (srcflags
& IR3_REG_BNOT
)
185 *dstflags
^= IR3_REG_BNOT
;
/* forward declaration: reg_cp() and instr_cp() call each other */
static struct ir3_instruction *instr_cp(struct ir3_instruction *instr,
		unsigned *flags);
190 /* the "plain" MAD's (ie. the ones that don't shift first src prior to
191 * multiply) can swap their first two srcs if src[0] is !CONST and
194 static bool is_valid_mad(struct ir3_instruction
*instr
)
196 return (instr
->category
== 3) && is_mad(instr
->opc
);
200 * Handle cp for a given src register. This additionally handles
201 * the cases of collapsing immedate/const (which replace the src
202 * register with a non-ssa src) or collapsing mov's from relative
203 * src (which needs to also fixup the address src reference by the
207 reg_cp(struct ir3_instruction
*instr
, struct ir3_register
*reg
, unsigned n
)
209 unsigned src_flags
= 0, new_flags
;
210 struct ir3_instruction
*src_instr
;
212 if (is_meta(instr
)) {
213 /* meta instructions cannot fold up register
214 * flags.. they are usually src for texture
215 * fetch, etc, where we cannot specify abs/neg
217 reg
->instr
= instr_cp(reg
->instr
, NULL
);
221 src_instr
= instr_cp(reg
->instr
, &src_flags
);
223 new_flags
= reg
->flags
;
224 combine_flags(&new_flags
, src_flags
);
226 reg
->flags
= new_flags
;
227 reg
->instr
= src_instr
;
229 if (!valid_flags(instr
, n
, reg
->flags
)) {
230 /* insert an absneg.f */
231 if (reg
->flags
& (IR3_REG_SNEG
| IR3_REG_SABS
| IR3_REG_BNOT
)) {
232 debug_assert(!(reg
->flags
& (IR3_REG_FNEG
| IR3_REG_FABS
)));
233 reg
->instr
= ir3_ABSNEG_S(instr
->block
,
234 reg
->instr
, cp_flags(src_flags
));
236 debug_assert(!(reg
->flags
& (IR3_REG_SNEG
| IR3_REG_SABS
| IR3_REG_BNOT
)));
237 reg
->instr
= ir3_ABSNEG_F(instr
->block
,
238 reg
->instr
, cp_flags(src_flags
));
240 reg
->flags
&= ~cp_flags(src_flags
);
241 debug_assert(valid_flags(instr
, n
, reg
->flags
));
242 /* send it through instr_cp() again since
243 * the absneg src might be a mov from const
244 * that could be cleaned up:
246 reg
->instr
= instr_cp(reg
->instr
, NULL
);
250 if (is_same_type_mov(reg
->instr
)) {
251 struct ir3_register
*src_reg
= reg
->instr
->regs
[1];
252 unsigned new_flags
= src_reg
->flags
;
254 combine_flags(&new_flags
, reg
->flags
);
256 if (!valid_flags(instr
, n
, new_flags
)) {
257 /* special case for "normal" mad instructions, we can
258 * try swapping the first two args if that fits better.
260 if ((n
== 1) && is_valid_mad(instr
) &&
261 !(instr
->regs
[0 + 1]->flags
& (IR3_REG_CONST
| IR3_REG_RELATIV
)) &&
262 valid_flags(instr
, 0, new_flags
)) {
263 /* swap src[0] and src[1]: */
264 struct ir3_register
*tmp
;
265 tmp
= instr
->regs
[0 + 1];
266 instr
->regs
[0 + 1] = instr
->regs
[1 + 1];
267 instr
->regs
[1 + 1] = tmp
;
274 /* Here we handle the special case of mov from
275 * CONST and/or RELATIV. These need to be handled
276 * specially, because in the case of move from CONST
277 * there is no src ir3_instruction so we need to
278 * replace the ir3_register. And in the case of
279 * RELATIV we need to handle the address register
282 if (src_reg
->flags
& IR3_REG_CONST
) {
283 /* an instruction cannot reference two different
286 if ((src_reg
->flags
& IR3_REG_RELATIV
) &&
287 conflicts(instr
->address
, reg
->instr
->address
))
290 src_reg
->flags
= new_flags
;
291 instr
->regs
[n
+1] = src_reg
;
293 if (src_reg
->flags
& IR3_REG_RELATIV
)
294 ir3_instr_set_address(instr
, reg
->instr
->address
);
299 if ((src_reg
->flags
& IR3_REG_RELATIV
) &&
300 !conflicts(instr
->address
, reg
->instr
->address
)) {
301 src_reg
->flags
= new_flags
;
302 instr
->regs
[n
+1] = src_reg
;
303 ir3_instr_set_address(instr
, reg
->instr
->address
);
308 /* NOTE: seems we can only do immed integers, so don't
309 * need to care about float. But we do need to handle
310 * abs/neg *before* checking that the immediate requires
311 * few enough bits to encode:
313 * TODO: do we need to do something to avoid accidentally
314 * catching a float immed?
316 if (src_reg
->flags
& IR3_REG_IMMED
) {
317 int32_t iim_val
= src_reg
->iim_val
;
319 debug_assert((instr
->category
== 1) ||
320 (instr
->category
== 6) ||
321 ((instr
->category
== 2) &&
322 ir3_cat2_int(instr
->opc
)));
324 if (new_flags
& IR3_REG_SABS
)
325 iim_val
= abs(iim_val
);
327 if (new_flags
& IR3_REG_SNEG
)
330 if (new_flags
& IR3_REG_BNOT
)
333 if (!(iim_val
& ~0x3ff)) {
334 new_flags
&= ~(IR3_REG_SABS
| IR3_REG_SNEG
| IR3_REG_BNOT
);
335 src_reg
->flags
= new_flags
;
336 src_reg
->iim_val
= iim_val
;
337 instr
->regs
[n
+1] = src_reg
;
346 * Given an SSA src (instruction), return the one with extraneous
347 * mov's removed, ie, for (to copy NIR syntax):
349 * vec1 ssa1 = fadd <something>, <somethingelse>
350 * vec1 ssa2 = fabs ssa1
351 * vec1 ssa3 = fneg ssa1
353 * then calling instr_cp(ssa3, &flags) would return ssa1 with
354 * (IR3_REG_ABS | IR3_REG_NEGATE) in flags. If flags is NULL,
355 * then disallow eliminating copies which would require flag
356 * propagation (for example, we cannot propagate abs/neg into
359 static struct ir3_instruction
*
360 instr_cp(struct ir3_instruction
*instr
, unsigned *flags
)
362 struct ir3_register
*reg
;
364 if (is_eligible_mov(instr
, !!flags
)) {
365 struct ir3_register
*reg
= instr
->regs
[1];
366 struct ir3_instruction
*src_instr
= ssa(reg
);
368 combine_flags(flags
, reg
->flags
);
369 return instr_cp(src_instr
, flags
);
372 /* Check termination condition before walking children (rather
373 * than before checking eligible-mov). A mov instruction may
374 * appear as ssa-src for multiple other instructions, and we
375 * want to consider it for removal for each, rather than just
376 * the first one. (But regardless of how many places it shows
377 * up as a src, we only need to recursively walk the children
380 if (ir3_instr_check_mark(instr
))
383 /* walk down the graph from each src: */
384 foreach_src_n(reg
, n
, instr
) {
385 if (!(reg
->flags
& IR3_REG_SSA
))
388 reg_cp(instr
, reg
, n
);
392 ir3_instr_set_address(instr
, instr_cp(instr
->address
, NULL
));
398 ir3_cp(struct ir3
*ir
)
402 for (unsigned i
= 0; i
< ir
->noutputs
; i
++) {
403 if (ir
->outputs
[i
]) {
404 struct ir3_instruction
*out
=
405 instr_cp(ir
->outputs
[i
], NULL
);
407 ir
->outputs
[i
] = out
;
411 for (unsigned i
= 0; i
< ir
->keeps_count
; i
++) {
412 ir
->keeps
[i
] = instr_cp(ir
->keeps
[i
], NULL
);
415 list_for_each_entry (struct ir3_block
, block
, &ir
->block_list
, node
) {
416 if (block
->condition
)
417 block
->condition
= instr_cp(block
->condition
, NULL
);