* SOFTWARE.
*/
+#include "util/ralloc.h"
+
#include "ir3.h"
static bool
if (dst->flags & (IR3_REG_EVEN | IR3_REG_POS_INF))
return false;
+ if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
+ return false;
+ if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
+ return false;
+
if (instr->cat1.src_type == TYPE_F32 &&
instr->cat1.dst_type == TYPE_F16)
return true;
}
/* Returns true only when every SSA use of conv_src passes is_fp16_conv();
 * the first use that does not makes it return false. If the loop body
 * never runs, the result is true.
 */
static bool
-all_uses_fp16_conv(struct ir3 *ir, struct ir3_instruction *conv_src)
+all_uses_fp16_conv(struct ir3_instruction *conv_src)
{
- foreach_block (block, &ir->block_list) {
- foreach_instr (instr, &block->instr_list) {
- struct ir3_instruction *src;
- foreach_ssa_src (src, instr) {
- if (src == conv_src && !is_fp16_conv(instr))
- return false;
- }
- }
- }
-
+ foreach_ssa_use (use, conv_src) /* NOTE(review): presumably relies on ir3_find_ssa_uses() having run, as ir3_cf() does - confirm */
+ if (!is_fp16_conv(use))
+ return false;
return true;
}
+/* For an instruction which has a conversion folded in, rewrite *all* of
+ * the convs that used that src so that each becomes a simple mov that
+ * cp can eliminate. This avoids invalidating the SSA uses; it just
+ * shifts each use to a simple mov.
+ *
+ * For every use of src, IR3_REG_HALF on the use's regs[1] is set or
+ * cleared to match is_half(src), and cat1.src_type is made equal to
+ * cat1.dst_type. Each use is asserted to be an fp16 conv, so the caller
+ * is expected to have checked that already (try_conversion_folding()
+ * calls all_uses_fp16_conv() before getting here).
+ */
static void
-rewrite_uses(struct ir3 *ir, struct ir3_instruction *conv,
- struct ir3_instruction *replace)
+rewrite_src_uses(struct ir3_instruction *src)
{
- foreach_block (block, &ir->block_list) {
- foreach_instr (instr, &block->instr_list) {
- struct ir3_instruction *src;
- foreach_ssa_src_n (src, n, instr) {
- if (src == conv)
- instr->regs[n]->instr = replace;
- }
+ foreach_ssa_use (use, src) {
+ assert(is_fp16_conv(use));
+
+ if (is_half(src)) { /* mirror src's half-ness onto the use's regs[1] */
+ use->regs[1]->flags |= IR3_REG_HALF;
+ } else {
+ use->regs[1]->flags &= ~IR3_REG_HALF;
}
+
+ use->cat1.src_type = use->cat1.dst_type; /* same src and dst type: no conversion left in the use */
}
}
/* Tries to fold the fp16 conversion `conv` into the ALU instruction that
 * produces its source. The new version returns true when a fold was done
 * and false on every bail-out path:
 *  - conv is not an fp16 conversion,
 *  - conv's source is not an SSA value, or is not an ALU instruction,
 *  - the source is itself an fp16 conversion,
 *  - the source opcode is rejected by the switch below,
 *  - some use of the source is not an fp16 conversion.
 */
-static void
-try_conversion_folding(struct ir3 *ir, struct ir3_instruction *conv)
+static bool
+try_conversion_folding(struct ir3_instruction *conv)
{
struct ir3_instruction *src;
if (!is_fp16_conv(conv))
- return;
+ return false;
+ /* NOTE: we can have non-ssa srcs after copy propagation; the !src
+ * check below bails out for those:
+ */
src = ssa(conv->regs[1]);
+ if (!src)
+ return false;
+
if (!is_alu(src))
- return;
+ return false;
/* avoid folding f2f32(f2f16) together, in cases where this is legal to
* do (glsl) nir should have handled that for us already:
*/
if (is_fp16_conv(src))
- return;
+ return false;
switch (src->opc) {
case OPC_SEL_B32:
case OPC_MIN_F:
case OPC_SIGN_F:
case OPC_ABSNEG_F:
- return;
+ return false;
+ case OPC_MOV:
+ /* If src is itself a type-changing "cov" and its dst type is not the
+ * type this conv reads, the pair can't be folded. For example,
+ * cov.u32u16 + cov.f16f32 can't be folded to cov.u32f32.
+ */
+ if (src->cat1.dst_type != src->cat1.src_type &&
+ conv->cat1.src_type != src->cat1.dst_type)
+ return false;
+ break;
default:
break;
}
- if (!all_uses_fp16_conv(ir, src))
- return;
+ if (!all_uses_fp16_conv(src))
+ return false;
if (src->opc == OPC_MOV) {
if (src->cat1.dst_type == src->cat1.src_type) {
/* NOTE(review): the body of this branch is not visible in this excerpt */
}
}
- if (conv->regs[0]->flags & IR3_REG_HALF) {
- src->regs[0]->flags |= IR3_REG_HALF;
- } else {
- src->regs[0]->flags &= ~IR3_REG_HALF;
- }
+ ir3_set_dst_type(src, is_half(conv)); /* takes over from the open-coded IR3_REG_HALF update above */
+ rewrite_src_uses(src); /* every conv using src is rewritten, not only `conv` */
- rewrite_uses(ir, conv, src);
+ return true;
}
/* Entry point of the conversion-folding pass. The new version creates a
 * temporary ralloc context, calls ir3_find_ssa_uses() with it, runs
 * try_conversion_folding() on every instruction of every block, frees the
 * context, and returns true if any fold succeeded.
 */
-void
+bool
ir3_cf(struct ir3 *ir)
{
- foreach_block_safe (block, &ir->block_list) {
- foreach_instr_safe (instr, &block->instr_list) {
- try_conversion_folding(ir, instr);
+ void *mem_ctx = ralloc_context(NULL);
+ bool progress = false;
+
+ ir3_find_ssa_uses(ir, mem_ctx, false); /* NOTE(review): meaning of the 'false' argument is not visible here - confirm */
+
+ foreach_block (block, &ir->block_list) { /* NOTE(review): _safe iterators dropped; presumably nothing is unlinked while folding - confirm */
+ foreach_instr (instr, &block->instr_list) {
+ progress |= try_conversion_folding(instr);
}
}
+
+ ralloc_free(mem_ctx); /* NOTE(review): presumably also releases what ir3_find_ssa_uses() allocated against mem_ctx - confirm */
+
+ return progress;
}