bifrost/bifrost.h \
bifrost/bifrost_compile.c \
bifrost/bifrost_compile.h \
+ bifrost/bi_lower_combine.c \
bifrost/bi_tables.c \
bifrost/bi_schedule.c \
bifrost/bi_print.c \
--- /dev/null
+/*
+ * Copyright (C) 2020 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+
+/* NIR creates vectors as vecN ops, which we represent with a synthetic
+ * BI_COMBINE instruction, e.g.:
+ *
+ * v = combine x, y, z, w
+ *
+ * This pass lowers each combine into a sequence of moves, one per source,
+ * each writing the corresponding component of the destination.
+ */
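+
+/* For the example above (assuming 32-bit components), the lowering emits
+ * roughly:
+ *
+ *    mov v.x, x
+ *    mov v.y, y
+ *    mov v.z, z
+ *    mov v.w, w
+ *
+ * and then removes the combine itself. */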
+
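+/* Emit a move before the combine copying source #comp into the matching
+ * component of the combine's destination, selected with a byte-granular
+ * writemask */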
+static void
+bi_insert_combine_mov(bi_context *ctx, bi_instruction *parent, unsigned comp)
+{
+ unsigned bits = nir_alu_type_get_type_size(parent->dest_type);
+ unsigned bytes = bits / 8;
+
+ bi_instruction move = {
+ .type = BI_MOV,
+ .dest = parent->dest,
+ .dest_type = parent->dest_type,
+ .writemask = ((1 << bytes) - 1) << (bytes * comp),
+ .src = { parent->src[comp] },
+ .src_types = { parent->dest_type },
+ .swizzle = { { parent->swizzle[comp][0] } }
+ };
+
+ bi_emit_before(ctx, parent, move);
+}
+
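+/* Lower every BI_COMBINE in the block to a series of component-wise moves */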
+void
+bi_lower_combine(bi_context *ctx, bi_block *block)
+{
+ bi_foreach_instr_in_block_safe(block, ins) {
+ if (ins->type != BI_COMBINE) continue;
+
+ bi_foreach_src(ins, s) {
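+ /* A combine's sources are packed from index 0, so stop at the first unset source */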
+ if (!ins->src[s])
+ break;
+
+ bi_insert_combine_mov(ctx, ins, s);
+ }
+
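+ /* Each written component is now covered by a move, so the combine is dead */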
+ bi_remove_instruction(ins);
+ }
+}
case BI_CMP: return "cmp";
case BI_BLEND: return "blend";
case BI_BITWISE: return "bitwise";
+ case BI_COMBINE: return "combine";
case BI_CONVERT: return "convert";
case BI_CSEL: return "csel";
case BI_DISCARD: return "discard";
[BI_CMP] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
[BI_BLEND] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR,
[BI_BITWISE] = BI_GENERIC | BI_SCHED_ALL,
+ [BI_COMBINE] = 0,
[BI_CONVERT] = BI_SCHED_ALL | BI_SWIZZLABLE,
[BI_CSEL] = BI_SCHED_FMA,
[BI_DISCARD] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD,
case nir_op_u2f64:
return BI_CONVERT;
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ return BI_COMBINE;
+
+ case nir_op_vec8:
+ case nir_op_vec16:
+ unreachable("should've been lowered");
+
case nir_op_ffma:
case nir_op_fmul:
return BI_FMA;
/* Construct a writemask */
unsigned bits_per_comp = instr->dest.dest.ssa.bit_size;
unsigned comps = instr->dest.dest.ssa.num_components;
- assert(comps == 1);
+
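+ /* Only combines may write more than one component at this point */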
+ if (alu.type != BI_COMBINE)
+ assert(comps == 1);
+
unsigned bits = bits_per_comp * comps;
unsigned bytes = bits / 8;
alu.writemask = (1 << bytes) - 1;
/* Take us out of SSA */
NIR_PASS(progress, nir, nir_lower_locals_to_regs);
- NIR_PASS(progress, nir, nir_convert_from_ssa, true);
-
- /* We're a primary scalar architecture but there's enough vector that
- * we use a vector IR so let's not also deal with scalar hacks on top
- * of the vector hacks */
-
NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
- NIR_PASS(progress, nir, nir_lower_vec_to_movs);
- NIR_PASS(progress, nir, nir_opt_dce);
-}
-
-static void
-bi_insert_mov32(bi_context *ctx, bi_instruction *parent, unsigned comp)
-{
- bi_instruction move = {
- .type = BI_MOV,
- .dest = parent->dest,
- .dest_type = nir_type_uint32,
- .writemask = (0xF << (4 * comp)),
- .src = { parent->src[0] },
- .src_types = { nir_type_uint32 },
- .swizzle = { { comp } }
- };
-
- bi_emit_before(ctx, parent, move);
-}
-
-static void
-bi_lower_mov(bi_context *ctx, bi_block *block)
-{
- bi_foreach_instr_in_block_safe(block, ins) {
- if (ins->type != BI_MOV) continue;
- if (util_bitcount(ins->writemask) <= 4) continue;
-
- for (unsigned i = 0; i < 4; ++i) {
- unsigned quad = (ins->writemask >> (4 * i)) & 0xF;
-
- if (quad == 0)
- continue;
- else if (quad == 0xF)
- bi_insert_mov32(ctx, ins, i);
- else
- unreachable("TODO: Lowering <32bit moves");
- }
-
- bi_remove_instruction(ins);
- }
+ NIR_PASS(progress, nir, nir_convert_from_ssa, true);
}
void
bi_foreach_block(ctx, _block) {
bi_block *block = (bi_block *) _block;
- bi_lower_mov(ctx, block);
+ bi_lower_combine(ctx, block);
}
bool progress = false;
BI_CMP,
BI_BLEND,
BI_BITWISE,
+ BI_COMBINE,
BI_CONVERT,
BI_CSEL,
BI_DISCARD,
/* BIR passes */
+void bi_lower_combine(bi_context *ctx, bi_block *block);
bool bi_opt_dead_code_eliminate(bi_context *ctx, bi_block *block);
void bi_schedule(bi_context *ctx);
void bi_register_allocate(bi_context *ctx);
libpanfrost_bifrost_files = files(
'disassemble.c',
'bi_liveness.c',
+ 'bi_lower_combine.c',
'bi_print.c',
'bi_opt_dce.c',
'bi_pack.c',