From e0a51d5308f3a9c6030c4ebc42be6be5c4b9e46a Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 22 Mar 2020 17:31:23 -0400 Subject: [PATCH] pan/bi: Ingest vecN directly (again) Last time, I swear. We still generate writemasks but SSA-like ones and do the lowering ourselves. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/Makefile.sources | 1 + src/panfrost/bifrost/bi_lower_combine.c | 68 +++++++++++++++++++++++++ src/panfrost/bifrost/bi_print.c | 1 + src/panfrost/bifrost/bi_tables.c | 1 + src/panfrost/bifrost/bifrost_compile.c | 63 ++++++----------------- src/panfrost/bifrost/compiler.h | 2 + src/panfrost/bifrost/meson.build | 1 + 7 files changed, 89 insertions(+), 48 deletions(-) create mode 100644 src/panfrost/bifrost/bi_lower_combine.c diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources index daa58eb8785..25c221bf697 100644 --- a/src/panfrost/Makefile.sources +++ b/src/panfrost/Makefile.sources @@ -2,6 +2,7 @@ bifrost_FILES := \ bifrost/bifrost.h \ bifrost/bifrost_compile.c \ bifrost/bifrost_compile.h \ + bifrost/bi_lower_combine.c \ bifrost/bi_tables.c \ bifrost/bi_schedule.c \ bifrost/bi_print.c \ diff --git a/src/panfrost/bifrost/bi_lower_combine.c b/src/panfrost/bifrost/bi_lower_combine.c new file mode 100644 index 00000000000..e41bdb61c52 --- /dev/null +++ b/src/panfrost/bifrost/bi_lower_combine.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2020 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "compiler.h" + +/* NIR creates vectors as vecN ops, which we represent by a synthetic + * BI_COMBINE instruction, e.g.: + * + * v = combine x, y, z, w + * + * These combines need to be lowered by the pass in this file. + */ + +static void +bi_insert_combine_mov(bi_context *ctx, bi_instruction *parent, unsigned comp) +{ + unsigned bits = nir_alu_type_get_type_size(parent->dest_type); + unsigned bytes = bits / 8; + + bi_instruction move = { + .type = BI_MOV, + .dest = parent->dest, + .dest_type = parent->dest_type, + .writemask = ((1 << bytes) - 1) << (bytes * comp), + .src = { parent->src[comp] }, + .src_types = { parent->dest_type }, + .swizzle = { { parent->swizzle[comp][0] } } + }; + + bi_emit_before(ctx, parent, move); +} + +void +bi_lower_combine(bi_context *ctx, bi_block *block) +{ + bi_foreach_instr_in_block_safe(block, ins) { + if (ins->type != BI_COMBINE) continue; + + bi_foreach_src(ins, s) { + if (!ins->src[s]) + break; + + bi_insert_combine_mov(ctx, ins, s); + } + + bi_remove_instruction(ins); + } +} diff --git a/src/panfrost/bifrost/bi_print.c b/src/panfrost/bifrost/bi_print.c index 5f53e4b98c1..73c3a7f6422 100644 --- a/src/panfrost/bifrost/bi_print.c +++ b/src/panfrost/bifrost/bi_print.c @@ -132,6 +132,7 @@ bi_class_name(enum bi_class cl) case BI_CMP: return "cmp"; case BI_BLEND: return "blend"; case BI_BITWISE: return "bitwise"; + case BI_COMBINE: return "combine"; case BI_CONVERT: return "convert"; case BI_CSEL: return "csel"; case BI_DISCARD: return "discard"; diff --git a/src/panfrost/bifrost/bi_tables.c b/src/panfrost/bifrost/bi_tables.c index bdacfde6fd6..3926afc4335 100644 --- a/src/panfrost/bifrost/bi_tables.c +++ b/src/panfrost/bifrost/bi_tables.c @@ -33,6 +33,7 @@ unsigned bi_class_props[BI_NUM_CLASSES] = { [BI_CMP] = BI_GENERIC | BI_MODS | BI_SCHED_ALL, [BI_BLEND] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR, [BI_BITWISE] = BI_GENERIC | BI_SCHED_ALL, + [BI_COMBINE] = 0, [BI_CONVERT] = BI_SCHED_ALL | BI_SWIZZLABLE, [BI_CSEL] = BI_SCHED_FMA, [BI_DISCARD] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD, diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index a00dabe71fb..42ce6a45621 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -381,6 +381,15 @@ bi_class_for_nir_alu(nir_op op) case nir_op_u2f64: return BI_CONVERT; + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + return BI_COMBINE; + + case nir_op_vec8: + case nir_op_vec16: + unreachable("should've been lowered"); + case nir_op_ffma: case nir_op_fmul: return BI_FMA; @@ -519,7 +528,10 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr) /* Construct a writemask */ unsigned bits_per_comp = instr->dest.dest.ssa.bit_size; unsigned comps = instr->dest.dest.ssa.num_components; - assert(comps == 1); + + if (alu.type != BI_COMBINE) + assert(comps == 1); + unsigned bits = bits_per_comp * comps; unsigned bytes = bits / 8; alu.writemask = (1 << bytes) - 1; @@ -909,53 +921,8 @@ bi_optimize_nir(nir_shader *nir) /* Take us out of SSA */ NIR_PASS(progress, nir, nir_lower_locals_to_regs); - NIR_PASS(progress, nir, nir_convert_from_ssa, true); - - /* We're a primary scalar architecture but there's enough vector that - * we use a vector IR so let's not also deal with scalar hacks on top - * of the vector hacks */ - NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest); - NIR_PASS(progress, nir, nir_lower_vec_to_movs); - NIR_PASS(progress, nir, nir_opt_dce); -} - -static void -bi_insert_mov32(bi_context *ctx, bi_instruction *parent, unsigned comp) -{ - bi_instruction move = { - .type = BI_MOV, - .dest = parent->dest, - .dest_type = nir_type_uint32, - .writemask = (0xF << (4 * comp)), - .src = { parent->src[0] }, - .src_types = { nir_type_uint32 }, - .swizzle = { { comp } } - }; - - bi_emit_before(ctx, parent, move); -} - -static void -bi_lower_mov(bi_context *ctx, bi_block *block) -{ - bi_foreach_instr_in_block_safe(block, ins) { - if (ins->type != BI_MOV) continue; - if (util_bitcount(ins->writemask) <= 4) continue; - - for (unsigned i = 0; i < 4; ++i) { - unsigned quad = (ins->writemask >> (4 * i)) & 0xF; - - if (quad == 0) - continue; - else if (quad == 0xF) - bi_insert_mov32(ctx, ins, i); - else - unreachable("TODO: Lowering <32bit moves"); - } - - bi_remove_instruction(ins); - } + NIR_PASS(progress, nir, nir_convert_from_ssa, true); } void @@ -1003,7 +970,7 @@ bifrost_compile_shader_nir(nir_shader *nir, panfrost_program *program, unsigned bi_foreach_block(ctx, _block) { bi_block *block = (bi_block *) _block; - bi_lower_mov(ctx, block); + bi_lower_combine(ctx, block); } bool progress = false; diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index b8eeb64f9ac..3a895c2a6d1 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -53,6 +53,7 @@ enum bi_class { BI_CMP, BI_BLEND, BI_BITWISE, + BI_COMBINE, BI_CONVERT, BI_CSEL, BI_DISCARD, @@ -541,6 +542,7 @@ uint64_t bi_get_immediate(bi_instruction *ins, unsigned index); /* BIR passes */ +void bi_lower_combine(bi_context *ctx, bi_block *block); bool bi_opt_dead_code_eliminate(bi_context *ctx, bi_block *block); void bi_schedule(bi_context *ctx); void bi_register_allocate(bi_context *ctx); diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 8f14d25005d..90a0d3dcfb1 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -22,6 +22,7 @@ libpanfrost_bifrost_files = files( 'disassemble.c', 'bi_liveness.c', + 'bi_lower_combine.c', 'bi_print.c', 'bi_opt_dce.c', 'bi_pack.c', -- 2.30.2