From 9458b017a946778ef5d065bfd61c47dafdfe3e94 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 20 Mar 2020 12:25:08 -0400 Subject: [PATCH] pan/bi: Flesh out st_vary IR We need to make the semantics of BI_VECTOR a bit more precise - vectorize only the first argument, not all of them. This is enough for current and future users, as far as I know. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_ra.c | 4 ++-- src/panfrost/bifrost/bifrost_compile.c | 16 ++++++++-------- src/panfrost/bifrost/bir.c | 6 +++--- src/panfrost/bifrost/compiler.h | 5 +++-- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c index 8c0fa92a12e..a047932ae71 100644 --- a/src/panfrost/bifrost/bi_ra.c +++ b/src/panfrost/bifrost/bi_ra.c @@ -114,7 +114,7 @@ bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src) if (ins->src[src] >= l->node_count) return; - bool vector = (bi_class_props[ins->type] & BI_VECTOR); + bool vector = (bi_class_props[ins->type] & BI_VECTOR) && src == 0; unsigned offset = 0; if (vector) { @@ -125,7 +125,7 @@ bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src) unsigned size = nir_alu_type_get_type_size(T); unsigned bytes = (MAX2(size, 8) / 8); unsigned comps_per_reg = 4 / bytes; - unsigned components = bi_get_component_count(ins); + unsigned components = bi_get_component_count(ins, src); for (unsigned i = 0; i < components; ++i) { unsigned off = ins->swizzle[src][i] / comps_per_reg; diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index ff1d354b49f..25bb4ab9c58 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -178,22 +178,22 @@ bi_emit_st_vary(bi_context *ctx, nir_intrinsic_instr *instr) address.src_types[2] = nir_type_uint32; address.src_types[3] = nir_intrinsic_type(instr); address.dest = bi_make_temp(ctx); - address.dest_type = nir_type_uint64; - address.writemask = (1 << 8) - 1; + address.dest_type = nir_type_uint32; + address.writemask = (1 << 12) - 1; bi_instruction st = { .type = BI_STORE_VAR, .src = { - address.dest, - bir_src_index(&instr->src[0]) + bir_src_index(&instr->src[0]), + address.dest, address.dest, address.dest, }, .src_types = { - nir_type_uint64, - nir_type_uint32 + nir_type_uint32, + nir_type_uint32, nir_type_uint32, nir_type_uint32, }, .swizzle = { - { 0 }, - { 0, 1, 2, 3 } + { 0, 1, 2, 3 }, + { 0 }, { 1 }, { 2} } }; diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c index 5de9d30fe30..c385cfe7587 100644 --- a/src/panfrost/bifrost/bir.c +++ b/src/panfrost/bifrost/bir.c @@ -93,10 +93,10 @@ bi_from_bytemask(uint16_t bytemask, unsigned bytes) } unsigned -bi_get_component_count(bi_instruction *ins) +bi_get_component_count(bi_instruction *ins, unsigned src) { if (bi_class_props[ins->type] & BI_VECTOR) { - return 4; + return (src == 0) ? 4 : 1; } else { /* Stores imply VECTOR */ assert(ins->dest_type); @@ -119,10 +119,10 @@ uint16_t bi_bytemask_of_read_components(bi_instruction *ins, unsigned node) { uint16_t mask = 0x0; - unsigned component_count = bi_get_component_count(ins); bi_foreach_src(ins, s) { if (ins->src[s] != node) continue; + unsigned component_count = bi_get_component_count(ins, s); nir_alu_type T = ins->src_types[s]; unsigned size = nir_alu_type_get_type_size(T); unsigned bytes = (MAX2(size, 8) / 8); diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 1a777feaaac..ee868f75d92 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -111,7 +111,8 @@ extern unsigned bi_class_props[BI_NUM_CLASSES]; * the end of a clause. Implies ADD */ #define BI_SCHED_HI_LATENCY (1 << 7) -/* Intrinsic is vectorized and should read 4 components regardless of writemask */ +/* Intrinsic is vectorized and should read 4 components in the first source + * regardless of writemask */ #define BI_VECTOR (1 << 8) /* Use a data register for src0/dest respectively, bypassing the usual @@ -530,7 +531,7 @@ bool bi_has_source_mods(bi_instruction *ins); bool bi_is_src_swizzled(bi_instruction *ins, unsigned s); bool bi_has_arg(bi_instruction *ins, unsigned arg); uint16_t bi_from_bytemask(uint16_t bytemask, unsigned bytes); -unsigned bi_get_component_count(bi_instruction *ins); +unsigned bi_get_component_count(bi_instruction *ins, unsigned s); unsigned bi_load32_components(bi_instruction *ins); uint16_t bi_bytemask_of_read_components(bi_instruction *ins, unsigned node); -- 2.30.2