From e9d480ca1bcf9d410535c95cbe2f93c802b82409 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Mon, 9 Mar 2020 14:25:00 -0400
Subject: [PATCH] pan/bi: Introduce writemasks

I feel so dirty. But this will let the IR be a lot more flexible seeing
as we really are vector in a certain sense (I/O, small types)

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/panfrost/bifrost/bi_print.c        | 24 +++++++++++++++++++++++-
 src/panfrost/bifrost/bifrost_compile.c | 17 ++++++++++++++---
 src/panfrost/bifrost/compiler.h        |  9 ++++++++-
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/src/panfrost/bifrost/bi_print.c b/src/panfrost/bifrost/bi_print.c
index 58d0d75ba51..2639a39784c 100644
--- a/src/panfrost/bifrost/bi_print.c
+++ b/src/panfrost/bifrost/bi_print.c
@@ -125,7 +125,6 @@ bi_class_name(enum bi_class cl)
         case BI_LOAD_ATTR: return "load_attr";
         case BI_LOAD_VAR: return "load_var";
         case BI_LOAD_VAR_ADDRESS: return "load_var_address";
-        case BI_MAKE_VEC: return "make_vec";
         case BI_MINMAX: return "minmax";
         case BI_MOV: return "mov";
         case BI_SHIFT: return "shift";
@@ -275,6 +274,25 @@ bi_print_branch(struct bi_branch *branch, FILE *fp)
         fprintf(fp, "%s", bi_cond_name(branch->cond));
 }
 
+static void
+bi_print_writemask(bi_instruction *ins, FILE *fp)
+{
+        unsigned bytes_per_comp = nir_alu_type_get_type_size(ins->dest_type) / 8;
+        unsigned comps = 16 / bytes_per_comp;
+        unsigned smask = (1 << bytes_per_comp) - 1;
+        fprintf(fp, ".");
+
+        for (unsigned i = 0; i < comps; ++i) {
+                unsigned masked = (ins->writemask >> (i * bytes_per_comp)) & smask;
+                if (!masked)
+                        continue;
+
+                assert(masked == smask);
+                assert(i < 4);
+                fputc("xyzw"[i], fp);
+        }
+}
+
 void
 bi_print_instruction(bi_instruction *ins, FILE *fp)
 {
@@ -311,6 +329,10 @@ bi_print_instruction(bi_instruction *ins, FILE *fp)
 
         fprintf(fp, " ");
         bi_print_index(fp, ins, ins->dest);
+
+        if (ins->dest)
+                bi_print_writemask(ins, fp);
+
         fprintf(fp, ", ");
 
         bi_foreach_src(ins, s) {
diff --git
a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 79164ac79a7..3515c94c336 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -60,6 +60,13 @@ emit_jump(bi_context *ctx, nir_jump_instr *instr)
         bi_block_add_successor(ctx->current_block, branch->branch.target);
 }
 
+/* Gets a bytemask for a complete vecN write */
+static unsigned
+bi_mask_for_channels_32(unsigned i)
+{
+        return (1 << (4 * i)) - 1;
+}
+
 static void
 bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr)
 {
@@ -76,6 +83,7 @@ bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr)
                 },
                 .dest = bir_dest_index(&instr->dest),
                 .dest_type = nir_type_float | nir_dest_bit_size(instr->dest),
+                .writemask = bi_mask_for_channels_32(instr->num_components)
         };
 
         nir_src *offset = nir_get_io_offset_src(instr);
@@ -137,7 +145,8 @@ bi_emit_ld_attr(bi_context *ctx, nir_intrinsic_instr *instr)
                 .type = BI_LOAD_ATTR,
                 .load = bi_direct_load_for_instr(instr),
                 .dest = bir_dest_index(&instr->dest),
-                .dest_type = nir_intrinsic_type(instr)
+                .dest_type = nir_intrinsic_type(instr),
+                .writemask = bi_mask_for_channels_32(instr->num_components)
         };
 
         bi_emit(ctx, load);
@@ -153,7 +162,8 @@ bi_emit_st_vary(bi_context *ctx, nir_intrinsic_instr *instr)
                 .type = BI_LOAD_VAR_ADDRESS,
                 .load = bi_direct_load_for_instr(instr),
                 .dest_type = nir_intrinsic_type(instr),
-                .dest = bi_make_temp(ctx)
+                .dest = bi_make_temp(ctx),
+                .writemask = bi_mask_for_channels_32(instr->num_components)
         };
 
         bi_instruction st = {
@@ -181,6 +191,7 @@ bi_emit_ld_uniform(bi_context *ctx, nir_intrinsic_instr *instr)
                 .load = bi_direct_load_for_instr(instr),
                 .dest = bir_dest_index(&instr->dest),
                 .dest_type = nir_intrinsic_type(instr),
+                .writemask = bi_mask_for_channels_32(instr->num_components),
                 .src = {
                         BIR_INDEX_ZERO /* TODO: UBOs */
                 }
@@ -237,6 +248,7 @@ emit_load_const(bi_context *ctx, nir_load_const_instr *instr)
                 .type = BI_MOV,
                 .dest = bir_ssa_index(&instr->def),
                 .dest_type =
instr->def.bit_size | nir_type_uint,
+                .writemask = (1 << (instr->def.bit_size / 8)) - 1,
                 .src = {
                         BIR_INDEX_CONSTANT
                 },
@@ -614,7 +626,6 @@ bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned p
 
         bi_print_shader(ctx, stdout);
         bi_schedule(ctx);
-        bi_print_shader(ctx, stdout);
 
         ralloc_free(ctx);
 }
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index d0ca32a7a79..c99eea0545c 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -63,7 +63,6 @@ enum bi_class {
         BI_LOAD_ATTR,
         BI_LOAD_VAR,
         BI_LOAD_VAR_ADDRESS,
-        BI_MAKE_VEC,
         BI_MINMAX,
         BI_MOV,
         BI_SHIFT,
@@ -207,6 +206,14 @@ typedef struct {
         /* Round mode (requires BI_ROUNDMODE) */
         enum bifrost_roundmode roundmode;
 
+        /* Writemask (bit for each affected byte). This is quite restricted --
+         * ALU ops can only write to a single channel (exception: <32 in which
+         * you can write to 32/N contiguous aligned channels). Load/store can
+         * only write to all channels at once, in a sense. But it's still
+         * better to use this generic form than have synthetic ops flying
+         * about, since we're not essentially vector for RA purposes. */
+        uint16_t writemask;
+
         /* Destination type. Usually the type of the instruction
          * itself, but if sources and destination have different
          * types, the type of the destination wins (so f2i would be
-- 
2.30.2