From 5e06d90c4510eb3a8c42b0e0d1a3ebfd19830069 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 30 Aug 2019 11:06:33 -0700 Subject: [PATCH] pan/midgard: Handle fragment writeout in RA Rather than using a pile of hacks and awkward constructs in MIR to ensure the writeout parameter gets written into r0, let's add a dedicated shadow register class for writeout (interfering with work register r0) so we can express the writeout condition succinctly and directly. Signed-off-by: Alyssa Rosenzweig --- src/panfrost/midgard/compiler.h | 3 +- src/panfrost/midgard/midgard_compile.c | 12 ++----- src/panfrost/midgard/midgard_compile.h | 4 +-- src/panfrost/midgard/midgard_ra.c | 41 +++++++++++++++++----- src/panfrost/midgard/midgard_ra_pipeline.c | 7 ++++ src/panfrost/midgard/midgard_schedule.c | 6 ++-- 6 files changed, 49 insertions(+), 24 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 00c6b52413a..82ccde658e3 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -587,13 +587,14 @@ struct ra_graph; /* Broad types of register classes so we can handle special * registers */ -#define NR_REG_CLASSES 5 +#define NR_REG_CLASSES 6 #define REG_CLASS_WORK 0 #define REG_CLASS_LDST 1 #define REG_CLASS_LDST27 2 #define REG_CLASS_TEXR 3 #define REG_CLASS_TEXW 4 +#define REG_CLASS_FRAGC 5 void mir_lower_special_reads(compiler_context *ctx); struct ra_graph* allocate_registers(compiler_context *ctx, bool *spilled); diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 158f89a23c3..eec3e8d56ed 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -1379,16 +1379,7 @@ compute_builtin_arg(nir_op op) static void emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt) { - /* First, move in whatever we're outputting */ - midgard_instruction move = v_mov(src, blank_alu_src, SSA_FIXED_REGISTER(0)); - if (rt != 
0) { - /* Force a tight schedule. TODO: Make the scheduler MRT aware */ - move.unit = UNIT_VMUL; - move.precede_break = true; - move.dont_eliminate = true; - } - - emit_mir_instruction(ctx, move); + emit_explicit_constant(ctx, src, src); /* If we're doing MRT, we need to specify the render target */ @@ -1974,6 +1965,7 @@ inline_alu_constants(compiler_context *ctx, midgard_block *block) mir_foreach_instr_in_block(block, alu) { /* Other instructions cannot inline constants */ if (alu->type != TAG_ALU_4) continue; + if (alu->compact_branch) continue; /* If there is already a constant here, we can do nothing */ if (alu->has_constants) continue; diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index b5fbb3de6ed..bf512a0ca59 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -42,10 +42,10 @@ struct midgard_screen { struct ra_regs *regs[9]; /* Work register classes corresponds to the above register sets. 20 per - * set for 4 classes per work/ldst/ldst27/texr/texw. TODO: Unify with + * set for 5 classes per work/ldst/ldst27/texr/texw/fragc. 
TODO: Unify with * compiler.h */ - unsigned reg_classes[9][4 * 5]; + unsigned reg_classes[9][5 * 5]; }; /* Define the general compiler entry point */ diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index c19c6674f57..11bef79a42c 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -49,6 +49,7 @@ /* We have overlapping register classes for special registers, handled via * shadows */ +#define SHADOW_R0 17 #define SHADOW_R28 18 #define SHADOW_R29 19 @@ -159,6 +160,8 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, unsigned reg) if (phys >= SHADOW_R28 && phys <= SHADOW_R29) phys += 28 - SHADOW_R28; + else if (phys == SHADOW_R0) + phys = 0; struct phys_reg r = { .reg = phys, @@ -180,12 +183,12 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, unsigned reg) * special register allocation */ static void -add_shadow_conflicts (struct ra_regs *regs, unsigned base, unsigned shadow) +add_shadow_conflicts (struct ra_regs *regs, unsigned base, unsigned shadow, unsigned shadow_count) { for (unsigned a = 0; a < WORK_STRIDE; ++a) { unsigned reg_a = (WORK_STRIDE * base) + a; - for (unsigned b = 0; b < WORK_STRIDE; ++b) { + for (unsigned b = 0; b < shadow_count; ++b) { unsigned reg_b = (WORK_STRIDE * shadow) + b; ra_add_reg_conflict(regs, reg_a, reg_b); @@ -202,7 +205,7 @@ create_register_set(unsigned work_count, unsigned *classes) /* First, initialize the RA */ struct ra_regs *regs = ra_alloc_reg_set(NULL, virtual_count, true); - for (unsigned c = 0; c < NR_REG_CLASSES; ++c) { + for (unsigned c = 0; c < (NR_REG_CLASSES - 1); ++c) { int work_vec4 = ra_alloc_reg_class(regs); int work_vec3 = ra_alloc_reg_class(regs); int work_vec2 = ra_alloc_reg_class(regs); @@ -253,10 +256,18 @@ create_register_set(unsigned work_count, unsigned *classes) } } + int fragc = ra_alloc_reg_class(regs); + + classes[4*REG_CLASS_FRAGC + 0] = fragc; + classes[4*REG_CLASS_FRAGC + 1] = fragc; + classes[4*REG_CLASS_FRAGC + 
2] = fragc; + classes[4*REG_CLASS_FRAGC + 3] = fragc; + ra_class_add_reg(regs, fragc, WORK_STRIDE * SHADOW_R0); /* We have duplicate classes */ - add_shadow_conflicts(regs, 28, SHADOW_R28); - add_shadow_conflicts(regs, 29, SHADOW_R29); + add_shadow_conflicts(regs, 0, SHADOW_R0, 1); + add_shadow_conflicts(regs, 28, SHADOW_R28, WORK_STRIDE); + add_shadow_conflicts(regs, 29, SHADOW_R29, WORK_STRIDE); /* We're done setting up */ ra_set_finalize(regs, NULL); @@ -399,6 +410,7 @@ mir_lower_special_reads(compiler_context *ctx) unsigned *alur = calloc(sz, 1); unsigned *aluw = calloc(sz, 1); + unsigned *brar = calloc(sz, 1); unsigned *ldst = calloc(sz, 1); unsigned *texr = calloc(sz, 1); unsigned *texw = calloc(sz, 1); @@ -412,6 +424,10 @@ mir_lower_special_reads(compiler_context *ctx) mark_node_class(alur, ins->src[0]); mark_node_class(alur, ins->src[1]); mark_node_class(alur, ins->src[2]); + + if (ins->compact_branch && ins->writeout) + mark_node_class(brar, ins->src[0]); + break; case TAG_LOAD_STORE_4: @@ -443,6 +459,7 @@ mir_lower_special_reads(compiler_context *ctx) for (unsigned i = 0; i < ctx->temp_count; ++i) { bool is_alur = BITSET_TEST(alur, i); bool is_aluw = BITSET_TEST(aluw, i); + bool is_brar = BITSET_TEST(brar, i); bool is_ldst = BITSET_TEST(ldst, i); bool is_texr = BITSET_TEST(texr, i); bool is_texw = BITSET_TEST(texw, i); @@ -457,7 +474,8 @@ mir_lower_special_reads(compiler_context *ctx) (is_alur && (is_ldst || is_texr)) || (is_ldst && (is_alur || is_texr || is_texw)) || (is_texr && (is_alur || is_ldst || is_texw)) || - (is_texw && (is_aluw || is_ldst || is_texr)); + (is_texw && (is_aluw || is_ldst || is_texr)) || + (is_brar && is_texw); if (!collision) continue; @@ -465,8 +483,8 @@ mir_lower_special_reads(compiler_context *ctx) /* Use the index as-is as the work copy. 
Emit copies for * special uses */ - unsigned classes[] = { TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4 }; - bool collisions[] = { is_ldst, is_texr, is_texw && is_aluw }; + unsigned classes[] = { TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4, TAG_ALU_4}; + bool collisions[] = { is_ldst, is_texr, is_texw && is_aluw, is_brar }; for (unsigned j = 0; j < ARRAY_SIZE(collisions); ++j) { if (!collisions[j]) continue; @@ -517,6 +535,7 @@ mir_lower_special_reads(compiler_context *ctx) free(alur); free(aluw); + free(brar); free(ldst); free(texr); free(texw); @@ -766,6 +785,12 @@ allocate_registers(compiler_context *ctx, bool *spilled) assert(check_read_class(found_class, ins->type, ins->src[2])); } + /* Mark writeout to r0 */ + mir_foreach_instr_global(ctx, ins) { + if (ins->compact_branch && ins->writeout) + set_class(found_class, ins->src[0], REG_CLASS_FRAGC); + } + for (unsigned i = 0; i < ctx->temp_count; ++i) { unsigned class = found_class[i]; ra_set_node_class(g, i, classes[class]); diff --git a/src/panfrost/midgard/midgard_ra_pipeline.c b/src/panfrost/midgard/midgard_ra_pipeline.c index feb457de0f9..afbcf5b64a0 100644 --- a/src/panfrost/midgard/midgard_ra_pipeline.c +++ b/src/panfrost/midgard/midgard_ra_pipeline.c @@ -60,6 +60,13 @@ mir_pipeline_ins( for (unsigned i = 0; i < bundle->instruction_count; ++i) { midgard_instruction *q = bundle->instructions[i]; read_mask |= mir_mask_of_read_components(q, node); + + /* The fragment colour can't be pipelined (well, it is + * pipelined in r0, but this is a delicate dance with + * scheduling and RA, not for us to worry about) */ + + if (q->compact_branch && q->writeout && mir_has_arg(q, node)) + return false; } /* Now analyze for a write mask */ diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index 7b687766491..6693a1b725b 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -160,7 +160,7 @@ midgard_has_hazard( */ static bool 
-can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsigned count, unsigned node_count) +can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsigned count, unsigned node_count, unsigned r0) { /* First scan for which components of r0 are written out. Initially * none are written */ @@ -176,7 +176,7 @@ can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsig for (unsigned i = 0; i < count; ++i) { midgard_instruction *ins = bundle[i]; - if (ins->dest != SSA_FIXED_REGISTER(0)) + if (ins->dest != r0) continue; /* Record written out mask */ @@ -516,7 +516,7 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction /* All of r0 has to be written out along with * the branch writeout */ - if (ains->writeout && !can_writeout_fragment(ctx, scheduled, index, ctx->temp_count)) { + if (ains->writeout && !can_writeout_fragment(ctx, scheduled, index, ctx->temp_count, ains->src[0])) { /* We only work on full moves * at the beginning. We could * probably do better */ -- 2.30.2