From 10688257bd7cc1b4241f7b069a4ee9bc3338be47 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 20 Jun 2019 15:11:57 -0700 Subject: [PATCH] panfrost/midgard: Merge embedded constants In Midgard, a bundle consists of a few ALU instructions. Within the bundle, there is room for an optional 128-bit constant; this constant is shared across all instructions in the bundle. Unfortunately, many instructions want a 128-bit constant all to themselves (how selfish!). If we run out of space for constants in a bundle, the bundle has to be broken up, incurring a performance and space penalty. As an optimization, the scheduler now analyzes the constants coming in per-instruction and attempts to merge shared components, adjusting the swizzle accessing the bundle's constants appropriately. Concretely, given the GLSL: (a * vec4(1.5, 0.5, 0.5, 1.0)) + vec4(1.0, 2.3, 2.3, 0.5) instead of compiling to the naive two bundles: vmul.fmul [temp], [a], r26 fconstants 1.5, 0.5, 0.5, 1.0 vadd.fadd [out], [temp], r26 fconstants 1.0, 2.3, 2.3, 0.5 The scheduler can now fuse into a single (pipelined!) bundle: vmul.fmul [temp], [a], r26.xyyz vadd.fadd [out], [temp], r26.zwwy fconstants 1.5, 0.5, 1.0, 2.3 Signed-off-by: Alyssa Rosenzweig --- .../panfrost/midgard/midgard_schedule.c | 85 ++++++++++++++----- 1 file changed, 66 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c index 0bf3502f41c..7059f7bbe2a 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c @@ -147,6 +147,8 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction instructions_emitted = -1; midgard_instruction *pins = ins; + unsigned constant_count = 0; + for (;;) { midgard_instruction *ains = pins; @@ -251,33 +253,78 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction segment[segment_size++] = ains; - /* Only one set of embedded constants per - * bundle possible; if we have more, we must - * break the chain early, unfortunately */ + /* We try to reuse constants if possible, by adjusting + * the swizzle */ + + if (ains->has_blend_constant) { + bundle.has_blend_constant = 1; + bundle.has_embedded_constants = 1; + } else if (ains->has_constants) { + /* By definition, blend constants conflict with + * everything, so if there are already + * constants we break the bundle *now* */ + + if (bundle.has_blend_constant) + break; + + /* For anything but blend constants, we can do + * proper analysis, however */ + + /* TODO: Mask by which are used */ + uint32_t *constants = (uint32_t *) ains->constants; + uint32_t *bundles = (uint32_t *) bundle.constants; - if (ains->has_constants) { - if (bundle.has_embedded_constants) { - /* The blend constant needs to be - * alone, since it conflicts with - * everything by definition */ + uint32_t indices[4] = { 0 }; + bool break_bundle = false; - if (ains->has_blend_constant || bundle.has_blend_constant) + for (unsigned i = 0; i < 4; ++i) { + uint32_t cons = constants[i]; + bool constant_found = false; + + /* Search for the constant */ + for (unsigned j = 0; j < constant_count; ++j) { + if (bundles[j] != cons) + continue; + + /* We found it, reuse */ + indices[i] = j; + constant_found = true; break; + } + + if (constant_found) + continue; - /* ...but if there are already - * constants but these are the - * *same* constants, we let it - * through */ + /* We didn't find it, so allocate it */ + unsigned idx = constant_count++; - if (memcmp(bundle.constants, ains->constants, sizeof(bundle.constants))) + if (idx >= 4) { + /* Uh-oh, out of space */ + break_bundle = true; break; - } else { - bundle.has_embedded_constants = true; - memcpy(bundle.constants, ains->constants, sizeof(bundle.constants)); + } - /* If this is a blend shader special constant, track it for patching */ - bundle.has_blend_constant |= ains->has_blend_constant; + /* We have space, copy it in! */ + bundles[idx] = cons; + indices[i] = idx; } + + if (break_bundle) + break; + + /* Cool, we have it in. So use indices as a + * swizzle */ + + unsigned swizzle = SWIZZLE_FROM_ARRAY(indices); + unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT); + + if (ains->ssa_args.src0 == r_constant) + ains->alu.src1 = vector_alu_apply_swizzle(ains->alu.src1, swizzle); + + if (ains->ssa_args.src1 == r_constant) + ains->alu.src2 = vector_alu_apply_swizzle(ains->alu.src2, swizzle); + + bundle.has_embedded_constants = true; } if (ains->unit & UNITS_ANY_VECTOR) { -- 2.30.2