panfrost/midgard: Merge embedded constants
author: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 20 Jun 2019 22:11:57 +0000 (15:11 -0700)
committer: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Wed, 26 Jun 2019 17:01:36 +0000 (10:01 -0700)
In Midgard, a bundle consists of a few ALU instructions. Within the
bundle, there is room for an optional 128-bit constant; this constant is
shared across all instructions in the bundle.

Unfortunately, many instructions want a 128-bit constant all to
themselves (how selfish!). If we run out of space for constants in a
bundle, the bundle has to be broken up, incurring a performance and
space penalty.

As an optimization, the scheduler now analyzes the constants coming in
per-instruction and attempts to merge shared components, adjusting the
swizzle accessing the bundle's constants appropriately. Concretely,
given the GLSL:

   (a * vec4(1.5, 0.5, 0.5, 1.0)) + vec4(1.0, 2.3, 2.3, 0.5)

instead of compiling to the naive two bundles:

   vmul.fmul [temp], [a], r26
   fconstants 1.5, 0.5, 0.5, 1.0

   vadd.fadd [out], [temp], r26
   fconstants 1.0, 2.3, 2.3, 0.5

the scheduler can now fuse them into a single (pipelined!) bundle:

   vmul.fmul [temp], [a], r26.xyyz
   vadd.fadd [out], [temp], r26.zwwy
   fconstants 1.5, 0.5, 1.0, 2.3

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/gallium/drivers/panfrost/midgard/midgard_schedule.c

index 0bf3502f41c44f190e67454300367a14b26adeee..7059f7bbe2abdfe9787a2f2a4eef3b3fae61d982 100644 (file)
@@ -147,6 +147,8 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction
                 instructions_emitted = -1;
                 midgard_instruction *pins = ins;
 
+                unsigned constant_count = 0;
+
                 for (;;) {
                         midgard_instruction *ains = pins;
 
@@ -251,33 +253,78 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction
 
                         segment[segment_size++] = ains;
 
-                        /* Only one set of embedded constants per
-                         * bundle possible; if we have more, we must
-                         * break the chain early, unfortunately */
+                        /* We try to reuse constants if possible, by adjusting
+                         * the swizzle */
+
+                        if (ains->has_blend_constant) {
+                                bundle.has_blend_constant = 1;
+                                bundle.has_embedded_constants = 1;
+                        } else if (ains->has_constants) {
+                                /* By definition, blend constants conflict with
+                                 * everything, so if there are already
+                                 * constants we break the bundle *now* */
+
+                                if (bundle.has_blend_constant)
+                                        break;
+
+                                /* For anything but blend constants, we can do
+                                 * proper analysis, however */
+
+                                /* TODO: Mask by which are used */
+                                uint32_t *constants = (uint32_t *) ains->constants;
+                                uint32_t *bundles = (uint32_t *) bundle.constants;
 
-                        if (ains->has_constants) {
-                                if (bundle.has_embedded_constants) {
-                                        /* The blend constant needs to be
-                                         * alone, since it conflicts with
-                                         * everything by definition */
+                                uint32_t indices[4] = { 0 };
+                                bool break_bundle = false;
 
-                                        if (ains->has_blend_constant || bundle.has_blend_constant)
+                                for (unsigned i = 0; i < 4; ++i) {
+                                        uint32_t cons = constants[i];
+                                        bool constant_found = false;
+
+                                        /* Search for the constant */
+                                        for (unsigned j = 0; j < constant_count; ++j) {
+                                                if (bundles[j] != cons)
+                                                        continue;
+
+                                                /* We found it, reuse */
+                                                indices[i] = j;
+                                                constant_found = true;
                                                 break;
+                                        }
+
+                                        if (constant_found)
+                                                continue;
 
-                                        /* ...but if there are already
-                                         * constants but these are the
-                                         * *same* constants, we let it
-                                         * through */
+                                        /* We didn't find it, so allocate it */
+                                        unsigned idx = constant_count++;
 
-                                        if (memcmp(bundle.constants, ains->constants, sizeof(bundle.constants)))
+                                        if (idx >= 4) {
+                                                /* Uh-oh, out of space */
+                                                break_bundle = true;
                                                 break;
-                                } else {
-                                        bundle.has_embedded_constants = true;
-                                        memcpy(bundle.constants, ains->constants, sizeof(bundle.constants));
+                                        }
 
-                                        /* If this is a blend shader special constant, track it for patching */
-                                        bundle.has_blend_constant |= ains->has_blend_constant;
+                                        /* We have space, copy it in! */
+                                        bundles[idx] = cons;
+                                        indices[i] = idx;
                                 }
+
+                                if (break_bundle)
+                                        break;
+
+                                /* Cool, we have it in. So use indices as a
+                                 * swizzle */
+
+                                unsigned swizzle = SWIZZLE_FROM_ARRAY(indices);
+                                unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
+
+                                if (ains->ssa_args.src0 == r_constant)
+                                        ains->alu.src1 = vector_alu_apply_swizzle(ains->alu.src1, swizzle);
+
+                                if (ains->ssa_args.src1 == r_constant)
+                                        ains->alu.src2 = vector_alu_apply_swizzle(ains->alu.src2, swizzle);
+
+                                bundle.has_embedded_constants = true;
                         }
 
                         if (ains->unit & UNITS_ANY_VECTOR) {