panfrost/midgard: Rework mir_adjust_constants() to make it type/size agnostic
authorBoris Brezillon <boris.brezillon@collabora.com>
Mon, 20 Jan 2020 14:44:48 +0000 (15:44 +0100)
committerMarge Bot <eric+marge@anholt.net>
Wed, 22 Jan 2020 15:31:28 +0000 (15:31 +0000)
Right now, constant combining is not supported in 16-bit mode, and
64-bit mode is simply ignored. Let's rework the function to make it
type/bit-size agnostic.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3478>

src/panfrost/midgard/midgard_schedule.c

index 892f40d1aeb7072530974fc8d6bce6d95990a6b0..b88fe0e03c74f01819d3618059051ce517823978 100644 (file)
@@ -333,7 +333,7 @@ struct midgard_predicate {
          * will be adjusted to index into the constants array */
 
         midgard_constants *constants;
-        unsigned constant_count;
+        unsigned constant_mask;
         bool blend_constant;
 
         /* Exclude this destination (if not ~0) */
@@ -360,11 +360,11 @@ mir_adjust_constants(midgard_instruction *ins,
 {
         /* Blend constants dominate */
         if (ins->has_blend_constant) {
-                if (pred->constant_count)
+                if (pred->constant_mask)
                         return false;
                 else if (destructive) {
                         pred->blend_constant = true;
-                        pred->constant_count = 16;
+                        pred->constant_mask = 0xffff;
                         return true;
                 }
         }
@@ -373,115 +373,90 @@ mir_adjust_constants(midgard_instruction *ins,
         if (!ins->has_constants)
                 return true;
 
-        if (ins->alu.reg_mode != midgard_reg_mode_32) {
-                /* TODO: 16-bit constant combining */
-                if (pred->constant_count)
-                        return false;
-
-                uint16_t *bundles = pred->constants->u16;
-                const uint16_t *constants = ins->constants.u16;
+        unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
+        midgard_reg_mode reg_mode = ins->alu.reg_mode;
 
-                /* Copy them wholesale */
-                for (unsigned i = 0; i < 4; ++i)
-                        bundles[i] = constants[i];
+        midgard_vector_alu_src const_src = { };
 
-                pred->constant_count = 16;
-        } else {
-                /* Pack 32-bit constants */
-                uint32_t *bundles = pred->constants->u32;
-                const uint32_t *constants = ins->constants.u32;
-                unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
-                unsigned mask = mir_from_bytemask(mir_bytemask_of_read_components(ins, r_constant), midgard_reg_mode_32);
+        if (ins->src[0] == r_constant)
+                const_src = vector_alu_from_unsigned(ins->alu.src1);
+        else if (ins->src[1] == r_constant)
+                const_src = vector_alu_from_unsigned(ins->alu.src2);
 
-                /* First, check if it fits */
-                unsigned count = DIV_ROUND_UP(pred->constant_count, sizeof(uint32_t));
-                unsigned existing_count = count;
+        unsigned type_size = mir_bytes_for_mode(reg_mode);
 
-                for (unsigned i = 0; i < 4; ++i) {
-                        if (!(mask & (1 << i)))
-                                continue;
+        /* If the ALU is converting up we need to divide type_size by 2 */
+        if (const_src.half)
+                type_size /= 2;
 
-                        bool ok = false;
+        unsigned max_comp = 16 / type_size;
+        unsigned comp_mask = mir_from_bytemask(mir_bytemask_of_read_components(ins, r_constant),
+                                               reg_mode);
+        unsigned type_mask = (1 << type_size) - 1;
+        unsigned bundle_constant_mask = pred->constant_mask;
+        unsigned comp_mapping[16] = { };
+        uint8_t bundle_constants[16];
 
-                        /* Look for existing constant */
-                        for (unsigned j = 0; j < existing_count; ++j) {
-                                if (bundles[j] == constants[i]) {
-                                        ok = true;
-                                        break;
-                                }
-                        }
+        memcpy(bundle_constants, pred->constants, 16);
 
-                        if (ok)
-                                continue;
-
-                        /* If the constant is new, check ourselves */
-                        for (unsigned j = 0; j < i; ++j) {
-                                if (constants[j] == constants[i] && (mask & (1 << j))) {
-                                        ok = true;
-                                        break;
-                                }
-                        }
-
-                        if (ok)
-                                continue;
-
-                        /* Otherwise, this is a new constant */
-                        count++;
-                }
-
-                /* Check if we have space */
-                if (count > 4)
-                        return false;
-
-                /* If non-destructive, we're done */
-                if (!destructive)
-                        return true;
-
-                /* If destructive, let's copy in the new constants and adjust
-                 * swizzles to pack it in. */
-
-                unsigned indices[16] = { 0 };
-
-                /* Reset count */
-                count = existing_count;
+        /* Let's try to find a place for each active component of the constant
+         * register.
+         */
+        for (unsigned comp = 0; comp < max_comp; comp++) {
+                if (!(comp_mask & (1 << comp)))
+                        continue;
 
-                for (unsigned i = 0; i < 4; ++i) {
-                        if (!(mask & (1 << i)))
-                                continue;
+                uint8_t *constantp = ins->constants.u8 + (type_size * comp);
+                unsigned best_reuse_bytes = 0;
+                signed best_place = -1;
+                unsigned i, j;
 
-                        uint32_t cons = constants[i];
-                        bool constant_found = false;
+                for (i = 0; i < 16; i += type_size) {
+                        unsigned reuse_bytes = 0;
 
-                        /* Search for the constant */
-                        for (unsigned j = 0; j < count; ++j) {
-                                if (bundles[j] != cons)
+                        for (j = 0; j < type_size; j++) {
+                                if (!(bundle_constant_mask & (1 << (i + j))))
                                         continue;
+                                if (constantp[j] != bundle_constants[i + j])
+                                        break;
 
-                                /* We found it, reuse */
-                                indices[i] = j;
-                                constant_found = true;
-                                break;
+                                reuse_bytes++;
                         }
 
-                        if (constant_found)
-                                continue;
+                        /* Select the place where existing bytes can be
+                         * reused so we leave empty slots to others
+                         */
+                        if (j == type_size &&
+                            (reuse_bytes > best_reuse_bytes || best_place < 0)) {
+                                best_reuse_bytes = reuse_bytes;
+                                best_place = i;
+                                break;
+                        }
+                }
 
-                        /* We didn't find it, so allocate it */
-                        unsigned idx = count++;
+                /* This component couldn't fit in the remaining constant slot,
+                 * no need to check the remaining components, bail out now
+                 */
+                if (best_place < 0)
+                        return false;
 
-                        /* We have space, copy it in! */
-                        bundles[idx] = cons;
-                        indices[i] = idx;
-                }
+                memcpy(&bundle_constants[i], constantp, type_size);
+                bundle_constant_mask |= type_mask << best_place;
+                comp_mapping[comp] = best_place / type_size;
+        }
 
-                pred->constant_count = count * sizeof(uint32_t);
+        /* If non-destructive, we're done */
+        if (!destructive)
+                return true;
 
-                /* Use indices as a swizzle */
+       /* Otherwise update the constant_mask and constant values */
+        pred->constant_mask = bundle_constant_mask;
+        memcpy(pred->constants, bundle_constants, 16);
 
-                mir_foreach_src(ins, s) {
-                        if (ins->src[s] == r_constant)
-                                mir_compose_swizzle(ins->swizzle[s], indices, ins->swizzle[s]);
-                }
+        /* Use comp_mapping as a swizzle */
+        mir_foreach_src(ins, s) {
+                if (ins->src[s] == r_constant)
+                        mir_compose_swizzle(ins->swizzle[s], comp_mapping, ins->swizzle[s]);
         }
 
         return true;
@@ -1028,7 +1003,7 @@ mir_schedule_alu(
         mir_update_worklist(worklist, len, instructions, sadd);
 
         bundle.has_blend_constant = predicate.blend_constant;
-        bundle.has_embedded_constants = predicate.constant_count > 0;
+        bundle.has_embedded_constants = predicate.constant_mask != 0;
 
         unsigned padding = 0;