panfrost: Move instancing routines to encoder/

[mesa.git] / src / gallium / drivers / panfrost / pan_attributes.c
diff --git a/src/gallium/drivers/panfrost/pan_attributes.c b/src/gallium/drivers/panfrost/pan_attributes.c

index 4fa3546f3a38ae1f745f5bcb0524827be1816652..c1b58b366100af2a2a9bd046c5acdc34f83c1a62 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_attributes.c
+++ b/src/gallium/drivers/panfrost/pan_attributes.c
@@ -26,221 +26,6 @@
  #include "pan_bo.h"
  #include "pan_context.h"
  
-/* See mali_job for notes on how this works. But basically, for small vertex
- * counts, we have a lookup table, and for large vertex counts, we look at the
- * high bits as a heuristic. This has to match exactly how the hardware
- * calculates this (which is why the algorithm is so weird) or else instancing
- * will break. */
-
-/* Given an odd number (of the form 2k + 1), compute k */
-#define ODD(odd) ((odd - 1) >> 1)
-
-/* Given the shift/odd pair, recover the original padded integer */
-
-unsigned
-pan_expand_shift_odd(struct pan_shift_odd o)
-{
-        unsigned odd = 2*o.odd + 1;
-        unsigned shift = 1 << o.shift;
-        return odd * shift;
-}
-
-static inline struct pan_shift_odd
-pan_factored(unsigned pot, unsigned odd)
-{
-        struct pan_shift_odd out;
-
-        assert(util_is_power_of_two_or_zero(pot));
-        assert(odd & 1);
-
-        /* Odd is of the form (2k + 1) = (k << 1) + 1 = (k << 1) | 1.
-         *
-         * So (odd >> 1) = ((k << 1) | 1) >> 1 = ((k << 1) >> 1) | (1 >> 1)
-         *  = k | 0 = k */
-
-        out.odd = (odd >> 1);
-
-        /* POT is the form (1 << shift) */
-        out.shift = __builtin_ctz(pot);
-
-        return out;
-}
-
-
-/* For small vertices. Second argument is whether the primitive takes a
- * power-of-two argument, which determines how rounding works. True for POINTS
- * and LINES, false for TRIANGLES. Presumably true for QUADS but you'd be crazy
- * to try instanced quads on ES class hardware <3 */
-
-static struct {
-        unsigned pot;
-        unsigned odd;
-} small_lut[] = {
-        {  0, 1 },
-        {  1, 1 },
-        {  2, 1 },
-        {  1, 3 },
-        {  4, 1 },
-        {  1, 5 },
-        {  2, 3 },
-        {  1, 7 },
-        {  8, 1 },
-        {  1, 9 },
-        {  2, 5 },
-        {  4, 3 }, /* 11 */
-        {  4, 3 },
-        {  2, 7 }, /* 13 */
-        {  2, 7 },
-        { 16, 1 }, /* 15 */
-        { 16, 1 },
-        {  2, 9 },
-        {  4, 5 }, /* 20 */
-        {  4, 5 }
-};
-
-static struct pan_shift_odd
-panfrost_small_padded_vertex_count(unsigned idx)
-{
-        return pan_factored(
-                       small_lut[idx].pot,
-                       small_lut[idx].odd);
-}
-
-static struct pan_shift_odd
-panfrost_large_padded_vertex_count(uint32_t vertex_count)
-{
-        struct pan_shift_odd out = { 0 };
-
-        /* First, we have to find the highest set one */
-        unsigned highest = 32 - __builtin_clz(vertex_count);
-
-        /* Using that, we mask out the highest 4-bits */
-        unsigned n = highest - 4;
-        unsigned nibble = (vertex_count >> n) & 0xF;
-
-        /* Great, we have the nibble. Now we can just try possibilities. Note
-         * that we don't care about the bottom most bit in most cases, and we
-         * know the top bit must be 1 */
-
-        unsigned middle_two = (nibble >> 1) & 0x3;
-
-        switch (middle_two) {
-        case 0b00:
-                if (nibble & 1)
-                        return pan_factored(1 << n, 9);
-                else
-                        return pan_factored(1 << (n + 1), 5);
-        case 0b01:
-                return pan_factored(1 << (n + 2), 3);
-        case 0b10:
-                return pan_factored(1 << (n + 1), 7);
-        case 0b11:
-                return pan_factored(1 << (n + 4), 1);
-        default:
-                unreachable("Invalid two bits");
-        }
-
-        return out;
-}
-
-struct pan_shift_odd
-panfrost_padded_vertex_count(
-        unsigned vertex_count,
-        bool pot)
-{
-        assert(vertex_count > 0);
-
-        if (vertex_count < 20) {
-                /* Add an off-by-one if it won't align naturally (quirk of the hardware) */
-                //if (!pot)
-                //      vertex_count++;
-
-                return panfrost_small_padded_vertex_count(vertex_count);
-        } else
-                return panfrost_large_padded_vertex_count(vertex_count);
-}
-
-/* The much, much more irritating case -- instancing is enabled. See
- * panfrost_job.h for notes on how this works */
-
-static unsigned
-panfrost_vertex_instanced(
-        unsigned padded_count,
-        unsigned instance_shift, unsigned instance_odd,
-        unsigned divisor,
-        union mali_attr *attrs)
-{
-        /* Depending if there is an instance divisor or not, packing varies.
-         * When there is a divisor, the hardware-level divisor is actually the
-         * product of the instance divisor and the padded count */
-
-        unsigned hw_divisor = padded_count * divisor;
-
-        if (divisor == 0) {
-                /* Per-vertex attributes use the MODULO mode. First, compute
-                 * the modulus */
-
-                attrs->elements |= MALI_ATTR_MODULO;
-                attrs->shift = instance_shift;
-                attrs->extra_flags = instance_odd;
-
-                return 1;
-        } else if (util_is_power_of_two_or_zero(hw_divisor)) {
-                /* If there is a divisor but the hardware divisor works out to
-                 * a power of two (not terribly exceptional), we can use an
-                 * easy path (just shifting) */
-
-                attrs->elements |= MALI_ATTR_POT_DIVIDE;
-                attrs->shift = __builtin_ctz(hw_divisor);
-
-                return 1;
-        } else {
-                /* We have a NPOT divisor. Here's the fun one (multipling by
-                 * the inverse and shifting) */
-
-                /* floor(log2(d)) */
-                unsigned shift = util_logbase2(hw_divisor);
-
-                /* m = ceil(2^(32 + shift) / d) */
-                uint64_t shift_hi = 32 + shift;
-                uint64_t t = 1ll << shift_hi;
-                double t_f = t;
-                double hw_divisor_d = hw_divisor;
-                double m_f = ceil(t_f / hw_divisor_d);
-                unsigned m = m_f;
-
-                /* Default case */
-                uint32_t magic_divisor = m, extra_flags = 0;
-
-                /* e = 2^(shift + 32) % d */
-                uint64_t e = t % hw_divisor;
-
-                /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
-                 * seems to use a different condition */
-                if (e <= (1ll << shift)) {
-                        magic_divisor = m - 1;
-                        extra_flags = 1;
-                }
-
-                /* Top flag implicitly set */
-                assert(magic_divisor & (1u << 31));
-                magic_divisor &= ~(1u << 31);
-
-                /* Upload to two different slots */
-
-                attrs[0].elements |= MALI_ATTR_NPOT_DIVIDE;
-                attrs[0].shift = shift;
-                attrs[0].extra_flags = extra_flags;
-
-                attrs[1].unk = 0x20;
-                attrs[1].magic_divisor = magic_divisor;
-                attrs[1].zero = 0;
-                attrs[1].divisor = divisor;
-
-                return 2;
-        }
-}
-
  void
  panfrost_emit_vertex_data(struct panfrost_batch *batch)
  {