panfrost: add atomic_cmpxchg opcode
[mesa.git] / src / panfrost / lib / pan_attributes.c
index 1d9e7d3019d39c703b32c75e542e6d21cb209fab..1fe6d8cb47df6c72ff751dfef484cdc9ee4931dd 100644 (file)
@@ -91,7 +91,7 @@ panfrost_padded_vertex_count(unsigned vertex_count)
 /* The much, much more irritating case -- instancing is enabled. See
  * panfrost_job.h for notes on how this works */
 
-static unsigned
+unsigned
 panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)
 {
         /* We have a NPOT divisor. Here's the fun one (multiplying by
@@ -129,95 +129,62 @@ panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned
         return magic_divisor;
 }
 
-unsigned
-panfrost_vertex_instanced(
-        unsigned padded_count,
-        unsigned instance_shift, unsigned instance_odd,
-        unsigned divisor,
-        union mali_attr *attrs)
-{
-        /* Depending if there is an instance divisor or not, packing varies.
-         * When there is a divisor, the hardware-level divisor is actually the
-         * product of the instance divisor and the padded count */
-
-        unsigned hw_divisor = padded_count * divisor;
-
-        if (divisor == 0) {
-                /* Per-vertex attributes use the MODULO mode. First, compute
-                 * the modulus */
-
-                attrs->elements |= MALI_ATTR_MODULO;
-                attrs->shift = instance_shift;
-                attrs->extra_flags = instance_odd;
-
-                return 1;
-        } else if (util_is_power_of_two_or_zero(hw_divisor)) {
-                /* If there is a divisor but the hardware divisor works out to
-                 * a power of two (not terribly exceptional), we can use an
-                 * easy path (just shifting) */
-
-                attrs->elements |= MALI_ATTR_POT_DIVIDE;
-                attrs->shift = __builtin_ctz(hw_divisor);
-
-                return 1;
-        } else {
-                unsigned shift = 0, extra_flags = 0;
-
-                attrs[1].magic_divisor =
-                        panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags);
-
-                /* Upload to two different slots */
-
-                attrs[0].elements |= MALI_ATTR_NPOT_DIVIDE;
-                attrs[0].shift = shift;
-                attrs[0].extra_flags = extra_flags;
-
-                attrs[1].unk = 0x20;
-                attrs[1].zero = 0;
-                attrs[1].divisor = divisor;
-
-                return 2;
-        }
-}
-
 /* Records for gl_VertexID and gl_InstanceID use a slightly special encoding,
  * but the idea is the same */
 
 void
 panfrost_vertex_id(
         unsigned padded_count,
-        union mali_attr *attr)
+        struct mali_attribute_buffer_packed *attr,
+        bool instanced)
 {
         /* We factor the padded count as shift/odd and that's it */
-
-        attr->elements = MALI_ATTR_VERTEXID;
-        attr->shift = __builtin_ctz(padded_count);
-        attr->extra_flags = padded_count >> (attr->shift + 1);
-        attr->stride = attr->size = 0;
+        pan_pack(attr, ATTRIBUTE_BUFFER, cfg) {
+                cfg.special = MALI_ATTRIBUTE_SPECIAL_VERTEX_ID;
+                cfg.type = 0;
+
+                if (instanced) {
+                        cfg.divisor_r = __builtin_ctz(padded_count);
+                        cfg.divisor_p = padded_count >> (cfg.divisor_r + 1);
+                } else {
+                        /* Match the blob... */
+                        cfg.divisor_r = 0x1F;
+                        cfg.divisor_p = 0x4;
+                }
+        }
 }
 
 void
 panfrost_instance_id(
         unsigned padded_count,
-        union mali_attr *attr)
+        struct mali_attribute_buffer_packed *attr,
+        bool instanced)
 {
-        attr->elements = MALI_ATTR_INSTANCEID;
-        attr->stride = 0;
-        attr->extra_flags = 0;
-        attr->size = 0;
-        
-        /* POT records have just a shift directly with an off-by-one for
-         * unclear reasons. NPOT records have a magic divisor smushed into the
-         * stride field (which is unused for these special records) */
-
-        if (util_is_power_of_two_or_zero(padded_count)) {
-                attr->shift = __builtin_ctz(padded_count) - 1;
-        } else {
-                unsigned shift = 0, flags = 0;
-
-                attr->stride = panfrost_compute_magic_divisor(padded_count, &shift, &flags);
-                attr->shift = shift;
-                attr->extra_flags = flags;
+        pan_pack(attr, ATTRIBUTE_BUFFER, cfg) {
+                cfg.special = MALI_ATTRIBUTE_SPECIAL_INSTANCE_ID;
+                cfg.type = 0;
+
+                /* POT records have just a shift directly with an off-by-one for
+                 * unclear reasons. NPOT records have a magic divisor smushed into the
+                 * stride field (which is unused for these special records) */
+
+                if (!instanced || padded_count <= 1) {
+                        /* Match the blob... */
+                        cfg.stride = ((1u << 31) - 1);
+                        cfg.divisor_r = 0x1F;
+                        cfg.divisor_e = 0x1;
+                } else if(util_is_power_of_two_or_zero(padded_count)) {
+                        /* By above, padded_count > 1 => padded_count >= 2 so
+                         * since we're a power of two, ctz(padded_count) =
+                         * log2(padded_count) >= log2(2) = 1, so
+                         * ctz(padded_count) - 1 >= 0, so this can't underflow
+                         * */
+
+                        cfg.divisor_r = __builtin_ctz(padded_count) - 1;
+                } else {
+                        cfg.stride = panfrost_compute_magic_divisor(padded_count,
+                                        &cfg.divisor_r, &cfg.divisor_e);
+                }
         }
 }