/* Single-bit flag masks: each expands to (1 << n), i.e. only bit n is set.
 * NOTE(review): the descriptor word these bits are OR'd into is not visible
 * in this fragment -- confirm against the structure that consumes them. */
#define MALI_ALPHA_TO_COVERAGE (1 << 1)
+#define MALI_SFBD_ENABLE (1 << 4)
+#define MALI_SFBD_SRGB (1 << 8)
#define MALI_NO_DITHER (1 << 9)
#define MALI_DEPTH_RANGE_A (1 << 12)
#define MALI_DEPTH_RANGE_B (1 << 13)
/* Plain numeric code (not a bit mask).
 * NOTE(review): presumably a channel-type value compared against the result
 * of MALI_EXTRACT_BITS(); 7 is the largest value that fits in 3 bits --
 * confirm against the users of this constant. */
#define MALI_CHANNEL_FLOAT 7
/* Yields the low three bits (mask 0x7) of fmt.
 *
 * The argument is parenthesized so that compound expressions are masked as a
 * whole: without it, MALI_EXTRACT_BITS(a | b) would expand to (a | b & 0x7),
 * which masks only b because & binds tighter than |. */
#define MALI_EXTRACT_BITS(fmt) ((fmt) & 0x7)
-/* Applies to midgard1.flags_lo */
-
-/* Should be set when the fragment shader updates the depth value. */
-#define MALI_WRITES_Z (1 << 4)
-
-/* Should the hardware perform early-Z testing? Set if the shader does not use
- * discard, alpha-to-coverage, shader depth writes, and if the shader has no
- * side effects (writes to global memory or images) unless early-z testing is
- * forced in the shader.
- */
-
-#define MALI_EARLY_Z (1 << 6)
-
-/* Should the hardware calculate derivatives (via helper invocations)? Set in a
- * fragment shader that uses texturing or derivative functions */
-
-#define MALI_HELPER_INVOCATIONS (1 << 7)
-
-/* Flags denoting the fragment shader's use of tilebuffer readback. If the
- * shader might read any part of the tilebuffer, set MALI_READS_TILEBUFFER. If
- * it might read depth/stencil in particular, also set MALI_READS_ZS */
-
-#define MALI_READS_ZS (1 << 8)
-
-/* The shader might write to global memory (via OpenCL, SSBOs, or images).
- * Reading is okay, as are ordinary writes to the tilebuffer/varyings. Setting
- * incurs a performance penalty. On a fragment shader, this bit implies there
- * are side effects, hence it interacts with early-z. */
-#define MALI_WRITES_GLOBAL (1 << 9)
-
-#define MALI_READS_TILEBUFFER (1 << 10)
-
-/* Applies to midgard1.flags_hi */
-
-/* Should be set when the fragment shader updates the stencil value. */
-#define MALI_WRITES_S (1 << 2)
-
-/* Mode to suppress generation of Infinity and NaN values by clamping inf
- * (-inf) to MAX_FLOAT (-MIN_FLOAT) and flushing NaN to 0.0
- *
- * Compare suppress_inf/suppress_nan flags on the Bifrost clause header for the
- * same functionality.
- *
- * This is not conformant on GLES3 or OpenCL, but is optional on GLES2, where
- * it works around app bugs (e.g. in glmark2-es2 -bterrain with FP16).
- */
-#define MALI_SUPPRESS_INF_NAN (1 << 3)
-
-/* Flags for bifrost1.unk1 */
-
-/* Shader uses less than 32 registers, partitioned as [R0, R15] U [R48, R63],
- * allowing for full thread count. If clear, the full [R0, R63] register set is
- * available at half thread count */
-#define MALI_BIFROST_FULL_THREAD (1 << 9)
-
-/* Enable early-z testing (presumably). This flag may not be set if the shader:
- *
- * - Uses blending
- * - Uses discard
- * - Writes gl_FragDepth
- *
- * This differs from Midgard which sets the MALI_EARLY_Z flag even with
- * blending, although I've begun to suspect that flag does not in fact enable
- * EARLY_Z alone. */
-#define MALI_BIFROST_EARLY_Z (1 << 15)
-
-/* First clause type is ATEST */
-#define MALI_BIFROST_FIRST_ATEST (1 << 26)
-
/* The raw Midgard blend payload can either be an equation or a shader
* address, depending on the context */
u16 varying_count;
union {
- struct {
- u32 uniform_buffer_count : 4;
- u32 unk1 : 28; // = 0x800000 for vertex, 0x958020 for tiler
- } bifrost1;
- struct {
- unsigned uniform_buffer_count : 4;
- unsigned flags_lo : 12;
-
- /* vec4 units */
- unsigned work_count : 5;
- unsigned uniform_count : 5;
- unsigned flags_hi : 6;
- } midgard1;
+ struct mali_bifrost_properties_packed bifrost_props;
+ struct mali_midgard_properties_packed midgard_props;
};
/* Same as glPolygonOffset() arguments */
struct mali_stencil_packed stencil_back;
union {
- struct {
- u32 unk3 : 7;
- /* On Bifrost, some system values are preloaded in
- * registers R55-R62 by the thread dispatcher prior to
- * the start of shader execution. This is a bitfield
- * with one entry for each register saying which
- * registers need to be preloaded. Right now, the known
- * values are:
- *
- * Vertex/compute:
- * - R55 : gl_LocalInvocationID.xy
- * - R56 : gl_LocalInvocationID.z + unknown in high 16 bits
- * - R57 : gl_WorkGroupID.x
- * - R58 : gl_WorkGroupID.y
- * - R59 : gl_WorkGroupID.z
- * - R60 : gl_GlobalInvocationID.x
- * - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base)
- * - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base)
- *
- * Fragment:
- * - R55 : unknown, never seen (but the bit for this is
- * always set?)
- * - R56 : unknown (bit always unset)
- * - R57 : gl_PrimitiveID
- * - R58 : gl_FrontFacing in low bit, potentially other stuff
- * - R59 : u16 fragment coordinates (used to compute
- * gl_FragCoord.xy, together with sample positions)
- * - R60 : gl_SampleMask (used in epilog, so pretty
- * much always used, but the bit is always 0 -- is
- * this just always pushed?)
- * - R61 : gl_SampleMaskIn and gl_SampleID, used by
- * varying interpolation.
- * - R62 : unknown (bit always unset).
- *
- * Later GPUs (starting with Mali-G52?) support
- * preloading float varyings into r0-r7. This is
- * indicated by setting 0x40. There is no distinction
- * here between 1 varying and 2.
- */
- u32 preload_regs : 8;
- /* In units of 8 bytes or 64 bits, since the
- * uniform/const port loads 64 bits at a time.
- */
- u32 uniform_count : 7;
- u32 unk4 : 10; // = 2
- } bifrost2;
+ struct mali_preload_packed bifrost_preload;
struct {
u32 unknown2_7;
} midgard2;