#define MALI_ALPHA_TO_COVERAGE (1 << 1)
+#define MALI_SFBD_ENABLE (1 << 4)
+#define MALI_SFBD_SRGB (1 << 8)
#define MALI_NO_DITHER (1 << 9)
#define MALI_DEPTH_RANGE_A (1 << 12)
#define MALI_DEPTH_RANGE_B (1 << 13)
unsigned complement_dominant : 1;
} __attribute__((packed));
-struct mali_blend_equation {
- /* Of type mali_blend_mode */
- unsigned rgb_mode : 12;
- unsigned alpha_mode : 12;
-
- unsigned zero1 : 4;
-
- /* Corresponds to MALI_MASK_* above and glColorMask arguments */
-
- unsigned color_mask : 4;
-} __attribute__((packed));
-
/* Compressed per-pixel formats. Each of these formats expands to one to four
* floating-point or integer numbers, as defined by the OpenGL specification.
* There are various places in OpenGL where the user can specify a compressed
#define MALI_CHANNEL_FLOAT 7
#define MALI_EXTRACT_BITS(fmt) (fmt & 0x7)
-/* Applies to midgard1.flags_lo */
-
-/* Should be set when the fragment shader updates the depth value. */
-#define MALI_WRITES_Z (1 << 4)
-
-/* Should the hardware perform early-Z testing? Set if the shader does not use
- * discard, alpha-to-coverage, shader depth writes, and if the shader has no
- * side effects (writes to global memory or images) unless early-z testing is
- * forced in the shader.
- */
-
-#define MALI_EARLY_Z (1 << 6)
-
-/* Should the hardware calculate derivatives (via helper invocations)? Set in a
- * fragment shader that uses texturing or derivative functions */
-
-#define MALI_HELPER_INVOCATIONS (1 << 7)
-
-/* Flags denoting the fragment shader's use of tilebuffer readback. If the
- * shader might read any part of the tilebuffer, set MALI_READS_TILEBUFFER. If
- * it might read depth/stencil in particular, also set MALI_READS_ZS */
-
-#define MALI_READS_ZS (1 << 8)
-
-/* The shader might write to global memory (via OpenCL, SSBOs, or images).
- * Reading is okay, as are ordinary writes to the tilebuffer/varyings. Setting
- * incurs a performance penalty. On a fragment shader, this bit implies there
- * are side effects, hence it interacts with early-z. */
-#define MALI_WRITES_GLOBAL (1 << 9)
-
-#define MALI_READS_TILEBUFFER (1 << 10)
-
-/* Applies to midgard1.flags_hi */
-
-/* Should be set when the fragment shader updates the stencil value. */
-#define MALI_WRITES_S (1 << 2)
-
-/* Mode to suppress generation of Infinity and NaN values by clamping inf
- * (-inf) to MAX_FLOAT (-MIN_FLOAT) and flushing NaN to 0.0
- *
- * Compare suppress_inf/suppress_nan flags on the Bifrost clause header for the
- * same functionality.
- *
- * This is not conformant on GLES3 or OpenCL, but is optional on GLES2, where
- * it works around app bugs (e.g. in glmark2-es2 -bterrain with FP16).
- */
-#define MALI_SUPPRESS_INF_NAN (1 << 3)
-
-/* Flags for bifrost1.unk1 */
-
-/* Shader uses less than 32 registers, partitioned as [R0, R15] U [R48, R63],
- * allowing for full thread count. If clear, the full [R0, R63] register set is
- * available at half thread count */
-#define MALI_BIFROST_FULL_THREAD (1 << 9)
-
-/* Enable early-z testing (presumably). This flag may not be set if the shader:
- *
- * - Uses blending
- * - Uses discard
- * - Writes gl_FragDepth
- *
- * This differs from Midgard which sets the MALI_EARLY_Z flag even with
- * blending, although I've begun to suspect that flag does not in fact enable
- * EARLY_Z alone. */
-#define MALI_BIFROST_EARLY_Z (1 << 15)
-
-/* First clause type is ATEST */
-#define MALI_BIFROST_FIRST_ATEST (1 << 26)
-
/* The raw Midgard blend payload can either be an equation or a shader
* address, depending on the context */
mali_ptr shader;
struct {
- struct mali_blend_equation equation;
+ struct mali_blend_equation_packed equation;
float constant;
};
};
-/* We need to load the tilebuffer to blend (i.e. the destination factor is not
- * ZERO) */
-
-#define MALI_BLEND_LOAD_TIB (0x1)
-
-/* A blend shader is used to blend this render target */
-#define MALI_BLEND_MRT_SHADER (0x2)
-
-/* On MRT Midgard systems (using an MFBD), each render target gets its own
- * blend descriptor */
-
-#define MALI_BLEND_SRGB (0x400)
-
-/* Dithering is specified here for MFBD, otherwise NO_DITHER for SFBD */
-#define MALI_BLEND_NO_DITHER (0x800)
-
/* Blend descriptor for a single render target: a packed flags word, a u32
 * named `zero`, then the blend payload union (equation or shader). */
struct midgard_blend_rt {
- /* Flags base value of 0x200 to enable the render target.
- * OR with 0x1 for blending (anything other than REPLACE).
- * OR with 0x2 for programmable blending
- * OR with MALI_BLEND_SRGB for implicit sRGB
- */
-
- u64 flags;
+ struct mali_blend_flags_packed flags;
+ u32 zero; /* NOTE(review): presumably padding so that flags + zero span the former u64 flags field; confirm sizeof(struct mali_blend_flags_packed) and that this must stay 0 */
union midgard_blend blend;
} __attribute__((packed));
*/
u16 constant;
- struct mali_blend_equation equation;
+ struct mali_blend_equation_packed equation;
/*
* - 0x19 normally
u16 varying_count;
union {
- struct {
- u32 uniform_buffer_count : 4;
- u32 unk1 : 28; // = 0x800000 for vertex, 0x958020 for tiler
- } bifrost1;
- struct {
- unsigned uniform_buffer_count : 4;
- unsigned flags_lo : 12;
-
- /* vec4 units */
- unsigned work_count : 5;
- unsigned uniform_count : 5;
- unsigned flags_hi : 6;
- } midgard1;
+ struct mali_bifrost_properties_packed bifrost_props;
+ struct mali_midgard_properties_packed midgard_props;
};
/* Same as glPolygonOffset() arguments */
struct mali_stencil_packed stencil_back;
union {
- struct {
- u32 unk3 : 7;
- /* On Bifrost, some system values are preloaded in
- * registers R55-R62 by the thread dispatcher prior to
- * the start of shader execution. This is a bitfield
- * with one entry for each register saying which
- * registers need to be preloaded. Right now, the known
- * values are:
- *
- * Vertex/compute:
- * - R55 : gl_LocalInvocationID.xy
- * - R56 : gl_LocalInvocationID.z + unknown in high 16 bits
- * - R57 : gl_WorkGroupID.x
- * - R58 : gl_WorkGroupID.y
- * - R59 : gl_WorkGroupID.z
- * - R60 : gl_GlobalInvocationID.x
- * - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base)
- * - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base)
- *
- * Fragment:
- * - R55 : unknown, never seen (but the bit for this is
- * always set?)
- * - R56 : unknown (bit always unset)
- * - R57 : gl_PrimitiveID
- * - R58 : gl_FrontFacing in low bit, potentially other stuff
- * - R59 : u16 fragment coordinates (used to compute
- * gl_FragCoord.xy, together with sample positions)
- * - R60 : gl_SampleMask (used in epilog, so pretty
- * much always used, but the bit is always 0 -- is
- * this just always pushed?)
- * - R61 : gl_SampleMaskIn and gl_SampleID, used by
- * varying interpolation.
- * - R62 : unknown (bit always unset).
- *
- * Later GPUs (starting with Mali-G52?) support
- * preloading float varyings into r0-r7. This is
- * indicated by setting 0x40. There is no distinction
- * here between 1 varying and 2.
- */
- u32 preload_regs : 8;
- /* In units of 8 bytes or 64 bits, since the
- * uniform/const port loads 64 bits at a time.
- */
- u32 uniform_count : 7;
- u32 unk4 : 10; // = 2
- } bifrost2;
+ struct mali_preload_packed bifrost_preload;
struct {
u32 unknown2_7;
} midgard2;
* 3. If e <= 2^shift, then we need to use the round-down algorithm. Set
* magic_divisor = m - 1 and extra_flags = 1.
* 4. Otherwise, set magic_divisor = m and extra_flags = 0.
- *
- * Unrelated to instancing/actual attributes, images (the OpenCL kind) are
- * implemented as special attributes, denoted by MALI_ATTR_IMAGE. For images,
- * let shift=extra_flags=0. Stride is set to the image format's bytes-per-pixel
- * (*NOT the row stride*). Size is set to the size of the image itself.
- *
- * Special internal attribtues and varyings (gl_VertexID, gl_FrontFacing, etc)
- * use particular fixed addresses with modified structures.
*/
-enum mali_attr_mode {
- MALI_ATTR_UNUSED = 0,
- MALI_ATTR_LINEAR = 1,
- MALI_ATTR_POT_DIVIDE = 2,
- MALI_ATTR_MODULO = 3,
- MALI_ATTR_NPOT_DIVIDE = 4,
- MALI_ATTR_IMAGE = 5,
-};
-
-/* Pseudo-address for gl_VertexID, gl_FragCoord, gl_FrontFacing */
-
-#define MALI_ATTR_VERTEXID (0x22)
-#define MALI_ATTR_INSTANCEID (0x24)
-#define MALI_VARYING_FRAG_COORD (0x25)
-#define MALI_VARYING_FRONT_FACING (0x26)
-
-/* This magic "pseudo-address" is used as `elements` to implement
- * gl_PointCoord. When read from a fragment shader, it generates a point
- * coordinate per the OpenGL ES 2.0 specification. Flipped coordinate spaces
- * require an affine transformation in the shader. */
-
-#define MALI_VARYING_POINT_COORD (0x61)
-
-/* Used for comparison to check if an address is special. Mostly a guess, but
- * it doesn't really matter. */
-
-#define MALI_RECORD_SPECIAL (0x100)
-
-union mali_attr {
- /* This is used for actual attributes. */
- struct {
- /* The bottom 3 bits are the mode */
- mali_ptr elements : 64 - 8;
- u32 shift : 5;
- u32 extra_flags : 3;
- u32 stride;
- u32 size;
- };
- /* The entry after an NPOT_DIVIDE entry has this format. It stores
- * extra information that wouldn't fit in a normal entry.
- */
- struct {
- u32 unk; /* = 0x20 */
- u32 magic_divisor;
- u32 zero;
- /* This is the original, GL-level divisor. */
- u32 divisor;
- };
-} __attribute__((packed));
-
#define FBD_MASK (~0x3f)
/* MFBD, rather than SFBD */