#define MALI_CULL_FACE_FRONT (1 << 6)
#define MALI_CULL_FACE_BACK (1 << 7)
-/* Flags apply to unknown2_3? */
-
-#define MALI_HAS_MSAA (1 << 0)
-
-/* Execute fragment shader per-sample if set (e.g. to implement gl_SampleID
- * reads) */
-#define MALI_PER_SAMPLE (1 << 2)
-#define MALI_CAN_DISCARD (1 << 5)
-
-/* Applies on SFBD systems, specifying that programmable blending is in use */
-#define MALI_HAS_BLEND_SHADER (1 << 6)
-
-/* func is mali_func */
-#define MALI_DEPTH_FUNC(func) (func << 8)
-#define MALI_GET_DEPTH_FUNC(flags) ((flags >> 8) & 0x7)
-#define MALI_DEPTH_FUNC_MASK MALI_DEPTH_FUNC(0x7)
-
-#define MALI_DEPTH_WRITEMASK (1 << 11)
-
-#define MALI_DEPTH_CLIP_NEAR (1 << 12)
-#define MALI_DEPTH_CLIP_FAR (1 << 13)
-
-/* Next flags to unknown2_4 */
-#define MALI_STENCIL_TEST (1 << 0)
-
-#define MALI_ALPHA_TO_COVERAGE (1 << 1)
-
-#define MALI_SFBD_ENABLE (1 << 4)
-#define MALI_SFBD_SRGB (1 << 8)
-#define MALI_NO_DITHER (1 << 9)
-#define MALI_DEPTH_RANGE_A (1 << 12)
-#define MALI_DEPTH_RANGE_B (1 << 13)
-#define MALI_NO_MSAA (1 << 14)
-
-#define MALI_MASK_R (1 << 0)
-#define MALI_MASK_G (1 << 1)
-#define MALI_MASK_B (1 << 2)
-#define MALI_MASK_A (1 << 3)
-
enum mali_nondominant_mode {
MALI_BLEND_NON_MIRROR = 0,
MALI_BLEND_NON_ZERO = 1
};
} __attribute__((packed));
-/* Descriptor for the shader. Following this is at least one, up to four blend
- * descriptors for each active render target */
-
-struct mali_shader_meta {
- struct mali_shader_packed shader;
-
- union {
- struct mali_bifrost_properties_packed bifrost_props;
- struct mali_midgard_properties_packed midgard_props;
- };
-
- /* Same as glPolygoOffset() arguments */
- float depth_units;
- float depth_factor;
-
- u32 unknown2_2;
-
- /* Generated from SAMPLE_COVERAGE_VALUE and SAMPLE_COVERAGE_INVERT. See
- * 13.8.3 ("Multisample Fragment Operations") in the OpenGL ES 3.2
- * specification. Only matters when multisampling is enabled. */
- u16 coverage_mask;
-
- u16 unknown2_3;
-
- u8 stencil_mask_front;
- u8 stencil_mask_back;
- u16 unknown2_4;
-
- struct mali_stencil_packed stencil_front;
- struct mali_stencil_packed stencil_back;
-
- union {
- struct mali_preload_packed bifrost_preload;
- struct {
- u32 unknown2_7;
- } midgard2;
- };
-
- u32 padding;
-
- /* Blending information for the older non-MRT Midgard HW. Check for
- * MALI_HAS_BLEND_SHADER to decide how to interpret.
- */
-
- union midgard_blend blend;
-} __attribute__((packed));
-
-/* This only concerns hardware jobs */
-
/* Possible values for job_descriptor_size */
#define MALI_JOB_32 0
* fused payloads.
*/
-/* Applies to unknown_draw */
-
-#define MALI_DRAW_INDEXED_UINT8 (0x10)
-#define MALI_DRAW_INDEXED_UINT16 (0x20)
-#define MALI_DRAW_INDEXED_UINT32 (0x30)
-#define MALI_DRAW_INDEXED_SIZE (0x30)
-#define MALI_DRAW_INDEXED_SHIFT (4)
-
-#define MALI_DRAW_VARYING_SIZE (0x100)
-
-/* Set to use first vertex as the provoking vertex for flatshading. Clear to
- * use the last vertex. This is the default in DX and VK, but not in GL. */
-
-#define MALI_DRAW_FLATSHADE_FIRST (0x800)
-
-#define MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX (0x10000)
-
struct mali_vertex_tiler_prefix {
- /* This is a dynamic bitfield containing the following things in this order:
- *
- * - gl_WorkGroupSize.x
- * - gl_WorkGroupSize.y
- * - gl_WorkGroupSize.z
- * - gl_NumWorkGroups.x
- * - gl_NumWorkGroups.y
- * - gl_NumWorkGroups.z
- *
- * The number of bits allocated for each number is based on the *_shift
- * fields below. For example, workgroups_y_shift gives the bit that
- * gl_NumWorkGroups.y starts at, and workgroups_z_shift gives the bit
- * that gl_NumWorkGroups.z starts at (and therefore one after the bit
- * that gl_NumWorkGroups.y ends at). The actual value for each gl_*
- * value is one more than the stored value, since if any of the values
- * are zero, then there would be no invocations (and hence no job). If
- * there were 0 bits allocated to a given field, then it must be zero,
- * and hence the real value is one.
- *
- * Vertex jobs reuse the same job dispatch mechanism as compute jobs,
- * effectively doing glDispatchCompute(1, vertex_count, instance_count)
- * where vertex count is the number of vertices.
- */
- u32 invocation_count;
-
- /* Bitfield for shifts:
- *
- * size_y_shift : 5
- * size_z_shift : 5
- * workgroups_x_shift : 6
- * workgroups_y_shift : 6
- * workgroups_z_shift : 6
- * workgroups_x_shift_2 : 4
- */
- u32 invocation_shifts;
-
- u32 draw_mode : 4;
- u32 unknown_draw : 22;
-
- /* This is the the same as workgroups_x_shift_2 in compute shaders, but
- * always 5 for vertex jobs and 6 for tiler jobs. I suspect this has
- * something to do with how many quads get put in the same execution
- * engine, which is a balance (you don't want to starve the engine, but
- * you also want to distribute work evenly).
- */
- u32 workgroups_x_shift_3 : 6;
-
-
- /* Negative of min_index. This is used to compute
- * the unbiased index in tiler/fragment shader runs.
- *
- * The hardware adds offset_bias_correction in each run,
- * so that absent an index bias, the first vertex processed is
- * genuinely the first vertex (0). But with an index bias,
- * the first vertex process is numbered the same as the bias.
- *
- * To represent this more conviniently:
- * unbiased_index = lower_bound_index +
- * index_bias +
- * offset_bias_correction
- *
- * This is done since the hardware doesn't accept a index_bias
- * and this allows it to recover the unbiased index.
- */
- int32_t offset_bias_correction;
- u32 zero1;
-
- /* Like many other strictly nonzero quantities, index_count is
- * subtracted by one. For an indexed cube, this is equal to 35 = 6
- * faces * 2 triangles/per face * 3 vertices/per triangle - 1. That is,
- * for an indexed draw, index_count is the number of actual vertices
- * rendered whereas invocation_count is the number of unique vertices
- * rendered (the number of times the vertex shader must be invoked).
- * For non-indexed draws, this is just equal to invocation_count. */
-
- u32 index_count;
-
- /* No hidden structure; literally just a pointer to an array of uint
- * indices (width depends on flags). Thanks, guys, for not making my
- * life insane for once! NULL for non-indexed draws. */
-
- u64 indices;
+ struct mali_invocation_packed invocation; /* Packed descriptor taking the place of the removed invocation_count / invocation_shifts words. NOTE(review): presumably the same 2 x u32 layout -- confirm against the definition of mali_invocation_packed. */
+ struct mali_primitive_packed primitive; /* Packed descriptor taking the place of the removed draw_mode .. indices fields. NOTE(review): layout is defined outside this hunk -- confirm it still covers the index bias correction, index count and index pointer. */
} __attribute__((packed));
/* Point size / line width can either be specified as a 32-bit float (for
u64 zeros[20];
} __attribute__((packed));
-struct bifrost_tiler_only {
- /* 0x20 */
- union midgard_primitive_size primitive_size;
-
- mali_ptr tiler_meta;
-
- u64 zero1, zero2, zero3, zero4, zero5, zero6;
-} __attribute__((packed));
-
struct mali_vertex_tiler_postfix {
u16 gl_enables; // 0x6 on Midgard, 0x2 on Bifrost
struct bifrost_payload_tiler {
struct mali_vertex_tiler_prefix prefix;
- struct bifrost_tiler_only tiler;
+ union midgard_primitive_size primitive_size; /* This and the next two declarations are the former members of struct bifrost_tiler_only, inlined in the same order so the member sequence of the payload is unchanged. */
+ mali_ptr tiler_meta;
+ u64 zero1, zero2, zero3, zero4, zero5, zero6; /* Six 64-bit words; the names suggest they must be zero -- TODO confirm. */
struct mali_vertex_tiler_postfix postfix;
} __attribute__((packed));
-struct bifrost_payload_fused {
- struct mali_vertex_tiler_prefix prefix;
- struct bifrost_tiler_only tiler;
- struct mali_vertex_tiler_postfix tiler_postfix;
- u64 padding; /* zero */
- struct mali_vertex_tiler_postfix vertex_postfix;
-} __attribute__((packed));
-
/* Purposeful off-by-one in width, height fields. For example, a (64, 64)
* texture is stored as (63, 63) in these fields. This adjusts for that.
* There's an identical pattern in the framebuffer descriptor. Even vertex