MALI_ASTC_HDR_SUPP = MALI_FORMAT_COMPRESSED | 0x17,
MALI_RGB565 = MALI_FORMAT_SPECIAL | 0x0,
+ MALI_RGB5_X1_UNORM = MALI_FORMAT_SPECIAL | 0x1,
MALI_RGB5_A1_UNORM = MALI_FORMAT_SPECIAL | 0x2,
MALI_RGB10_A2_UNORM = MALI_FORMAT_SPECIAL | 0x3,
MALI_RGB10_A2_SNORM = MALI_FORMAT_SPECIAL | 0x5,
MALI_RGB10_A2UI = MALI_FORMAT_SPECIAL | 0x7,
MALI_RGB10_A2I = MALI_FORMAT_SPECIAL | 0x9,
- /* YUV formats */
- MALI_NV12 = MALI_FORMAT_SPECIAL | 0xc,
+ MALI_RGB332_UNORM = MALI_FORMAT_SPECIAL | 0xb,
+ MALI_RGB233_UNORM = MALI_FORMAT_SPECIAL | 0xc,
- MALI_Z32_UNORM = MALI_FORMAT_SPECIAL | 0xD,
+ MALI_Z32_UNORM = MALI_FORMAT_SPECIAL | 0xd,
MALI_R32_FIXED = MALI_FORMAT_SPECIAL | 0x11,
MALI_RG32_FIXED = MALI_FORMAT_SPECIAL | 0x12,
MALI_RGB32_FIXED = MALI_FORMAT_SPECIAL | 0x13,
/* On Bifrost systems (all MRT), each render target gets one of these
* descriptors */
+enum bifrost_shader_type { /* Type of the shader output variable, as held in the shader_type field of bifrost_blend_rt */
+ BIFROST_BLEND_F16 = 0, /* f16 (mediump float) */
+ BIFROST_BLEND_F32 = 1, /* f32 (highp float) */
+ BIFROST_BLEND_I32 = 2, /* i32 (highp int) */
+ BIFROST_BLEND_U32 = 3, /* u32 (highp uint) */
+ BIFROST_BLEND_I16 = 4, /* i16 (mediump int) */
+ BIFROST_BLEND_U16 = 5, /* u16 (mediump uint) */
+};
+
+#define BIFROST_MAX_RENDER_TARGET_COUNT 8
+
struct bifrost_blend_rt {
/* This is likely an analogue of the flags on
* midgard_blend_rt */
* constant_hi = int(f / 255)
* constant_lo = 65535*f - (65535/255) * constant_hi
*/
-
u16 constant;
struct mali_blend_equation equation;
+
/*
* - 0x19 normally
* - 0x3 when this slot is unused (everything else is 0 except the index)
* - 0x11 when this is the fourth slot (and it's used)
-+ * - 0 when there is a blend shader
+ * - 0 when there is a blend shader
*/
u16 unk2;
+
/* increments from 0 to 3 */
u16 index;
- union {
- struct {
- /* So far, I've only seen:
- * - R001 for 1-component formats
- * - RG01 for 2-component formats
- * - RGB1 for 3-component formats
- * - RGBA for 4-component formats
- */
- u32 swizzle : 12;
- enum mali_format format : 8;
-
- /* Type of the shader output variable. Note, this can
- * be different from the format.
- *
- * 0: f16 (mediump float)
- * 1: f32 (highp float)
- * 2: i32 (highp int)
- * 3: u32 (highp uint)
- * 4: i16 (mediump int)
- * 5: u16 (mediump uint)
- */
- u32 shader_type : 3;
- u32 zero : 9;
- };
-
- /* Only the low 32 bits of the blend shader are stored, the
- * high 32 bits are implicitly the same as the original shader.
- * According to the kernel driver, the program counter for
- * shaders is actually only 24 bits, so shaders cannot cross
- * the 2^24-byte boundary, and neither can the blend shader.
- * The blob handles this by allocating a 2^24 byte pool for
- * shaders, and making sure that any blend shaders are stored
- * in the same pool as the original shader. The kernel will
- * make sure this allocation is aligned to 2^24 bytes.
- */
- u32 shader;
- };
+ union {
+ struct {
+ /* So far, I've only seen:
+ * - R001 for 1-component formats
+ * - RG01 for 2-component formats
+ * - RGB1 for 3-component formats
+ * - RGBA for 4-component formats
+ */
+ u32 swizzle : 12;
+ enum mali_format format : 8;
+
+ /* Type of the shader output variable. Note, this can
+ * be different from the format.
+ * See enum bifrost_shader_type for the values of shader_type.
+ */
+ u32 zero1 : 4;
+ u32 shader_type : 3;
+ u32 zero2 : 5;
+ };
+
+ /* Only the low 32 bits of the blend shader are stored, the
+ * high 32 bits are implicitly the same as the original shader.
+ * According to the kernel driver, the program counter for
+ * shaders is actually only 24 bits, so shaders cannot cross
+ * the 2^24-byte boundary, and neither can the blend shader.
+ * The blob handles this by allocating a 2^24 byte pool for
+ * shaders, and making sure that any blend shaders are stored
+ * in the same pool as the original shader. The kernel will
+ * make sure this allocation is aligned to 2^24 bytes.
+ */
+ u32 shader;
+ };
} __attribute__((packed));
/* Descriptor for the shader. Following this is at least one, up to four blend
* - R61 : gl_SampleMaskIn and gl_SampleID, used by
* varying interpolation.
* - R62 : unknown (bit always unset).
+ *
+ * Later GPUs (starting with Mali-G52?) support
+ * preloading float varyings into r0-r7. This is
+ * indicated by setting 0x40. There is no distinction
+ * here between 1 varying and 2.
*/
u32 preload_regs : 8;
/* In units of 8 bytes or 64 bits, since the
} midgard2;
};
- /* zero on bifrost */
- u32 unknown2_8;
+ u32 padding;
/* Blending information for the older non-MRT Midgard HW. Check for
* MALI_HAS_BLEND_SHADER to decide how to interpret.
/* ORed into an MFBD address to specify the fbx section is included */
#define MALI_MFBD_TAG_EXTRA (0x2)
-struct mali_uniform_buffer_meta {
- /* This is actually the size minus 1 (MALI_POSITIVE), in units of 16
- * bytes. This gives a maximum of 2^14 bytes, which just so happens to
- * be the GL minimum-maximum for GL_MAX_UNIFORM_BLOCK_SIZE.
- */
- u64 size : 10;
+/* Uniform buffer objects are 64-bit fields divided as:
+ *
+ * u64 size : 10;
+ * mali_ptr ptr : 64 - 10;
+ *
+ * The size is actually the size minus 1 (MALI_POSITIVE), in units of 16 bytes.
+ * This gives a maximum of 2^14 bytes, which just so happens to be the GL
+ * minimum-maximum for GL_MAX_UNIFORM_BLOCK_SIZE.
+ *
+ * The pointer is missing the bottom 2 bits and top 8 bits. The top 8 bits
+ * should be 0 for userspace pointers, according to
+ * https://lwn.net/Articles/718895/. By reusing these bits, we can make each
+ * entry in the table only 64 bits.
+ */
- /* This is missing the bottom 2 bits and top 8 bits. The top 8 bits
- * should be 0 for userspace pointers, according to
- * https://lwn.net/Articles/718895/. By reusing these bits, we can make
- * each entry in the table only 64 bits.
- */
- mali_ptr ptr : 64 - 10;
-};
+#define MALI_MAKE_UBO(elements, ptr) \
+ (MALI_POSITIVE((elements)) | (((ptr) >> 2) << 10)) /* Packs one UBO table entry: MALI_POSITIVE(elements) ORed with ptr shifted right by 2, then left by 10. NOTE(review): nothing here masks the size to 10 bits or widens ptr to 64 bits; callers presumably guarantee both - confirm. */
/* On Bifrost, these fields are the same between the vertex and tiler payloads.
* They also seem to be the same between Bifrost and Midgard. They're shared in
u64 pointer;
};
-struct bifrost_vertex_only {
- u32 unk2; /* =0x2 */
-
- u32 zero0;
-
- u64 zero1;
-} __attribute__((packed));
-
struct bifrost_tiler_heap_meta {
u32 zero;
u32 heap_size;
mali_ptr tiler_heap_end;
/* hierarchy weights? but they're still 0 after the job has run... */
- u32 zeros[12];
+ u32 zeros[10]; /* ten 32-bit words; together with the two fields below this spans twelve words */
+ u32 unk1; /* purpose unknown */
+ u32 unk7e007e; /* purpose unknown; NOTE(review): the name suggests an observed value of 0x7e007e - confirm */
} __attribute__((packed));
struct bifrost_tiler_meta {
- u64 zero0;
- u16 hierarchy_mask;
+ u32 tiler_heap_next_start; /* To be written by the GPU */
+ u32 used_hierarchy_mask; /* To be written by the GPU */
+ u16 hierarchy_mask; /* Five values observed: 0xa, 0x14, 0x28, 0x50, 0xa0 */
u16 flags;
u16 width;
u16 height;
- u64 zero1;
+ u64 zero0;
mali_ptr tiler_heap_meta;
/* TODO what is this used for? */
u64 zeros[20];
mali_ptr tiler_meta;
u64 zero1, zero2, zero3, zero4, zero5, zero6;
-
- u32 gl_enables;
- u32 zero7;
- u64 zero8;
} __attribute__((packed));
struct mali_vertex_tiler_postfix {
+ u16 gl_enables; // 0x6 on Midgard, 0x2 on Bifrost
+
+ /* Both zero for non-instanced draws. For instanced draws, a
+ * decomposition of padded_num_vertices. See the comments about the
+ * corresponding fields in mali_attr for context. */
+
+ unsigned instance_shift : 5;
+ unsigned instance_odd : 3;
+
+ u8 zero4;
+
+ /* Offset for first vertex in buffer */
+ u32 offset_start;
+
+ u64 zero5;
+
/* Zero for vertex jobs. Pointer to the position (gl_Position) varying
* output from the vertex shader for tiler jobs.
*/
*/
u64 uniform_buffers;
- /* This is a pointer to an array of pointers to the texture
+ /* On Bifrost, this is a pointer to an array of bifrost_texture_descriptor.
+ * On Midgard, this is a pointer to an array of pointers to the texture
* descriptors, number of pointers bounded by number of textures. The
* indirection is needed to accomodate varying numbers and sizes of
* texture descriptors */
- u64 texture_trampoline;
+ u64 textures;
/* For OpenGL, from what I've seen, this is intimately connected to
* texture_meta. cwabbott says this is not the case under Vulkan, hence
struct midgard_payload_vertex_tiler {
struct mali_vertex_tiler_prefix prefix;
-
- u16 gl_enables; // 0x5
-
- /* Both zero for non-instanced draws. For instanced draws, a
- * decomposition of padded_num_vertices. See the comments about the
- * corresponding fields in mali_attr for context. */
-
- unsigned instance_shift : 5;
- unsigned instance_odd : 3;
-
- u8 zero4;
-
- /* Offset for first vertex in buffer */
- u32 offset_start;
-
- u64 zero5;
-
struct mali_vertex_tiler_postfix postfix;
union midgard_primitive_size primitive_size;
struct bifrost_payload_vertex {
struct mali_vertex_tiler_prefix prefix;
- struct bifrost_vertex_only vertex;
struct mali_vertex_tiler_postfix postfix;
} __attribute__((packed));
struct bifrost_tiler_only tiler;
struct mali_vertex_tiler_postfix tiler_postfix;
u64 padding; /* zero */
- struct bifrost_vertex_only vertex;
struct mali_vertex_tiler_postfix vertex_postfix;
} __attribute__((packed));
uint32_t unknown7;
} __attribute__((packed));
+/* While Midgard texture descriptors are variable length, Bifrost descriptors
+ * are fixed-size (like samplers), with extra pointers to expand if necessary */
+
+struct bifrost_texture_descriptor { /* Packed, fixed-layout texture descriptor used on Bifrost */
+ unsigned format_unk : 4; /* 2 */
+ enum mali_texture_type type : 2;
+ unsigned format_unk2 : 16; /* 0 */
+ enum mali_format format : 8;
+ unsigned srgb : 1;
+ unsigned format_unk3 : 1; /* 0; the six bitfields up to here total 4+2+16+8+1+1 = 32 bits */
+
+ uint16_t width; /* MALI_POSITIVE */
+ uint16_t height; /* MALI_POSITIVE */
+
+ /* OpenGL swizzle */
+ unsigned swizzle : 12;
+ enum mali_texture_layout layout : 4;
+ uint8_t levels : 8; /* Number of levels-1 if mipmapped, 0 if not */
+ unsigned unk1 : 8; /* purpose unknown; swizzle+layout+levels+unk1 total 12+4+8+8 = 32 bits */
+
+ unsigned levels_unk : 24; /* 0 */
+ unsigned level_2 : 8; /* Number of levels, again? (24+8 = 32 bits with levels_unk) */
+
+ mali_ptr payload; /* NOTE(review): presumably points at the data the fixed part cannot hold - confirm what it addresses */
+
+ uint16_t array_size; /* NOTE(review): unlike width/height, not marked MALI_POSITIVE - confirm encoding */
+ uint16_t unk4; /* purpose unknown */
+
+ uint16_t depth; /* NOTE(review): unlike width/height, not marked MALI_POSITIVE - confirm encoding */
+ uint16_t unk5; /* purpose unknown */
+} __attribute__((packed));
+
/* filter_mode */
#define MALI_SAMP_MAG_NEAREST (1 << 0)
float border_color[4];
} __attribute__((packed));
+/* Bifrost sampler descriptors look pretty similar */
+
+#define BIFROST_SAMP_MIN_NEAREST (1) /* NOTE(review): presumably the value of bifrost_sampler_descriptor.min_filter selecting nearest - confirm */
+#define BIFROST_SAMP_MAG_LINEAR (1) /* NOTE(review): presumably the value of bifrost_sampler_descriptor.mag_filter selecting linear; same value as above but opposite polarity - confirm */
+
+struct bifrost_sampler_descriptor { /* Packed sampler descriptor used on Bifrost */
+ uint8_t unk1; /* purpose unknown */
+
+ enum mali_wrap_mode wrap_r : 4;
+ enum mali_wrap_mode wrap_t : 4;
+ enum mali_wrap_mode wrap_s : 4;
+ uint8_t unk8 : 4; /* purpose unknown; the three wrap modes plus unk8 total 4 x 4 = 16 bits */
+
+ uint8_t unk2 : 1;
+ uint8_t norm_coords : 1;
+ uint8_t unk3 : 1;
+ uint8_t min_filter : 1; /* NOTE(review): presumably compared against BIFROST_SAMP_MIN_NEAREST - confirm */
+ uint8_t zero1 : 1;
+ uint8_t mag_filter : 1; /* NOTE(review): presumably compared against BIFROST_SAMP_MAG_LINEAR - confirm */
+ uint8_t mip_filter : 1; /* the seven 1-bit fields above name only 7 bits; NOTE(review): the remaining bit before min_lod is unnamed padding - confirm it is always zero */
+
+ int16_t min_lod;
+ int16_t max_lod;
+
+ uint64_t zero2;
+ uint64_t zero3;
+ uint64_t zero4;
+} __attribute__((packed));
+
/* viewport0/viewport1 form the arguments to glViewport. viewport1 is
* modified by MALI_POSITIVE; viewport0 is as-is.
*/
mali_ptr unknown1;
} __attribute__((packed));
+/* Configures multisampling on Bifrost fragment jobs */
+struct bifrost_multisampling { /* Packed; shares a union with struct mali_shared_memory at the start of struct mali_framebuffer */
+ u64 zero1;
+ u64 zero2;
+ mali_ptr sample_locations; /* NOTE(review): presumably the GPU address of a sample-position table - confirm format */
+ u64 zero4; /* NOTE(review): named zero4 although this struct has no zero3 */
+} __attribute__((packed));
struct mali_single_framebuffer {
struct mali_shared_memory shared_memory;
unsigned nr_channels : 2; /* MALI_POSITIVE */
- unsigned unk3 : 5;
+ unsigned unk3 : 4;
+ unsigned unk4 : 1;
enum mali_block_format block : 2;
unsigned flags : 4;
};
- u64 zero3, zero4;
+ u32 clear_color_1;
+ u32 clear_color_2;
+ u64 zero3;
} __attribute__((packed));
/* Flags for mfbd_flags */
#define MALI_MFBD_EXTRA (1 << 13)
struct mali_framebuffer {
- struct mali_shared_memory shared_memory;
+ union {
+ struct mali_shared_memory shared_memory;
+ struct bifrost_multisampling msaa;
+ };
/* 0x20 */
u16 width1, height1;
u32 mfbd_flags : 24; // = 0x100
float clear_depth;
- struct midgard_tiler_descriptor tiler;
+ union {
+ struct midgard_tiler_descriptor tiler;
+ struct {
+ mali_ptr tiler_meta;
+ u32 zeros[16];
+ };
+ };
/* optional: struct mali_framebuffer_extra extra */
/* struct mali_render_target rts[] */