X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fpanfrost%2Finclude%2Fpanfrost-job.h;h=879023eacb417f2af2040a44f09ca23667570406;hb=02e768e6a96e1a0aad6d1cbb002bfb883078ad40;hp=af70f56be3ab8ce730938678ca814377a934604e;hpb=acf77cbb3980a9918b2dd476efbcacadeebc6a88;p=mesa.git diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index af70f56be3a..879023eacb4 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -53,43 +53,6 @@ typedef uint64_t mali_ptr; #define MALI_CULL_FACE_FRONT (1 << 6) #define MALI_CULL_FACE_BACK (1 << 7) -/* Flags apply to unknown2_3? */ - -#define MALI_HAS_MSAA (1 << 0) - -/* Execute fragment shader per-sample if set (e.g. to implement gl_SampleID - * reads) */ -#define MALI_PER_SAMPLE (1 << 2) -#define MALI_CAN_DISCARD (1 << 5) - -/* Applies on SFBD systems, specifying that programmable blending is in use */ -#define MALI_HAS_BLEND_SHADER (1 << 6) - -/* func is mali_func */ -#define MALI_DEPTH_FUNC(func) (func << 8) -#define MALI_GET_DEPTH_FUNC(flags) ((flags >> 8) & 0x7) -#define MALI_DEPTH_FUNC_MASK MALI_DEPTH_FUNC(0x7) - -#define MALI_DEPTH_WRITEMASK (1 << 11) - -#define MALI_DEPTH_CLIP_NEAR (1 << 12) -#define MALI_DEPTH_CLIP_FAR (1 << 13) - -/* Next flags to unknown2_4 */ -#define MALI_STENCIL_TEST (1 << 0) - -#define MALI_ALPHA_TO_COVERAGE (1 << 1) - -#define MALI_NO_DITHER (1 << 9) -#define MALI_DEPTH_RANGE_A (1 << 12) -#define MALI_DEPTH_RANGE_B (1 << 13) -#define MALI_NO_MSAA (1 << 14) - -#define MALI_MASK_R (1 << 0) -#define MALI_MASK_G (1 << 1) -#define MALI_MASK_B (1 << 2) -#define MALI_MASK_A (1 << 3) - enum mali_nondominant_mode { MALI_BLEND_NON_MIRROR = 0, MALI_BLEND_NON_ZERO = 1 @@ -290,104 +253,6 @@ struct bifrost_blend_rt { }; } __attribute__((packed)); -/* Descriptor for the shader. Following this is at least one, up to four blend - * descriptors for each active render target */ - -struct mali_shader_meta { - mali_ptr shader; - u16 sampler_count; - u16 texture_count; - u16 attribute_count; - u16 varying_count; - - union { - struct mali_bifrost_properties_packed bifrost_props; - struct mali_midgard_properties_packed midgard_props; - }; - - /* Same as glPolygoOffset() arguments */ - float depth_units; - float depth_factor; - - u32 unknown2_2; - - /* Generated from SAMPLE_COVERAGE_VALUE and SAMPLE_COVERAGE_INVERT. See - * 13.8.3 ("Multisample Fragment Operations") in the OpenGL ES 3.2 - * specification. Only matters when multisampling is enabled. */ - u16 coverage_mask; - - u16 unknown2_3; - - u8 stencil_mask_front; - u8 stencil_mask_back; - u16 unknown2_4; - - struct mali_stencil_packed stencil_front; - struct mali_stencil_packed stencil_back; - - union { - struct { - u32 unk3 : 7; - /* On Bifrost, some system values are preloaded in - * registers R55-R62 by the thread dispatcher prior to - * the start of shader execution. This is a bitfield - * with one entry for each register saying which - * registers need to be preloaded. Right now, the known - * values are: - * - * Vertex/compute: - * - R55 : gl_LocalInvocationID.xy - * - R56 : gl_LocalInvocationID.z + unknown in high 16 bits - * - R57 : gl_WorkGroupID.x - * - R58 : gl_WorkGroupID.y - * - R59 : gl_WorkGroupID.z - * - R60 : gl_GlobalInvocationID.x - * - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base) - * - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base) - * - * Fragment: - * - R55 : unknown, never seen (but the bit for this is - * always set?) - * - R56 : unknown (bit always unset) - * - R57 : gl_PrimitiveID - * - R58 : gl_FrontFacing in low bit, potentially other stuff - * - R59 : u16 fragment coordinates (used to compute - * gl_FragCoord.xy, together with sample positions) - * - R60 : gl_SampleMask (used in epilog, so pretty - * much always used, but the bit is always 0 -- is - * this just always pushed?) - * - R61 : gl_SampleMaskIn and gl_SampleID, used by - * varying interpolation. - * - R62 : unknown (bit always unset). - * - * Later GPUs (starting with Mali-G52?) support - * preloading float varyings into r0-r7. This is - * indicated by setting 0x40. There is no distinction - * here between 1 varying and 2. - */ - u32 preload_regs : 8; - /* In units of 8 bytes or 64 bits, since the - * uniform/const port loads 64 bits at a time. - */ - u32 uniform_count : 7; - u32 unk4 : 10; // = 2 - } bifrost2; - struct { - u32 unknown2_7; - } midgard2; - }; - - u32 padding; - - /* Blending information for the older non-MRT Midgard HW. Check for - * MALI_HAS_BLEND_SHADER to decide how to interpret. - */ - - union midgard_blend blend; -} __attribute__((packed)); - -/* This only concerns hardware jobs */ - /* Possible values for job_descriptor_size */ #define MALI_JOB_32 0 @@ -555,106 +420,9 @@ struct mali_payload_write_value { * fused payloads. */ -/* Applies to unknown_draw */ - -#define MALI_DRAW_INDEXED_UINT8 (0x10) -#define MALI_DRAW_INDEXED_UINT16 (0x20) -#define MALI_DRAW_INDEXED_UINT32 (0x30) -#define MALI_DRAW_INDEXED_SIZE (0x30) -#define MALI_DRAW_INDEXED_SHIFT (4) - -#define MALI_DRAW_VARYING_SIZE (0x100) - -/* Set to use first vertex as the provoking vertex for flatshading. Clear to - * use the last vertex. This is the default in DX and VK, but not in GL. */ - -#define MALI_DRAW_FLATSHADE_FIRST (0x800) - -#define MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX (0x10000) - struct mali_vertex_tiler_prefix { - /* This is a dynamic bitfield containing the following things in this order: - * - * - gl_WorkGroupSize.x - * - gl_WorkGroupSize.y - * - gl_WorkGroupSize.z - * - gl_NumWorkGroups.x - * - gl_NumWorkGroups.y - * - gl_NumWorkGroups.z - * - * The number of bits allocated for each number is based on the *_shift - * fields below. For example, workgroups_y_shift gives the bit that - * gl_NumWorkGroups.y starts at, and workgroups_z_shift gives the bit - * that gl_NumWorkGroups.z starts at (and therefore one after the bit - * that gl_NumWorkGroups.y ends at). The actual value for each gl_* - * value is one more than the stored value, since if any of the values - * are zero, then there would be no invocations (and hence no job). If - * there were 0 bits allocated to a given field, then it must be zero, - * and hence the real value is one. - * - * Vertex jobs reuse the same job dispatch mechanism as compute jobs, - * effectively doing glDispatchCompute(1, vertex_count, instance_count) - * where vertex count is the number of vertices. - */ - u32 invocation_count; - - /* Bitfield for shifts: - * - * size_y_shift : 5 - * size_z_shift : 5 - * workgroups_x_shift : 6 - * workgroups_y_shift : 6 - * workgroups_z_shift : 6 - * workgroups_x_shift_2 : 4 - */ - u32 invocation_shifts; - - u32 draw_mode : 4; - u32 unknown_draw : 22; - - /* This is the the same as workgroups_x_shift_2 in compute shaders, but - * always 5 for vertex jobs and 6 for tiler jobs. I suspect this has - * something to do with how many quads get put in the same execution - * engine, which is a balance (you don't want to starve the engine, but - * you also want to distribute work evenly). - */ - u32 workgroups_x_shift_3 : 6; - - - /* Negative of min_index. This is used to compute - * the unbiased index in tiler/fragment shader runs. - * - * The hardware adds offset_bias_correction in each run, - * so that absent an index bias, the first vertex processed is - * genuinely the first vertex (0). But with an index bias, - * the first vertex process is numbered the same as the bias. - * - * To represent this more conviniently: - * unbiased_index = lower_bound_index + - * index_bias + - * offset_bias_correction - * - * This is done since the hardware doesn't accept a index_bias - * and this allows it to recover the unbiased index. - */ - int32_t offset_bias_correction; - u32 zero1; - - /* Like many other strictly nonzero quantities, index_count is - * subtracted by one. For an indexed cube, this is equal to 35 = 6 - * faces * 2 triangles/per face * 3 vertices/per triangle - 1. That is, - * for an indexed draw, index_count is the number of actual vertices - * rendered whereas invocation_count is the number of unique vertices - * rendered (the number of times the vertex shader must be invoked). - * For non-indexed draws, this is just equal to invocation_count. */ - - u32 index_count; - - /* No hidden structure; literally just a pointer to an array of uint - * indices (width depends on flags). Thanks, guys, for not making my - * life insane for once! NULL for non-indexed draws. */ - - u64 indices; + struct mali_invocation_packed invocation; + struct mali_primitive_packed primitive; } __attribute__((packed)); /* Point size / line width can either be specified as a 32-bit float (for