MALI_ATTR_NPOT_DIVIDE = 4,
};
+/* This magic "pseudo-address" is used as `elements` to implement
+ * gl_PointCoord. When read from a fragment shader, it generates a point
+ * coordinate per the OpenGL ES 2.0 specification. Flipped coordinate spaces
+ * require an affine transformation in the shader. */
+
+#define MALI_VARYING_POINT_COORD (0x60)
+
union mali_attr {
/* This is used for actual attributes. */
struct {
struct hash_table_u64 *uniform_nir_to_mdg;
int uniform_count;
- struct hash_table_u64 *varying_nir_to_mdg;
- int varying_count;
-
/* Just the count of the max register used. Higher count => higher
* register pressure */
int work_registers;
* guarantee correctness when considering some
* (common) edge cases XXX: FIXME */
- /* Look up how it was actually laid out */
-
- void *entry = _mesa_hash_table_u64_search(ctx->varying_nir_to_mdg, offset + 1);
-
- if (!entry) {
- DBG("WARNING: skipping varying\n");
- break;
- }
-
- offset = (uintptr_t) (entry) - 1;
-
/* If this varying corresponds to a constant (why?!),
* emit that now since it won't get picked up by
* hoisting (since there is no corresponding move
}
}
- if (ctx->stage == MESA_SHADER_VERTEX) {
- ctx->varying_nir_to_mdg = _mesa_hash_table_u64_create(NULL);
-
- /* First, collect the special varyings */
- nir_foreach_variable(var, &nir->outputs) {
- if (var->data.location == VARYING_SLOT_POS) {
- /* Set position first, always. It takes up two
- * spots, the latter one is de facto unused (at
- * least from the shader's perspective), we
- * just need to skip over the spot*/
-
- _mesa_hash_table_u64_insert(ctx->varying_nir_to_mdg, var->data.driver_location + 1, (void *) ((uintptr_t) (0 + 1)));
- ctx->varying_count = MAX2(ctx->varying_count, 2);
- } else if (var->data.location == VARYING_SLOT_PSIZ) {
- /* Set point size second (third, see above) */
- _mesa_hash_table_u64_insert(ctx->varying_nir_to_mdg, var->data.driver_location + 1, (void *) ((uintptr_t) (2 + 1)));
- ctx->varying_count = MAX2(ctx->varying_count, 3);
-
- program->writes_point_size = true;
- }
- }
+ /* Record the varying mapping for the command stream's bookkeeping */
- /* Now, collect normal varyings */
-
- nir_foreach_variable(var, &nir->outputs) {
- if (var->data.location == VARYING_SLOT_POS || var->data.location == VARYING_SLOT_PSIZ) continue;
+ struct exec_list *varyings =
+ ctx->stage == MESA_SHADER_VERTEX ? &nir->outputs : &nir->inputs;
- for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
- int id = ctx->varying_count++;
- _mesa_hash_table_u64_insert(ctx->varying_nir_to_mdg, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
- }
- }
+ nir_foreach_variable(var, varyings) {
+ unsigned loc = var->data.driver_location;
+ program->varyings[loc] = var->data.location;
}
-
-
/* Lower vars -- not I/O -- before epilogue */
NIR_PASS_V(nir, nir_lower_var_copies);
int attribute_count;
int varying_count;
+ unsigned varyings[32];
+
/* Boolean properties of the program */
bool can_discard;
bool writes_point_size;
state->can_discard = program.can_discard;
state->writes_point_size = program.writes_point_size;
+ state->reads_point_coord = false;
/* Separate as primary uniform count is truncated */
state->uniform_count = program.uniform_count;
- /* gl_Position eats up an extra spot */
- if (type == JOB_TYPE_VERTEX)
- meta->varying_count += 1;
-
- /* Note: gl_FragCoord does -not- eat an extra spot; it will be included
- * in our count if we need it */
-
meta->midgard1.unknown2 = 8; /* XXX */
- /* Varyings are known only through the shader. We choose to upload this
- * information with the vertex shader, though the choice is perhaps
- * arbitrary */
-
- if (type == JOB_TYPE_VERTEX) {
- struct panfrost_varyings *varyings = &state->varyings;
-
- /* Measured in vec4 words. Don't include gl_Position */
- int varying_count = program.varying_count;
-
- /* Setup two buffers, one for position, the other for normal
- * varyings, as seen in traces. TODO: Are there other
- * configurations we might use? */
+ unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
+ unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
+ unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
- varyings->varying_buffer_count = 2;
+ /* Iterate the varyings and emit the corresponding descriptor */
+ unsigned general_purpose_count = 0;
- /* mediump vec4s sequentially */
- varyings->varyings_stride[0] = (2 * sizeof(float)) * varying_count;
+ for (unsigned i = 0; i < program.varying_count; ++i) {
+ unsigned location = program.varyings[i];
- /* highp gl_Position */
- varyings->varyings_stride[1] = 4 * sizeof(float);
-
- /* mediump gl_PointSize */
- if (program.writes_point_size) {
- ++varyings->varying_buffer_count;
- varyings->varyings_stride[2] = 2; /* sizeof(fp16) */
- }
-
- /* Setup gl_Position, its weirdo analogue, and gl_PointSize (optionally) */
- unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
- unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
-
- struct mali_attr_meta vertex_special_varyings[] = {
- {
- .index = 1,
- .format = MALI_VARYING_POS,
-
- .swizzle = default_vec4_swizzle,
- .unknown1 = 0x2,
- },
- {
- .index = 1,
- .format = MALI_RGBA16F,
-
- /* TODO: Wat? yyyy swizzle? */
- .swizzle = 0x249,
- .unknown1 = 0x0,
- },
- {
- .index = 2,
- .format = MALI_R16F,
- .swizzle = default_vec1_swizzle,
- .unknown1 = 0x2
- }
+ /* Default to a vec4 varying */
+ struct mali_attr_meta v = {
+ .format = MALI_RGBA16F,
+ .swizzle = default_vec4_swizzle,
+ .unknown1 = 0x2,
};
- /* How many special vertex varyings are actually required? */
- int vertex_special_count = 2 + (program.writes_point_size ? 1 : 0);
-
- /* Setup actual varyings. XXX: Don't assume vec4 */
-
- struct mali_attr_meta mali_varyings[PIPE_MAX_ATTRIBS];
-
- for (int i = 0; i < varying_count; ++i) {
- struct mali_attr_meta vec4_varying_meta = {
- .index = 0,
- .format = MALI_RGBA16F,
- .swizzle = default_vec4_swizzle,
- .unknown1 = 0x2,
-
- /* Set offset to keep everything back-to-back in
- * the same buffer */
- .src_offset = 8 * i,
- };
-
- mali_varyings[i] = vec4_varying_meta;
+ /* Check for special cases, otherwise assume general varying */
+
+ if (location == VARYING_SLOT_POS) {
+ v.index = 1;
+ v.format = MALI_VARYING_POS;
+ } else if (location == VARYING_SLOT_PSIZ) {
+ v.index = 2;
+ v.format = MALI_R16F;
+ v.swizzle = default_vec1_swizzle;
+
+ state->writes_point_size = true;
+ } else if (location == VARYING_SLOT_PNTC) {
+ v.index = 3;
+ v.format = MALI_RG16F;
+ v.swizzle = default_vec2_swizzle;
+
+ state->reads_point_coord = true;
+ } else {
+ v.index = 0;
+ v.src_offset = 8 * (general_purpose_count++);
}
- /* We don't count the weirdo gl_Position in our varying count */
- varyings->varying_count = varying_count - 1;
-
- /* In this context, position_meta represents the implicit
- * gl_FragCoord varying. So, upload all the varyings */
-
- unsigned varyings_size = sizeof(struct mali_attr_meta) * varyings->varying_count;
- unsigned vertex_special_size = sizeof(struct mali_attr_meta) * vertex_special_count;
- unsigned vertex_size = vertex_special_size + varyings_size;
- unsigned fragment_size = varyings_size + sizeof(struct mali_attr_meta);
-
- struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, vertex_size + fragment_size, HEAP_DESCRIPTOR);
-
- /* Copy varyings in the follow order:
- * - Position 1, 2
- * - Varyings 1, 2, ..., n
- * - Varyings 1, 2, ..., n (duplicate)
- * - Position 1
- */
-
- memcpy(transfer.cpu, vertex_special_varyings, vertex_special_size);
- memcpy(transfer.cpu + vertex_special_size, mali_varyings, varyings_size);
- memcpy(transfer.cpu + vertex_size, mali_varyings, varyings_size);
- memcpy(transfer.cpu + vertex_size + varyings_size, &vertex_special_varyings[0], sizeof(struct mali_attr_meta));
-
- /* Point to the descriptor */
- varyings->varyings_buffer_cpu = transfer.cpu;
- varyings->varyings_descriptor = transfer.gpu;
- varyings->varyings_descriptor_fragment = transfer.gpu + vertex_size;
+ state->varyings[i] = v;
}
+
+ /* Set the stride for the general purpose fp16 vec4 varyings */
+ state->general_varying_stride = (2 * 4) * general_purpose_count;
}
ctx->set_value_job = transfer.gpu;
}
+/* Allocates space for one varying buffer out of the growing varying_mem
+ * heap and fills out the corresponding mali_attr descriptor with a linear
+ * layout. `stride` is the per-invocation size in bytes and `count` the
+ * number of invocations, so the buffer is stride * count bytes. The heap
+ * cursor (varying_height) is bumped with 64-byte alignment -- see the
+ * pre-existing note that varyings appear to need 64-byte alignment.
+ * Returns the GPU address of the buffer so callers can also wire it into
+ * fixed-function consumers (gl_Position, gl_PointSize pointers). */
+
+static mali_ptr
+panfrost_emit_varyings(
+        struct panfrost_context *ctx,
+        union mali_attr *slot,
+        unsigned stride,
+        unsigned count)
+{
+        mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;
+
+        /* Fill out the descriptor */
+        slot->elements = varying_address | MALI_ATTR_LINEAR;
+        slot->stride = stride;
+        slot->size = stride * count;
+
+        /* Advance the heap cursor; assert we have not overrun the
+         * preallocated varying memory pool */
+        ctx->varying_height += ALIGN(slot->size, 64);
+        assert(ctx->varying_height < ctx->varying_mem.size);
+
+        return varying_address;
+}
+
+/* Emits the descriptor for gl_PointCoord. There is no backing memory:
+ * the MALI_VARYING_POINT_COORD pseudo-address tells the hardware to
+ * generate the point coordinate itself (see the definition's comment),
+ * so stride and size are zero. */
+
+static void
+panfrost_emit_point_coord(union mali_attr *slot)
+{
+        slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR;
+        slot->stride = slot->size = 0;
+}
+
+/* Builds the per-draw varying state. Uploads the vertex shader's and
+ * fragment shader's mali_attr_meta records back-to-back into one
+ * transient allocation and points the vertex/tiler payloads at their
+ * respective halves, then emits the backing varying buffers in the
+ * fixed index order the meta records assume (0 = general purpose,
+ * 1 = gl_Position, 2 = gl_PointSize, 3 = gl_PointCoord).
+ * invocation_count sizes the per-invocation buffers. */
+
+static void
+panfrost_emit_varying_descriptor(
+        struct panfrost_context *ctx,
+        unsigned invocation_count)
+{
+        /* Load the active variant of each shader stage */
+
+        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
+        struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
+
+        /* Allocate the varying descriptor: the VS's metadata followed
+         * immediately by the FS's metadata in a single transient upload */
+
+        size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
+        size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
+
+        struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
+                        vs_size + fs_size);
+
+        memcpy(trans.cpu, vs->varyings, vs_size);
+        memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
+
+        /* Each stage sees only its own half of the combined upload */
+        ctx->payload_vertex.postfix.varying_meta = trans.gpu;
+        ctx->payload_tiler.postfix.varying_meta = trans.gpu + vs_size;
+
+        /* Buffer indices must be in this order per our convention */
+        union mali_attr varyings[PIPE_MAX_ATTRIBS];
+        unsigned idx = 0;
+
+        /* Index 0: general varyings -- use the VS's stride, since those are
+         * more likely to be accurate on desktop */
+
+        panfrost_emit_varyings(ctx, &varyings[idx++],
+                        vs->general_varying_stride, invocation_count);
+
+        /* Index 1: fp32 vec4 gl_Position, also wired into the tiler's
+         * dedicated position pointer */
+        ctx->payload_tiler.postfix.position_varying =
+                panfrost_emit_varyings(ctx, &varyings[idx++],
+                                sizeof(float) * 4, invocation_count);
+
+
+        /* NOTE(review): the point-size buffer is also emitted when only
+         * gl_PointCoord is read, presumably to keep the point-coord
+         * descriptor at its fixed index (3) -- confirm against the meta
+         * emission in the compiler half of this change */
+        if (vs->writes_point_size || fs->reads_point_coord) {
+                /* Index 2: fp16 vec1 gl_PointSize */
+                ctx->payload_tiler.primitive_size.pointer =
+                        panfrost_emit_varyings(ctx, &varyings[idx++],
+                                        2, invocation_count);
+        }
+
+        if (fs->reads_point_coord) {
+                /* Index 3: special gl_PointCoord descriptor, no backing
+                 * memory */
+                panfrost_emit_point_coord(&varyings[idx++]);
+        }
+
+        /* Upload only the descriptors actually emitted; both stages share
+         * the same buffer list */
+        mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
+        ctx->payload_vertex.postfix.varyings = varyings_p;
+        ctx->payload_tiler.postfix.varyings = varyings_p;
+}
+
/* Emits attributes and varying descriptors, which should be called every draw,
* excepting some obscure circumstances */
{
/* TODO: Only update the dirtied buffers */
union mali_attr attrs[PIPE_MAX_ATTRIBS];
- union mali_attr varyings[PIPE_MAX_ATTRIBS];
unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);
}
}
- struct panfrost_varyings *vars = &ctx->vs->variants[ctx->vs->active_variant].varyings;
-
- for (int i = 0; i < vars->varying_buffer_count; ++i) {
- mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;
-
- varyings[i].elements = varying_address | 1;
- varyings[i].stride = vars->varyings_stride[i];
- varyings[i].size = vars->varyings_stride[i] * invocation_count;
-
- /* If this varying has to be linked somewhere, do it now. See
- * pan_assemble.c for the indices. TODO: Use a more generic
- * linking interface */
-
- if (i == 1) {
- /* gl_Position */
- ctx->payload_tiler.postfix.position_varying = varying_address;
- } else if (i == 2) {
- /* gl_PointSize */
- ctx->payload_tiler.primitive_size.pointer = varying_address;
- }
-
- /* Varyings appear to need 64-byte alignment */
- ctx->varying_height += ALIGN(varyings[i].size, 64);
-
- /* Ensure that we fit */
- assert(ctx->varying_height < ctx->varying_mem.size);
- }
-
ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, ctx->vertex_buffer_count * sizeof(union mali_attr));
- mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, vars->varying_buffer_count * sizeof(union mali_attr));
- ctx->payload_vertex.postfix.varyings = varyings_p;
- ctx->payload_tiler.postfix.varyings = varyings_p;
+ panfrost_emit_varying_descriptor(ctx, invocation_count);
}
/* Go through dirty flags and actualise them in the cmdstream. */
struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
/* Late shader descriptor assignments */
+
vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX];
vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX];
vs->tripipe->midgard1.unknown1 = 0x2201;
ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4;
-
- /* Varying descriptor is tied to the vertex shader. Also the
- * fragment shader, I suppose, but it's generated with the
- * vertex shader so */
-
- struct panfrost_varyings *varyings = &ctx->vs->variants[ctx->vs->active_variant].varyings;
-
- ctx->payload_vertex.postfix.varying_meta = varyings->varyings_descriptor;
- ctx->payload_tiler.postfix.varying_meta = varyings->varyings_descriptor_fragment;
}
if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
int blend_work_count;
};
-/* Internal varyings descriptor */
-struct panfrost_varyings {
- /* Varyings information: stride of each chunk of memory used for
- * varyings (similar structure with attributes). Count is just the
- * number of vec4's. Buffer count is the number of varying chunks (<=
- * count). Height is used to calculate gl_Position's position ("it's
- * not a pun, Alyssa!"). Vertex-only varyings == descriptor for
- * gl_Position and something else apparently occupying the same space.
- * Varyings == main varyings descriptors following typical mali_attr
- * conventions. */
-
- unsigned varyings_stride[MAX_VARYINGS];
- unsigned varying_count;
- unsigned varying_buffer_count;
-
- /* Map of the actual varyings buffer */
- uint8_t *varyings_buffer_cpu;
- mali_ptr varyings_descriptor;
- mali_ptr varyings_descriptor_fragment;
-};
-
/* Variants bundle together to form the backing CSO, bundling multiple
* shaders with varying emulated features baked in (alpha test
* parameters, etc) */
int uniform_count;
bool can_discard;
bool writes_point_size;
+ bool reads_point_coord;
- /* Valid for vertex shaders only due to when this is calculated */
- struct panfrost_varyings varyings;
+ unsigned general_varying_stride;
+ struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
/* Information on this particular shader variant */
struct pipe_alpha_state alpha_state;