#include "tgsi/tgsi_dump.h"
void
-panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *meta, const char *src, int type, struct panfrost_shader_state *state)
+panfrost_shader_compile(
+ struct panfrost_context *ctx,
+ struct mali_shader_meta *meta,
+ enum pipe_shader_ir ir_type,
+ const void *ir,
+ gl_shader_stage stage,
+ struct panfrost_shader_state *state,
+ uint64_t *outputs_written)
{
+ struct panfrost_screen *screen = pan_screen(ctx->base.screen);
uint8_t *dst;
nir_shader *s;
- struct pipe_shader_state *cso = state->base;
-
- if (cso->type == PIPE_SHADER_IR_NIR) {
- s = nir_shader_clone(NULL, cso->ir.nir);
+ if (ir_type == PIPE_SHADER_IR_NIR) {
+ s = nir_shader_clone(NULL, ir);
} else {
- assert (cso->type == PIPE_SHADER_IR_TGSI);
- //tgsi_dump(cso->tokens, 0);
- s = tgsi_to_nir(cso->tokens, ctx->base.screen);
+ assert (ir_type == PIPE_SHADER_IR_TGSI);
+ s = tgsi_to_nir(ir, ctx->base.screen);
}
- s->info.stage = type == JOB_TYPE_VERTEX ? MESA_SHADER_VERTEX : MESA_SHADER_FRAGMENT;
+ s->info.stage = stage;
- if (s->info.stage == MESA_SHADER_FRAGMENT) {
+ if (stage == MESA_SHADER_FRAGMENT) {
/* Inject the alpha test now if we need to */
if (state->alpha_state.enabled) {
.alpha_ref = state->alpha_state.ref_value
};
- midgard_compile_shader_nir(s, &program, false);
+ midgard_compile_shader_nir(&ctx->compiler, s, &program, false);
/* Prepare the compiled binary for upload */
int size = program.compiled.size;
* I bet someone just thought that would be a cute pun. At least,
* that's how I'd do it. */
- meta->shader = panfrost_upload(&ctx->shaders, dst, size, true) | program.first_tag;
+ state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE);
+ memcpy(state->bo->cpu, dst, size);
+ meta->shader = state->bo->gpu | program.first_tag;
util_dynarray_fini(&program.compiled);
+ /* Sysvals are prepended to the uniforms, so they are included in the uniform count */
+ program.uniform_count += program.sysval_count;
+ state->sysval_count = program.sysval_count;
+ memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);
+
meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff);
- meta->attribute_count = program.attribute_count;
- meta->varying_count = program.varying_count;
meta->midgard1.work_count = program.work_register_count;
- state->can_discard = program.can_discard;
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ meta->attribute_count = util_bitcount64(s->info.inputs_read);
+ meta->varying_count = util_bitcount64(s->info.outputs_written);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ meta->attribute_count = 0;
+ meta->varying_count = util_bitcount64(s->info.inputs_read);
+ break;
+ case MESA_SHADER_COMPUTE:
+ /* TODO: images */
+ meta->attribute_count = 0;
+ meta->varying_count = 0;
+ break;
+ default:
+ unreachable("Unknown shader state");
+ }
+
+ state->can_discard = s->info.fs.uses_discard;
state->writes_point_size = program.writes_point_size;
+ state->reads_point_coord = false;
+ state->helper_invocations = s->info.fs.needs_helper_invocations;
+
+ if (outputs_written)
+ *outputs_written = s->info.outputs_written;
/* Separate as primary uniform count is truncated */
state->uniform_count = program.uniform_count;
- /* gl_Position eats up an extra spot */
- if (type == JOB_TYPE_VERTEX)
- meta->varying_count += 1;
-
- /* Note: gl_FragCoord does -not- eat an extra spot; it will be included
- * in our count if we need it */
-
meta->midgard1.unknown2 = 8; /* XXX */
- /* Varyings are known only through the shader. We choose to upload this
- * information with the vertex shader, though the choice is perhaps
- * arbitrary */
-
- if (type == JOB_TYPE_VERTEX) {
- struct panfrost_varyings *varyings = &state->varyings;
-
- /* Measured in vec4 words. Don't include gl_Position */
- int varying_count = program.varying_count;
-
- /* Setup two buffers, one for position, the other for normal
- * varyings, as seen in traces. TODO: Are there other
- * configurations we might use? */
-
- varyings->varying_buffer_count = 2;
+ unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
+ unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
+ unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
- /* mediump vec4s sequentially */
- varyings->varyings_stride[0] = (2 * sizeof(float)) * varying_count;
+ /* Iterate the varyings and emit the corresponding descriptor */
+ for (unsigned i = 0; i < meta->varying_count; ++i) {
+ unsigned location = program.varyings[i];
- /* highp gl_Position */
- varyings->varyings_stride[1] = 4 * sizeof(float);
-
- /* mediump gl_PointSize */
- if (program.writes_point_size) {
- ++varyings->varying_buffer_count;
- varyings->varyings_stride[2] = 2; /* sizeof(fp16) */
- }
-
- /* Setup gl_Position, its weirdo analogue, and gl_PointSize (optionally) */
- unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
- unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
-
- struct mali_attr_meta vertex_special_varyings[] = {
- {
- .index = 1,
- .format = MALI_VARYING_POS,
-
- .swizzle = default_vec4_swizzle,
- .unknown1 = 0x2,
- },
- {
- .index = 1,
- .format = MALI_RGBA16F,
-
- /* TODO: Wat? yyyy swizzle? */
- .swizzle = 0x249,
- .unknown1 = 0x0,
- },
- {
- .index = 2,
- .format = MALI_R16F,
- .swizzle = default_vec1_swizzle,
- .unknown1 = 0x2
- }
+ /* Default to a vec4 varying */
+ struct mali_attr_meta v = {
+ .format = MALI_RGBA32F,
+ .swizzle = default_vec4_swizzle,
+ .unknown1 = 0x2,
};
- /* How many special vertex varyings are actually required? */
- int vertex_special_count = 2 + (program.writes_point_size ? 1 : 0);
+ /* Check for special cases; otherwise assume a general varying */
- /* Setup actual varyings. XXX: Don't assume vec4 */
+ if (location == VARYING_SLOT_POS) {
+ if (stage == MESA_SHADER_FRAGMENT)
+ state->reads_frag_coord = true;
+ else
+ v.format = MALI_VARYING_POS;
+ } else if (location == VARYING_SLOT_PSIZ) {
+ v.format = MALI_R16F;
+ v.swizzle = default_vec1_swizzle;
- struct mali_attr_meta mali_varyings[PIPE_MAX_ATTRIBS];
+ state->writes_point_size = true;
+ } else if (location == VARYING_SLOT_PNTC) {
+ v.format = MALI_RG16F;
+ v.swizzle = default_vec2_swizzle;
- for (int i = 0; i < varying_count; ++i) {
- struct mali_attr_meta vec4_varying_meta = {
- .index = 0,
- .format = MALI_RGBA16F,
- .swizzle = default_vec4_swizzle,
- .unknown1 = 0x2,
+ state->reads_point_coord = true;
+ } else if (location == VARYING_SLOT_FACE) {
+ v.format = MALI_R32I;
+ v.swizzle = default_vec1_swizzle;
- /* Set offset to keep everything back-to-back in
- * the same buffer */
- .src_offset = 8 * i,
- };
-
- mali_varyings[i] = vec4_varying_meta;
+ state->reads_face = true;
}
- /* We don't count the weirdo gl_Position in our varying count */
- varyings->varying_count = varying_count - 1;
-
- /* In this context, position_meta represents the implicit
- * gl_FragCoord varying. So, upload all the varyings */
-
- unsigned varyings_size = sizeof(struct mali_attr_meta) * varyings->varying_count;
- unsigned vertex_special_size = sizeof(struct mali_attr_meta) * vertex_special_count;
- unsigned vertex_size = vertex_special_size + varyings_size;
- unsigned fragment_size = varyings_size + sizeof(struct mali_attr_meta);
-
- struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, vertex_size + fragment_size, HEAP_DESCRIPTOR);
-
- /* Copy varyings in the follow order:
- * - Position 1, 2
- * - Varyings 1, 2, ..., n
- * - Varyings 1, 2, ..., n (duplicate)
- * - Position 1
- */
-
- memcpy(transfer.cpu, vertex_special_varyings, vertex_special_size);
- memcpy(transfer.cpu + vertex_special_size, mali_varyings, varyings_size);
- memcpy(transfer.cpu + vertex_size, mali_varyings, varyings_size);
- memcpy(transfer.cpu + vertex_size + varyings_size, &vertex_special_varyings[0], sizeof(struct mali_attr_meta));
-
- /* Point to the descriptor */
- varyings->varyings_buffer_cpu = transfer.cpu;
- varyings->varyings_descriptor = transfer.gpu;
- varyings->varyings_descriptor_fragment = transfer.gpu + vertex_size;
+ state->varyings[i] = v;
+ state->varyings_loc[i] = location;
}
}