#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
+#include "mesa/state_tracker/st_glsl_types.h"
#include "compiler/nir_types.h"
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"
* driver seems to do it that way */
#define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
+#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W)
#define M_LOAD_STORE(name, rname, uname) \
static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
.load_store = { \
.op = midgard_op_##name, \
.mask = 0xF, \
- .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W), \
+ .swizzle = SWIZZLE_XYZW, \
.address = address \
} \
}; \
int temp_count;
int max_hash;
- /* Uniform IDs for mdg */
- struct hash_table_u64 *uniform_nir_to_mdg;
- int uniform_count;
-
/* Just the count of the max register used. Higher count => higher
* register pressure */
int work_registers;
/* Mapping of texture register -> SSA index for unaliasing */
int texture_index[2];
- /* Count of special uniforms (viewport, etc) in vec4 units */
- int special_uniforms;
-
/* If any path hits a discard instruction */
bool can_discard;
/* The index corresponding to the fragment output */
unsigned fragment_output;
+
+ /* The mapping of sysvals to uniform slots, the count, and the inverse
+ * map (stored off-by-one so that zero means 'unassigned') */
+ unsigned sysvals[MAX_SYSVAL_COUNT];
+ unsigned sysval_count;
+ struct hash_table_u64 *sysval_to_id;
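+ /* e.g. after one load_viewport_scale: sysvals[0] = PAN_SYSVAL_VIEWPORT_SCALE,
+ * sysval_count = 1, and sysval_to_id maps that key to 0 + 1 */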
} compiler_context;
/* Append instruction to end of current block */
return glsl_count_attribute_slots(type, false);
}
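+/* Uniform variables get the state tracker's storage sizing rather than
+ * attribute-slot counting, presumably so the offsets produced by
+ * nir_lower_io line up with the uploaded uniform buffer */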
+static int
+uniform_type_size(const struct glsl_type *type)
+{
+ return st_glsl_storage_type_size(type, false);
+}
+
/* Lower fdot2 to a vector multiplication followed by channel addition */
static void
midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));
}
+static int
+midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_viewport_scale:
+ return PAN_SYSVAL_VIEWPORT_SCALE;
+ case nir_intrinsic_load_viewport_offset:
+ return PAN_SYSVAL_VIEWPORT_OFFSET;
+ default:
+ return -1;
+ }
+}
+
+static void
+midgard_nir_assign_sysval_body(compiler_context *ctx, nir_instr *instr)
+{
+ int sysval = -1;
+
+ if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ sysval = midgard_nir_sysval_for_intrinsic(intr);
+ }
+
+ if (sysval < 0)
+ return;
+
+ /* We have a sysval load; check if it's already been assigned */
+
+ if (_mesa_hash_table_u64_search(ctx->sysval_to_id, sysval))
+ return;
+
+ /* It hasn't -- so assign it now! */
+
+ unsigned id = ctx->sysval_count++;
+ _mesa_hash_table_u64_insert(ctx->sysval_to_id, sysval, (void *) ((uintptr_t) id + 1));
+ ctx->sysvals[id] = sysval;
+}
+
+static void
+midgard_nir_assign_sysvals(compiler_context *ctx, nir_shader *shader)
+{
+ ctx->sysval_count = 0;
+
+ nir_foreach_function(function, shader) {
+ if (!function->impl) continue;
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ midgard_nir_assign_sysval_body(ctx, instr);
+ }
+ }
+ }
+}
+
static bool
midgard_nir_lower_fdot2(nir_shader *shader)
{
progress = false;
NIR_PASS(progress, nir, midgard_nir_lower_algebraic);
- NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
NIR_PASS(progress, nir, nir_lower_var_copies);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
#undef ALU_CASE
+static void
+emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset)
+{
+ /* TODO: half-floats */
+
+ if (offset < ctx->uniform_cutoff) {
+ /* Fast path: For the first 16 uniforms,
+ * accesses are 0-cycle, since they're
+ * just a register fetch in the usual
+ * case. So, we alias the registers
+ * while we're still in SSA-space */
+
+ int reg_slot = 23 - offset;
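+ /* e.g. uniform 0 aliases r23, uniform 1 aliases r22, and so on */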
+ alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot));
+ } else {
+ /* Otherwise, read from the 'special'
+ * UBO to access higher-indexed
+ * uniforms, at a performance cost */
+
+ midgard_instruction ins = m_load_uniform_32(dest, offset);
+
+ /* TODO: Don't split */
+ ins.load_store.varying_parameters = (offset & 7) << 7;
+ ins.load_store.address = offset >> 3;
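+ /* e.g. offset 19 splits into address 2 with (19 & 7) << 7
+ * stored in varying_parameters */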
+
+ ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
+ emit_mir_instruction(ctx, ins);
+ }
+}
+
+static void
+emit_sysval_read(compiler_context *ctx, nir_intrinsic_instr *instr)
+{
+ /* First, pull out the destination */
+ unsigned dest = nir_dest_index(ctx, &instr->dest);
+
+ /* Now, figure out which uniform this is */
+ int sysval = midgard_nir_sysval_for_intrinsic(instr);
+ void *val = _mesa_hash_table_u64_search(ctx->sysval_to_id, sysval);
+
+ /* Sysvals are prefix uniforms */
+ unsigned uniform = ((uintptr_t) val) - 1;
+
+ emit_uniform_read(ctx, dest, uniform);
+}
+
static void
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
{
reg = nir_dest_index(ctx, &instr->dest);
if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) {
- /* TODO: half-floats */
-
- int uniform_offset = 0;
-
- if (offset >= SPECIAL_UNIFORM_BASE) {
- /* XXX: Resolve which uniform */
- uniform_offset = 0;
- } else {
- /* Offset away from the special
- * uniform block */
-
- void *entry = _mesa_hash_table_u64_search(ctx->uniform_nir_to_mdg, offset + 1);
-
- /* XXX */
- if (!entry) {
- DBG("WARNING: Unknown uniform %d\n", offset);
- break;
- }
-
- uniform_offset = (uintptr_t) (entry) - 1;
- uniform_offset += ctx->special_uniforms;
- }
-
- if (uniform_offset < ctx->uniform_cutoff) {
- /* Fast path: For the first 16 uniform,
- * accesses are 0-cycle, since they're
- * just a register fetch in the usual
- * case. So, we alias the registers
- * while we're still in SSA-space */
-
- int reg_slot = 23 - uniform_offset;
- alias_ssa(ctx, reg, SSA_FIXED_REGISTER(reg_slot));
- } else {
- /* Otherwise, read from the 'special'
- * UBO to access higher-indexed
- * uniforms, at a performance cost */
-
- midgard_instruction ins = m_load_uniform_32(reg, uniform_offset);
-
- /* TODO: Don't split */
- ins.load_store.varying_parameters = (uniform_offset & 7) << 7;
- ins.load_store.address = uniform_offset >> 3;
-
- ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
- emit_mir_instruction(ctx, ins);
- }
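+ /* Sysvals are emitted first, so user uniforms start after
+ * sysval_count slots */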
+ emit_uniform_read(ctx, reg, ctx->sysval_count + offset);
} else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
/* XXX: Half-floats? */
/* TODO: swizzle, mask */
_mesa_hash_table_u64_insert(ctx->ssa_constants, instr->dest.ssa.index + 1, v);
break;
+ case nir_intrinsic_load_viewport_scale:
+ case nir_intrinsic_load_viewport_offset:
+ emit_sysval_read(ctx, instr);
+ break;
default:
printf ("Unhandled intrinsic\n");
* */
static void
-write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_no)
+write_transformed_position(nir_builder *b, nir_src input_point_src)
{
nir_ssa_def *input_point = nir_ssa_for_src(b, input_point_src, 4);
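+
+ /* The scale/offset sysvals are filled from the pipe viewport state by
+ * the command stream, so one multiply-add maps NDC to screen space */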
+ nir_ssa_def *scale = nir_load_viewport_scale(b);
+ nir_ssa_def *offset = nir_load_viewport_offset(b);
- /* Get viewport from the uniforms */
- nir_intrinsic_instr *load;
- load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
- load->num_components = 4;
- load->src[0] = nir_src_for_ssa(nir_imm_int(b, uniform_no));
- nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
- nir_builder_instr_insert(b, &load->instr);
-
- /* Formatted as <width, height, centerx, centery> */
- nir_ssa_def *viewport_vec4 = &load->dest.ssa;
- nir_ssa_def *viewport_width_2 = nir_channel(b, viewport_vec4, 0);
- nir_ssa_def *viewport_height_2 = nir_channel(b, viewport_vec4, 1);
- nir_ssa_def *viewport_offset = nir_channels(b, viewport_vec4, 0x8 | 0x4);
-
- /* XXX: From uniforms? */
- nir_ssa_def *depth_near = nir_imm_float(b, 0.0);
- nir_ssa_def *depth_far = nir_imm_float(b, 1.0);
-
- /* World space to normalised device coordinates */
+ /* World space to normalised device coordinates to screen space */
nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, input_point, 3));
nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, input_point, 0x7), w_recip);
-
- /* Normalised device coordinates to screen space */
-
- nir_ssa_def *viewport_multiplier = nir_vec2(b, viewport_width_2, viewport_height_2);
- nir_ssa_def *viewport_xy = nir_fadd(b, nir_fmul(b, nir_channels(b, ndc_point, 0x3), viewport_multiplier), viewport_offset);
-
- nir_ssa_def *depth_multiplier = nir_fmul(b, nir_fsub(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
- nir_ssa_def *depth_offset = nir_fmul(b, nir_fadd(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
- nir_ssa_def *screen_depth = nir_fadd(b, nir_fmul(b, nir_channel(b, ndc_point, 2), depth_multiplier), depth_offset);
+ nir_ssa_def *screen = nir_fadd(b, nir_fmul(b, ndc_point, scale), offset);
/* gl_Position will be written out in screenspace xyz, with w set to
* the reciprocal we computed earlier. The transformed w component is
* used in depth clipping computations */
nir_ssa_def *screen_space = nir_vec4(b,
- nir_channel(b, viewport_xy, 0),
- nir_channel(b, viewport_xy, 1),
- screen_depth,
+ nir_channel(b, screen, 0),
+ nir_channel(b, screen, 1),
+ nir_channel(b, screen, 2),
w_recip);
/* Finally, write out the transformed values to the varying */
nir_builder_init(&b, func->impl);
b.cursor = nir_before_instr(instr);
- write_transformed_position(&b, intr->src[0], UNIFORM_VIEWPORT);
+ write_transformed_position(&b, intr->src[0]);
nir_instr_remove(instr);
}
}
/* TODO: Decide this at runtime */
ctx->uniform_cutoff = 8;
- switch (ctx->stage) {
- case MESA_SHADER_VERTEX:
- ctx->special_uniforms = 1;
- break;
-
- default:
- ctx->special_uniforms = 0;
- break;
- }
-
- /* Append epilogue uniforms if necessary. The cmdstream depends on
- * these being at the -end-; see assign_var_locations. */
-
- if (ctx->stage == MESA_SHADER_VERTEX) {
- nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "viewport");
- }
-
/* Assign var locations early, so the epilogue can use them if necessary */
nir_assign_var_locations(&nir->outputs, &nir->num_outputs, glsl_type_size);
nir_assign_var_locations(&nir->inputs, &nir->num_inputs, glsl_type_size);
- nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, glsl_type_size);
+ nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, uniform_type_size);
/* Initialize at a global (not block) level hash tables */
ctx->ssa_to_alias = _mesa_hash_table_u64_create(NULL);
ctx->ssa_to_register = _mesa_hash_table_u64_create(NULL);
ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
+ ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
ctx->leftover_ssa_to_alias = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- /* Assign actual uniform location, skipping over samplers */
-
- ctx->uniform_nir_to_mdg = _mesa_hash_table_u64_create(NULL);
-
- nir_foreach_variable(var, &nir->uniforms) {
- if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;
-
- unsigned length = glsl_get_aoa_size(var->type);
-
- if (!length) {
- length = glsl_get_length(var->type);
- }
-
- if (!length) {
- length = glsl_get_matrix_columns(var->type);
- }
-
- for (int col = 0; col < length; ++col) {
- int id = ctx->uniform_count++;
- _mesa_hash_table_u64_insert(ctx->uniform_nir_to_mdg, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
- }
- }
-
/* Record the varying mapping for the command stream's bookkeeping */
struct exec_list *varyings =
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
NIR_PASS_V(nir, nir_lower_var_copies);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
- NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+
+ NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, uniform_type_size, 0);
+ NIR_PASS_V(nir, nir_lower_io, nir_var_all & ~nir_var_uniform, glsl_type_size, 0);
/* Append vertex epilogue before optimisation, so the epilogue itself
* is optimised */
nir_print_shader(nir, stdout);
}
- /* Assign counts, now that we're sure (post-optimisation) */
+ /* Assign sysvals and counts, now that we're sure
+ * (post-optimisation) */
+
+ midgard_nir_assign_sysvals(ctx, nir);
+
program->uniform_count = nir->num_uniforms;
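+ /* Export the sysval table so the command stream knows which values to
+ * write (one vec4 each) ahead of the user uniforms */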
+ program->sysval_count = ctx->sysval_count;
+ memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count);
program->attribute_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_inputs : 0;
program->varying_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_outputs : ((ctx->stage == MESA_SHADER_FRAGMENT) ? nir->num_inputs : 0);
-
nir_foreach_function(func, nir) {
if (!func->impl)
continue;
}
}
- /* Generate the viewport vector of the form: <width/2, height/2, centerx, centery> */
const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
/* For flipped-Y buffers (signaled by negative scale), the translate is
if (invert_y)
translate_y = ctx->pipe_framebuffer.height - translate_y;
- float viewport_vec4[] = {
- vp->scale[0],
- fabsf(vp->scale[1]),
-
- vp->translate[0],
- translate_y
- };
-
- for (int i = 0; i < PIPE_SHADER_TYPES; ++i) {
+ for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];
- if (i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT) {
- /* It doesn't matter if we don't use all the memory;
- * we'd need a dummy UBO anyway. Compute the max */
-
- size_t size = sizeof(viewport_vec4) + buf->size;
- struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
+ struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
+ struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
+ struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? fs : vs;
- /* Keep track how much we've uploaded */
- off_t offset = 0;
+ /* Allocate room for the sysvals and the uniforms */
+ size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
+ size_t size = sys_size + buf->size;
+ struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
- if (i == PIPE_SHADER_VERTEX) {
- /* Upload viewport */
- memcpy(transfer.cpu + offset, viewport_vec4, sizeof(viewport_vec4));
- offset += sizeof(viewport_vec4);
+ /* Upload sysvals requested by the shader */
+ float *uniforms = (float *) transfer.cpu;
+ for (unsigned i = 0; i < ss->sysval_count; ++i) {
+ int sysval = ss->sysval[i];
+
+ if (sysval == PAN_SYSVAL_VIEWPORT_SCALE) {
+ uniforms[4*i + 0] = vp->scale[0];
+ uniforms[4*i + 1] = fabsf(vp->scale[1]);
+ uniforms[4*i + 2] = vp->scale[2];
+ } else if (sysval == PAN_SYSVAL_VIEWPORT_OFFSET) {
+ uniforms[4*i + 0] = vp->translate[0];
+ uniforms[4*i + 1] = translate_y;
+ uniforms[4*i + 2] = vp->translate[2];
+ } else {
+ assert(0);
}
+ }
- /* Upload uniforms */
- memcpy(transfer.cpu + offset, buf->buffer, buf->size);
+ /* Upload uniforms */
+ memcpy(transfer.cpu + sys_size, buf->buffer, buf->size);
- int uniform_count = 0;
+ int uniform_count = 0;
- struct mali_vertex_tiler_postfix *postfix;
+ struct mali_vertex_tiler_postfix *postfix;
- switch (i) {
- case PIPE_SHADER_VERTEX:
- uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
- postfix = &ctx->payload_vertex.postfix;
- break;
+ switch (i) {
+ case PIPE_SHADER_VERTEX:
+ uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
+ postfix = &ctx->payload_vertex.postfix;
+ break;
- case PIPE_SHADER_FRAGMENT:
- uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
- postfix = &ctx->payload_tiler.postfix;
- break;
+ case PIPE_SHADER_FRAGMENT:
+ uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
+ postfix = &ctx->payload_tiler.postfix;
+ break;
- default:
- DBG("Unknown shader stage %d in uniform upload\n", i);
- assert(0);
- }
+ default:
+ DBG("Unknown shader stage %d in uniform upload\n", i);
+ assert(0);
+ }
- /* Also attach the same buffer as a UBO for extended access */
+ /* Also attach the same buffer as a UBO for extended access */
- struct mali_uniform_buffer_meta uniform_buffers[] = {
- {
- .size = MALI_POSITIVE((2 + uniform_count)),
- .ptr = transfer.gpu >> 2,
- },
- };
+ struct mali_uniform_buffer_meta uniform_buffers[] = {
+ {
+ .size = MALI_POSITIVE((2 + uniform_count)),
+ .ptr = transfer.gpu >> 2,
+ },
+ };
- mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
- postfix->uniforms = transfer.gpu;
- postfix->uniform_buffers = ubufs;
+ mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
+ postfix->uniforms = transfer.gpu;
+ postfix->uniform_buffers = ubufs;
- buf->dirty = 0;
- }
+ buf->dirty = 0;
}
/* TODO: Upload the viewport somewhere more appropriate */