#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/nir_types.h"
-#include "main/imports.h"
+#include "util/imports.h"
#include "compiler/nir/nir_builder.h"
#include "util/half_float.h"
#include "util/u_math.h"
{
midgard_block *blk = rzalloc(ctx, midgard_block);
- blk->predecessors = _mesa_set_create(blk,
+ blk->base.predecessors = _mesa_set_create(blk,
_mesa_hash_pointer,
_mesa_key_pointer_equal);
- blk->source_id = ctx->block_source_count++;
+ blk->base.name = ctx->block_source_count++;
return blk;
}
-static void
-midgard_block_add_successor(midgard_block *block, midgard_block *successor)
-{
- assert(block);
- assert(successor);
-
- /* Deduplicate */
- for (unsigned i = 0; i < block->nr_successors; ++i) {
- if (block->successors[i] == successor)
- return;
- }
-
- block->successors[block->nr_successors++] = successor;
- assert(block->nr_successors <= ARRAY_SIZE(block->successors));
-
- /* Note the predecessor in the other direction */
- _mesa_set_add(successor->predecessors, block);
-}
-
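+/* Splits control flow at the current point: the running block is
+ * ended and a fresh one begun, so that the scheduler won't reorder
+ * work across the split */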
static void
schedule_barrier(compiler_context *ctx)
{
midgard_block *temp = ctx->after_block;
ctx->after_block = create_empty_block(ctx);
ctx->block_count++;
- list_addtail(&ctx->after_block->link, &ctx->blocks);
- list_inithead(&ctx->after_block->instructions);
- midgard_block_add_successor(ctx->current_block, ctx->after_block);
+ list_addtail(&ctx->after_block->base.link, &ctx->blocks);
+ list_inithead(&ctx->after_block->base.instructions);
+ pan_block_add_successor(&ctx->current_block->base, &ctx->after_block->base);
ctx->current_block = ctx->after_block;
ctx->after_block = temp;
}
M_LOAD(ld_ubo_int4);
M_LOAD(ld_int4);
M_STORE(st_int4);
-M_LOAD(ld_color_buffer_8);
+M_LOAD(ld_color_buffer_32u);
//M_STORE(st_vary_16);
M_STORE(st_vary_32);
M_LOAD(ld_cubemap_coords);
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));
}
-static int
-midgard_sysval_for_ssbo(nir_intrinsic_instr *instr)
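+/* Lower fdot2 across the shader by handing every ALU instruction to
+ * midgard_nir_lower_fdot2_body above */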
+static bool
+midgard_nir_lower_fdot2(nir_shader *shader)
{
- /* This is way too meta */
- bool is_store = instr->intrinsic == nir_intrinsic_store_ssbo;
- unsigned idx_idx = is_store ? 1 : 0;
-
- nir_src index = instr->src[idx_idx];
- assert(nir_src_is_const(index));
- uint32_t uindex = nir_src_as_uint(index);
-
- return PAN_SYSVAL(SSBO, uindex);
-}
+ bool progress = false;
-static int
-midgard_sysval_for_sampler(nir_intrinsic_instr *instr)
-{
- /* TODO: indirect samplers !!! */
- nir_src index = instr->src[0];
- assert(nir_src_is_const(index));
- uint32_t uindex = nir_src_as_uint(index);
+ nir_foreach_function(function, shader) {
+ if (!function->impl) continue;
- return PAN_SYSVAL(SAMPLER, uindex);
-}
+ nir_builder _b;
+ nir_builder *b = &_b;
+ nir_builder_init(b, function->impl);
-static int
-midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
-{
- switch (instr->intrinsic) {
- case nir_intrinsic_load_viewport_scale:
- return PAN_SYSVAL_VIEWPORT_SCALE;
- case nir_intrinsic_load_viewport_offset:
- return PAN_SYSVAL_VIEWPORT_OFFSET;
- case nir_intrinsic_load_num_work_groups:
- return PAN_SYSVAL_NUM_WORK_GROUPS;
- case nir_intrinsic_load_ssbo:
- case nir_intrinsic_store_ssbo:
- return midgard_sysval_for_ssbo(instr);
- case nir_intrinsic_load_sampler_lod_parameters_pan:
- return midgard_sysval_for_sampler(instr);
- default:
- return ~0;
- }
-}
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_alu) continue;
-static int sysval_for_instr(compiler_context *ctx, nir_instr *instr,
- unsigned *dest)
-{
- nir_intrinsic_instr *intr;
- nir_dest *dst = NULL;
- nir_tex_instr *tex;
- int sysval = -1;
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ midgard_nir_lower_fdot2_body(b, alu);
- bool is_store = false;
+ progress |= true;
+ }
+ }
- switch (instr->type) {
- case nir_instr_type_intrinsic:
- intr = nir_instr_as_intrinsic(instr);
- sysval = midgard_nir_sysval_for_intrinsic(intr);
- dst = &intr->dest;
- is_store |= intr->intrinsic == nir_intrinsic_store_ssbo;
- break;
- case nir_instr_type_tex:
- tex = nir_instr_as_tex(instr);
- if (tex->op != nir_texop_txs)
- break;
+ nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
- sysval = PAN_SYSVAL(TEXTURE_SIZE,
- PAN_TXS_SYSVAL_ID(tex->texture_index,
- nir_tex_instr_dest_size(tex) -
- (tex->is_array ? 1 : 0),
- tex->is_array));
- dst = &tex->dest;
- break;
- default:
- break;
}
- if (dest && dst && !is_store)
- *dest = nir_dest_index(ctx, dst);
-
- return sysval;
+ return progress;
}
-static void
-midgard_nir_assign_sysval_body(compiler_context *ctx, nir_instr *instr)
+/* Midgard can't write depth and stencil separately. It has to happen in a
+ * single store operation containing both. Let's add a panfrost-specific
+ * intrinsic and turn all depth/stencil stores into a packed depth+stencil
+ * one.
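+ *
+ * Sketch of the rewrite (NIR syntax approximate):
+ *
+ *    store_output(z);  // FRAG_RESULT_DEPTH
+ *    store_output(s);  // FRAG_RESULT_STENCIL
+ * =>
+ *    store_zs_output_pan(vec2(z, s));  // num_components == 2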
+ */
+static bool
+midgard_nir_lower_zs_store(nir_shader *nir)
{
- int sysval;
-
- sysval = sysval_for_instr(ctx, instr, NULL);
- if (sysval < 0)
- return;
-
- /* We have a sysval load; check if it's already been assigned */
+ if (nir->info.stage != MESA_SHADER_FRAGMENT)
+ return false;
- if (_mesa_hash_table_u64_search(ctx->sysval_to_id, sysval))
- return;
+ nir_variable *z_var = NULL, *s_var = NULL;
- /* It hasn't -- so assign it now! */
+ nir_foreach_variable(var, &nir->outputs) {
+ if (var->data.location == FRAG_RESULT_DEPTH)
+ z_var = var;
+ else if (var->data.location == FRAG_RESULT_STENCIL)
+ s_var = var;
+ }
- unsigned id = ctx->sysval_count++;
- _mesa_hash_table_u64_insert(ctx->sysval_to_id, sysval, (void *) ((uintptr_t) id + 1));
- ctx->sysvals[id] = sysval;
-}
+ if (!z_var && !s_var)
+ return false;
-static void
-midgard_nir_assign_sysvals(compiler_context *ctx, nir_shader *shader)
-{
- ctx->sysval_count = 0;
+ bool progress = false;
- nir_foreach_function(function, shader) {
+ nir_foreach_function(function, nir) {
if (!function->impl) continue;
+ nir_intrinsic_instr *z_store = NULL, *s_store = NULL, *last_store = NULL;
+
nir_foreach_block(block, function->impl) {
nir_foreach_instr_safe(instr, block) {
- midgard_nir_assign_sysval_body(ctx, instr);
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ continue;
+
+ if (z_var && nir_intrinsic_base(intr) == z_var->data.driver_location) {
+ assert(!z_store);
+ z_store = intr;
+ last_store = intr;
+ }
+
+ if (s_var && nir_intrinsic_base(intr) == s_var->data.driver_location) {
+ assert(!s_store);
+ s_store = intr;
+ last_store = intr;
+ }
}
}
- }
-}
-static bool
-midgard_nir_lower_fdot2(nir_shader *shader)
-{
- bool progress = false;
+ if (!z_store && !s_store) continue;
- nir_foreach_function(function, shader) {
- if (!function->impl) continue;
+ nir_builder b;
+ nir_builder_init(&b, function->impl);
- nir_builder _b;
- nir_builder *b = &_b;
- nir_builder_init(b, function->impl);
+ b.cursor = nir_before_instr(&last_store->instr);
- nir_foreach_block(block, function->impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_alu) continue;
+ nir_ssa_def *zs_store_src;
- nir_alu_instr *alu = nir_instr_as_alu(instr);
- midgard_nir_lower_fdot2_body(b, alu);
+ if (z_store && s_store) {
+ nir_ssa_def *srcs[2] = {
+ nir_ssa_for_src(&b, z_store->src[0], 1),
+ nir_ssa_for_src(&b, s_store->src[0], 1),
+ };
- progress |= true;
- }
+ zs_store_src = nir_vec(&b, srcs, 2);
+ } else {
+ zs_store_src = nir_ssa_for_src(&b, last_store->src[0], 1);
}
- nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
+ nir_intrinsic_instr *zs_store;
+
+ zs_store = nir_intrinsic_instr_create(b.shader,
+ nir_intrinsic_store_zs_output_pan);
+ zs_store->src[0] = nir_src_for_ssa(zs_store_src);
+ zs_store->num_components = z_store && s_store ? 2 : 1;
+ nir_intrinsic_set_component(zs_store, z_store ? 0 : 1);
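+ /* Component 0 means depth (with stencil riding along in .y
+ * when both are present); component 1 means stencil only */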
+
+ /* Replace the Z and S stores with a single ZS store */
+ nir_builder_instr_insert(&b, &zs_store->instr);
+
+ if (z_store)
+ nir_instr_remove(&z_store->instr);
+ if (s_store)
+ nir_instr_remove(&s_store->instr);
+
+ nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
+ progress = true;
}
return progress;
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
NIR_PASS(progress, nir, nir_copy_prop);
+ NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
{
nir_ssa_def def = instr->def;
- float *v = rzalloc_array(NULL, float, 4);
- nir_const_value_to_array(v, instr->value, instr->def.num_components, f32);
+ midgard_constants *consts = rzalloc(NULL, midgard_constants);
+
+ assert(instr->def.num_components * instr->def.bit_size <= sizeof(*consts) * 8);
+
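+ /* Copy the payload raw at its source bit size; the
+ * midgard_constants views (u8/u16/u32/u64) alias the same
+ * storage, so no conversion is needed */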
+#define RAW_CONST_COPY(bits) \
+ nir_const_value_to_array(consts->u##bits, instr->value, \
+ instr->def.num_components, u##bits)
+
+ switch (instr->def.bit_size) {
+ case 64:
+ RAW_CONST_COPY(64);
+ break;
+ case 32:
+ RAW_CONST_COPY(32);
+ break;
+ case 16:
+ RAW_CONST_COPY(16);
+ break;
+ case 8:
+ RAW_CONST_COPY(8);
+ break;
+ default:
+ unreachable("Invalid bit_size for load_const instruction\n");
+ }
/* Shifted for SSA, +1 for off-by-one */
- _mesa_hash_table_u64_insert(ctx->ssa_constants, (def.index << 1) + 1, v);
+ _mesa_hash_table_u64_insert(ctx->ssa_constants, (def.index << 1) + 1, consts);
}
/* Normally constants are embedded implicitly, but for I/O and such we have to
bool is_ssa = instr->dest.dest.is_ssa;
- unsigned dest = nir_dest_index(ctx, &instr->dest.dest);
+ unsigned dest = nir_dest_index(&instr->dest.dest);
unsigned nr_components = nir_dest_num_components(instr->dest.dest);
unsigned nr_inputs = nir_op_infos[instr->op].num_inputs;
ALU_CASE(fexp2, fexp2);
ALU_CASE(flog2, flog2);
+ ALU_CASE(f2i64, f2i_rtz);
+ ALU_CASE(f2u64, f2u_rtz);
+ ALU_CASE(i2f64, i2f_rtz);
+ ALU_CASE(u2f64, u2f_rtz);
+
ALU_CASE(f2i32, f2i_rtz);
ALU_CASE(f2u32, f2u_rtz);
ALU_CASE(i2f32, i2f_rtz);
case nir_op_u2u8:
case nir_op_u2u16:
case nir_op_u2u32:
- case nir_op_u2u64: {
- op = midgard_alu_op_imov;
+ case nir_op_u2u64:
+ case nir_op_f2f16:
+ case nir_op_f2f32:
+ case nir_op_f2f64: {
+ if (instr->op == nir_op_f2f16 || instr->op == nir_op_f2f32 ||
+ instr->op == nir_op_f2f64)
+ op = midgard_alu_op_fmov;
+ else
+ op = midgard_alu_op_imov;
if (dst_bitsize == (src_bitsize * 2)) {
/* Converting up */
break;
}
- case nir_op_f2f16: {
- assert(src_bitsize == 32);
-
- op = midgard_alu_op_fmov;
- dest_override = midgard_dest_override_lower;
- break;
- }
-
- case nir_op_f2f32: {
- assert(src_bitsize == 16);
-
- op = midgard_alu_op_fmov;
- half_2 = true;
- reg_mode++;
- break;
- }
-
-
/* For greater-or-equal, we lower to less-or-equal and flip the
* arguments */
ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
ins.has_constants = true;
- if (instr->op == nir_op_b2f32) {
- float f = 1.0f;
- memcpy(&ins.constants, &f, sizeof(float));
- } else {
- ins.constants[0] = 1;
- }
-
+ if (instr->op == nir_op_b2f32)
+ ins.constants.f32[0] = 1.0f;
+ else
+ ins.constants.i32[0] = 1;
for (unsigned c = 0; c < 16; ++c)
ins.swizzle[1][c] = 0;
ins.has_inline_constant = false;
ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
ins.has_constants = true;
- ins.constants[0] = 0;
+ ins.constants.u32[0] = 0;
for (unsigned c = 0; c < 16; ++c)
ins.swizzle[1][c] = 0;
}
/* Once we have the NIR mask, we need to normalize to work in 32-bit space */
- unsigned bytemask = mir_to_bytemask(mir_mode_for_destsize(dsize), nir_mask);
+ unsigned bytemask = pan_to_bytemask(dsize, nir_mask);
mir_set_bytemask(ins, bytemask);
if (dsize == 64)
/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
* optimized) versions of UBO #0 */
-midgard_instruction *
+static midgard_instruction *
emit_ubo_read(
compiler_context *ctx,
nir_instr *instr,
unsigned dest,
unsigned offset,
nir_src *indirect_offset,
+ unsigned indirect_shift,
unsigned index)
{
/* TODO: half-floats */
midgard_instruction ins = m_ld_ubo_int4(dest, 0);
- ins.constants[0] = offset;
+ ins.constants.u32[0] = offset;
if (instr->type == nir_instr_type_intrinsic)
mir_set_intr_mask(instr, &ins, true);
if (indirect_offset) {
ins.src[2] = nir_src_index(ctx, indirect_offset);
- ins.load_store.arg_2 = 0x80;
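+ /* The upper bits of arg_2 hold a left shift applied to the
+ * indirect offset; the old hard-coded 0x80 corresponds to a
+ * shift of 4, i.e. vec4-sized uniform indexing */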
+ ins.load_store.arg_2 = (indirect_shift << 5);
} else {
ins.load_store.arg_2 = 0x1E;
}
return emit_mir_instruction(ctx, ins);
}
-/* SSBO reads are like UBO reads if you squint */
+/* Globals are like UBOs if you squint. And shared memory is like globals if
+ * you squint even harder */
static void
-emit_ssbo_access(
+emit_global(
compiler_context *ctx,
nir_instr *instr,
bool is_read,
unsigned srcdest,
- unsigned offset,
- nir_src *indirect_offset,
- unsigned index)
+ nir_src *offset,
+ bool is_shared)
{
/* TODO: types */
midgard_instruction ins;
if (is_read)
- ins = m_ld_int4(srcdest, offset);
+ ins = m_ld_int4(srcdest, 0);
else
- ins = m_st_int4(srcdest, offset);
-
- /* SSBO reads use a generic memory read interface, so we need the
- * address of the SSBO as the first argument. This is a sysval. */
-
- unsigned addr = make_compiler_temp(ctx);
- emit_sysval_read(ctx, instr, addr, 2);
-
- /* The source array:
- *
- * src[0] = store ? value : unused
- * src[1] = arg_1
- * src[2] = arg_2
- *
- * We would like arg_1 = the address and
- * arg_2 = the offset.
- */
-
- ins.src[1] = addr;
-
- /* TODO: What is this? It looks superficially like a shift << 5, but
- * arg_1 doesn't take a shift Should it be E0 or A0? We also need the
- * indirect offset. */
-
- if (indirect_offset) {
- ins.load_store.arg_1 |= 0xE0;
- ins.src[2] = nir_src_index(ctx, indirect_offset);
- } else {
- ins.load_store.arg_2 = 0x7E;
- }
-
- /* TODO: Bounds check */
-
- /* Finally, we emit the direct offset */
+ ins = m_st_int4(srcdest, 0);
- ins.load_store.varying_parameters = (offset & 0x1FF) << 1;
- ins.load_store.address = (offset >> 9);
+ mir_set_offset(ctx, &ins, offset, is_shared);
mir_set_intr_mask(instr, &ins, is_read);
emit_mir_instruction(ctx, ins);
emit_mir_instruction(ctx, ins);
}
-void
-emit_sysval_read(compiler_context *ctx, nir_instr *instr, signed dest_override,
- unsigned nr_components)
+static void
+emit_sysval_read(compiler_context *ctx, nir_instr *instr,
+ unsigned nr_components, unsigned offset)
{
- unsigned dest = 0;
+ nir_dest nir_dest;
/* Figure out which uniform this is */
- int sysval = sysval_for_instr(ctx, instr, &dest);
- void *val = _mesa_hash_table_u64_search(ctx->sysval_to_id, sysval);
+ int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
+ void *val = _mesa_hash_table_u64_search(ctx->sysvals.sysval_to_id, sysval);
- if (dest_override >= 0)
- dest = dest_override;
+ unsigned dest = nir_dest_index(&nir_dest);
/* Sysvals are prefix uniforms */
unsigned uniform = ((uintptr_t) val) - 1;
/* Emit the read itself -- this is never indirect */
midgard_instruction *ins =
- emit_ubo_read(ctx, instr, dest, uniform * 16, NULL, 0);
+ emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0, 0);
ins->mask = mask_of(nr_components);
}
}
}
-/* Emit store for a fragment shader, which is encoded via a fancy branch. TODO:
- * Handle MRT here */
-static void
-emit_fragment_epilogue(compiler_context *ctx, unsigned rt);
-
static void
-emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
+emit_fragment_store(compiler_context *ctx, unsigned src, enum midgard_rt_id rt)
{
+ assert(rt < ARRAY_SIZE(ctx->writeout_branch));
+
+ midgard_instruction *br = ctx->writeout_branch[rt];
+
+ assert(!br);
+
emit_explicit_constant(ctx, src, src);
struct midgard_instruction ins =
/* Add dependencies */
ins.src[0] = src;
- ins.constants[0] = rt * 0x100;
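+ /* The branch constant selects the writeout target: color RTs
+ * are spaced 0x100 apart, and 0xFF appears to select the
+ * depth/stencil writeout */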
+ ins.constants.u32[0] = rt == MIDGARD_ZS_RT ?
+ 0xFF : (rt - MIDGARD_COLOR_RT0) * 0x100;
/* Emit the branch */
- midgard_instruction *br = emit_mir_instruction(ctx, ins);
+ br = emit_mir_instruction(ctx, ins);
schedule_barrier(ctx);
- br->branch.target_block = ctx->block_count - 1;
+ ctx->writeout_branch[rt] = br;
- emit_fragment_epilogue(ctx, rt);
+ /* Push our current location = current block count - 1 = where we'll
+ * jump to. Maybe a bit too clever for my own good */
+
+ br->branch.target_block = ctx->block_count - 1;
}
static void
emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
{
- unsigned reg = nir_dest_index(ctx, &instr->dest);
+ unsigned reg = nir_dest_index(&instr->dest);
midgard_instruction ins = m_ld_compute_id(reg, 0);
ins.mask = mask_of(3);
+ ins.swizzle[0][3] = COMPONENT_X; /* xyzx */
ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic);
emit_mir_instruction(ctx, ins);
}
static void
emit_vertex_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
{
- unsigned reg = nir_dest_index(ctx, &instr->dest);
+ unsigned reg = nir_dest_index(&instr->dest);
emit_attr_read(ctx, reg, vertex_builtin_arg(instr->intrinsic), 1, nir_type_int);
}
+static void
+emit_control_barrier(compiler_context *ctx)
+{
+ midgard_instruction ins = {
+ .type = TAG_TEXTURE_4,
+ .src = { ~0, ~0, ~0, ~0 },
+ .texture = {
+ .op = TEXTURE_OP_BARRIER,
+
+ /* TODO: optimize */
+ .barrier_buffer = 1,
+ .barrier_shared = 1
+ }
+ };
+
+ emit_mir_instruction(ctx, ins);
+}
+
+static const nir_variable *
+search_var(struct exec_list *vars, unsigned driver_loc)
+{
+ nir_foreach_variable(var, vars) {
+ if (var->data.driver_location == driver_loc)
+ return var;
+ }
+
+ return NULL;
+}
+
static void
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
{
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ubo:
- case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_global:
+ case nir_intrinsic_load_shared:
case nir_intrinsic_load_input:
case nir_intrinsic_load_interpolated_input: {
bool is_uniform = instr->intrinsic == nir_intrinsic_load_uniform;
bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo;
- bool is_ssbo = instr->intrinsic == nir_intrinsic_load_ssbo;
+ bool is_global = instr->intrinsic == nir_intrinsic_load_global;
+ bool is_shared = instr->intrinsic == nir_intrinsic_load_shared;
bool is_flat = instr->intrinsic == nir_intrinsic_load_input;
bool is_interp = instr->intrinsic == nir_intrinsic_load_interpolated_input;
/* Get the base type of the intrinsic */
/* TODO: Infer type? Does it matter? */
nir_alu_type t =
- (is_ubo || is_ssbo) ? nir_type_uint :
+ (is_ubo || is_global || is_shared) ? nir_type_uint :
(is_interp) ? nir_type_float :
nir_intrinsic_type(instr);
t = nir_alu_type_get_base_type(t);
- if (!(is_ubo || is_ssbo)) {
+ if (!(is_ubo || is_global)) {
offset = nir_intrinsic_base(instr);
}
/* We may need to apply a fractional offset */
int component = (is_flat || is_interp) ?
nir_intrinsic_component(instr) : 0;
- reg = nir_dest_index(ctx, &instr->dest);
+ reg = nir_dest_index(&instr->dest);
if (is_uniform && !ctx->is_blend) {
- emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysval_count + offset) * 16, indirect_offset, 0);
+ emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysvals.sysval_count + offset) * 16, indirect_offset, 4, 0);
} else if (is_ubo) {
nir_src index = instr->src[0];
- /* We don't yet support indirect UBOs. For indirect
- * block numbers (if that's possible), we don't know
- * enough about the hardware yet. For indirect sources,
- * we know what we need but we need to add some NIR
- * support for lowering correctly with respect to
- * 128-bit reads */
-
+ /* TODO: Is indirect block number possible? */
assert(nir_src_is_const(index));
- assert(nir_src_is_const(*src_offset));
uint32_t uindex = nir_src_as_uint(index) + 1;
- emit_ubo_read(ctx, &instr->instr, reg, offset, NULL, uindex);
- } else if (is_ssbo) {
- nir_src index = instr->src[0];
- assert(nir_src_is_const(index));
- uint32_t uindex = nir_src_as_uint(index);
-
- emit_ssbo_access(ctx, &instr->instr, true, reg, offset, indirect_offset, uindex);
+ emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex);
+ } else if (is_global || is_shared) {
+ emit_global(ctx, &instr->instr, true, reg, src_offset, is_shared);
} else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
emit_varying_read(ctx, reg, offset, nr_comp, component, indirect_offset, t, is_flat);
} else if (ctx->is_blend) {
/* Artefact of load_interpolated_input. TODO: other barycentric modes */
case nir_intrinsic_load_barycentric_pixel:
+ case nir_intrinsic_load_barycentric_centroid:
break;
/* Reads 128-bit value raw off the tilebuffer during blending, tasty */
case nir_intrinsic_load_raw_output_pan:
case nir_intrinsic_load_output_u8_as_fp16_pan:
- reg = nir_dest_index(ctx, &instr->dest);
+ reg = nir_dest_index(&instr->dest);
assert(ctx->is_blend);
/* T720 and below use different blend opcodes with slightly
* different semantics than T760 and up */
- midgard_instruction ld = m_ld_color_buffer_8(reg, 0);
+ midgard_instruction ld = m_ld_color_buffer_32u(reg, 0);
bool old_blend = ctx->quirks & MIDGARD_OLD_BLEND;
if (instr->intrinsic == nir_intrinsic_load_output_u8_as_fp16_pan) {
case nir_intrinsic_load_blend_const_color_rgba: {
assert(ctx->is_blend);
- reg = nir_dest_index(ctx, &instr->dest);
+ reg = nir_dest_index(&instr->dest);
/* Blend constants are embedded directly in the shader and
* patched in, so we use some magic routing */
break;
}
+ case nir_intrinsic_store_zs_output_pan: {
+ assert(ctx->stage == MESA_SHADER_FRAGMENT);
+ emit_fragment_store(ctx, nir_src_index(ctx, &instr->src[0]),
+ MIDGARD_ZS_RT);
+
+ midgard_instruction *br = ctx->writeout_branch[MIDGARD_ZS_RT];
+
+ if (!nir_intrinsic_component(instr))
+ br->writeout_depth = true;
+ if (nir_intrinsic_component(instr) ||
+ instr->num_components > 1)
+ br->writeout_stencil = true;
+ assert(br->writeout_depth | br->writeout_stencil);
+ break;
+ }
+
case nir_intrinsic_store_output:
assert(nir_src_is_const(instr->src[1]) && "no indirect outputs");
reg = nir_src_index(ctx, &instr->src[0]);
if (ctx->stage == MESA_SHADER_FRAGMENT) {
- /* Determine number of render targets */
- emit_fragment_store(ctx, reg, offset);
+ const nir_variable *var;
+ enum midgard_rt_id rt;
+
+ var = search_var(&ctx->nir->outputs,
+ nir_intrinsic_base(instr));
+ assert(var);
+ if (var->data.location == FRAG_RESULT_COLOR)
+ rt = MIDGARD_COLOR_RT0;
+ else if (var->data.location >= FRAG_RESULT_DATA0)
+ rt = MIDGARD_COLOR_RT0 + var->data.location -
+ FRAG_RESULT_DATA0;
+ else
+ assert(0);
+
+ emit_fragment_store(ctx, reg, rt);
} else if (ctx->stage == MESA_SHADER_VERTEX) {
/* We should have been vectorized, though we don't
* currently check that st_vary is emitted only once
emit_explicit_constant(ctx, reg, reg);
- unsigned component = nir_intrinsic_component(instr);
+ unsigned dst_component = nir_intrinsic_component(instr);
unsigned nr_comp = nir_src_num_components(instr->src[0]);
midgard_instruction st = m_st_vary_32(reg, offset);
break;
}
- for (unsigned i = 0; i < ARRAY_SIZE(st.swizzle[0]); ++i)
- st.swizzle[0][i] = MIN2(i + component, nr_comp);
+ /* nir_intrinsic_component(store_intr) encodes the
+ * destination component start. Source component offset
+ * adjustment is taken care of in
+ * install_registers_instr(), when offset_swizzle() is
+ * called.
+ */
+ unsigned src_component = COMPONENT_X;
+
+ assert(nr_comp > 0);
+ for (unsigned i = 0; i < ARRAY_SIZE(st.swizzle); ++i) {
+ st.swizzle[0][i] = src_component;
+ if (i >= dst_component && i < dst_component + nr_comp - 1)
+ src_component++;
+ }
emit_mir_instruction(ctx, st);
} else {
break;
- case nir_intrinsic_store_ssbo:
- assert(nir_src_is_const(instr->src[1]));
-
- bool direct_offset = nir_src_is_const(instr->src[2]);
- offset = direct_offset ? nir_src_as_uint(instr->src[2]) : 0;
- nir_src *indirect_offset = direct_offset ? NULL : &instr->src[2];
+ case nir_intrinsic_store_global:
+ case nir_intrinsic_store_shared:
reg = nir_src_index(ctx, &instr->src[0]);
+ emit_explicit_constant(ctx, reg, reg);
- uint32_t uindex = nir_src_as_uint(instr->src[1]);
+ emit_global(ctx, &instr->instr, false, reg, &instr->src[1], instr->intrinsic == nir_intrinsic_store_shared);
+ break;
- emit_explicit_constant(ctx, reg, reg);
- emit_ssbo_access(ctx, &instr->instr, false, reg, offset, indirect_offset, uindex);
+ case nir_intrinsic_load_ssbo_address:
+ emit_sysval_read(ctx, &instr->instr, 1, 0);
break;
+ case nir_intrinsic_get_buffer_size:
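+ /* The buffer size sits after the 8-byte address in the SSBO
+ * sysval, hence the byte offset of 8 (cf. the paired
+ * load_ssbo_address read above) */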
+ emit_sysval_read(ctx, &instr->instr, 1, 8);
+ break;
+
case nir_intrinsic_load_viewport_scale:
case nir_intrinsic_load_viewport_offset:
case nir_intrinsic_load_num_work_groups:
case nir_intrinsic_load_sampler_lod_parameters_pan:
- emit_sysval_read(ctx, &instr->instr, ~0, 3);
+ emit_sysval_read(ctx, &instr->instr, 3, 0);
break;
case nir_intrinsic_load_work_group_id:
emit_vertex_builtin(ctx, instr);
break;
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_shared:
+ break;
+
+ case nir_intrinsic_control_barrier:
+ schedule_barrier(ctx);
+ emit_control_barrier(ctx);
+ schedule_barrier(ctx);
+ break;
+
default:
- printf ("Unhandled intrinsic\n");
+ fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name);
assert(0);
break;
}
{
/* TODO */
//assert (!instr->sampler);
- //assert (!instr->texture_array_size);
int texture_index = instr->texture_index;
int sampler_index = texture_index;
midgard_instruction ins = {
.type = TAG_TEXTURE_4,
.mask = 0xF,
- .dest = nir_dest_index(ctx, &instr->dest),
+ .dest = nir_dest_index(&instr->dest),
.src = { ~0, ~0, ~0, ~0 },
.swizzle = SWIZZLE_IDENTITY_4,
.texture = {
unsigned coord_mask = mask_of(instr->coord_components);
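+ /* For 2D arrays NIR supplies the array index in Z, but the
+ * swizzles below expect it in W, so note when the temporary
+ * coordinate needs its Z and W swapped */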
+ bool flip_zw = (instr->sampler_dim == GLSL_SAMPLER_DIM_2D) && (coord_mask & (1 << COMPONENT_Z));
+
+ if (flip_zw)
+ coord_mask ^= ((1 << COMPONENT_Z) | (1 << COMPONENT_W));
+
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
/* texelFetch is undefined on samplerCube */
assert(midgard_texop != TEXTURE_OP_TEXEL_FETCH);
/* mov coord_temp, coords */
midgard_instruction mov = v_mov(index, coords);
mov.mask = coord_mask;
+
+ if (flip_zw)
+ mov.swizzle[1][COMPONENT_W] = COMPONENT_Z;
+
emit_mir_instruction(ctx, mov);
} else {
coords = index;
}
if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D) {
- /* Array component in w but NIR wants it in z */
+ /* Array component in w but NIR wants it in z,
+ * but if we have a temp coord we already fixed
+ * that up */
+
if (nr_components == 3) {
ins.swizzle[1][2] = COMPONENT_Z;
- ins.swizzle[1][3] = COMPONENT_Z;
+ ins.swizzle[1][3] = needs_temp_coord ? COMPONENT_W : COMPONENT_Z;
} else if (nr_components == 2) {
ins.swizzle[1][2] =
instr->is_shadow ? COMPONENT_Z : COMPONENT_X;
break;
}
- default:
- unreachable("Unknown texture source type\n");
+ default: {
+ fprintf(stderr, "Unknown texture source type: %d\n", instr->src[i].src_type);
+ assert(0);
+ }
}
}
emit_texop_native(ctx, instr, TEXTURE_OP_TEXEL_FETCH);
break;
case nir_texop_txs:
- emit_sysval_read(ctx, &instr->instr, ~0, 4);
+ emit_sysval_read(ctx, &instr->instr, 4, 0);
break;
- default:
- unreachable("Unhanlded texture op");
+ default: {
+ fprintf(stderr, "Unhandled texture op: %d\n", instr->op);
+ assert(0);
+ }
}
}
/* Scale constant appropriately, if we can legally */
uint16_t scaled_constant = 0;
- if (midgard_is_integer_op(op) || is_16) {
- unsigned int *iconstants = (unsigned int *) ins->constants;
- scaled_constant = (uint16_t) iconstants[component];
+ if (is_16) {
+ scaled_constant = ins->constants.u16[component];
+ } else if (midgard_is_integer_op(op)) {
+ scaled_constant = ins->constants.u32[component];
/* Constant overflow after resize */
- if (scaled_constant != iconstants[component])
+ if (scaled_constant != ins->constants.u32[component])
continue;
} else {
- float *f = (float *) ins->constants;
- float original = f[component];
+ float original = ins->constants.f32[component];
scaled_constant = _mesa_float_to_half(original);
/* Check for loss of precision. If this is
/* Make sure that the constant is not itself a vector
* by checking if all accessed values are the same. */
- uint32_t *cons = ins->constants;
- uint32_t value = cons[component];
+ const midgard_constants *cons = &ins->constants;
+ uint32_t value = is_16 ? cons->u16[component] : cons->u32[component];
bool is_vector = false;
unsigned mask = effective_writemask(&ins->alu, ins->mask);
if (!(mask & (1 << c)))
continue;
- uint32_t test = cons[ins->swizzle[1][c]];
+ uint32_t test = is_16 ?
+ cons->u16[ins->swizzle[1][c]] :
+ cons->u32[ins->swizzle[1][c]];
if (test != value) {
is_vector = true;
return progress;
}
-static void
+static unsigned
emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
{
- /* Include a move to specify the render target */
-
- if (rt > 0) {
- midgard_instruction rt_move = v_mov(SSA_FIXED_REGISTER(1),
- SSA_FIXED_REGISTER(1));
- rt_move.mask = 1 << COMPONENT_Z;
- rt_move.unit = UNIT_SADD;
- emit_mir_instruction(ctx, rt_move);
- }
-
/* Loop to ourselves */
-
+ midgard_instruction *br = ctx->writeout_branch[rt];
struct midgard_instruction ins = v_branch(false, false);
ins.writeout = true;
+ ins.writeout_depth = br->writeout_depth;
+ ins.writeout_stencil = br->writeout_stencil;
ins.branch.target_block = ctx->block_count - 1;
+ ins.constants.u32[0] = br->constants.u32[0];
emit_mir_instruction(ctx, ins);
ctx->current_block->epilogue = true;
schedule_barrier(ctx);
+ return ins.branch.target_block;
}
static midgard_block *
if (!this_block)
this_block = create_empty_block(ctx);
- list_addtail(&this_block->link, &ctx->blocks);
+ list_addtail(&this_block->base.link, &ctx->blocks);
- this_block->is_scheduled = false;
+ this_block->scheduled = false;
++ctx->block_count;
/* Set up current block */
- list_inithead(&this_block->instructions);
+ list_inithead(&this_block->base.instructions);
ctx->current_block = this_block;
nir_foreach_instr(instr, block) {
ctx->after_block = create_empty_block(ctx);
- midgard_block_add_successor(before_block, then_block);
- midgard_block_add_successor(before_block, else_block);
+ pan_block_add_successor(&before_block->base, &then_block->base);
+ pan_block_add_successor(&before_block->base, &else_block->base);
- midgard_block_add_successor(end_then_block, ctx->after_block);
- midgard_block_add_successor(end_else_block, ctx->after_block);
+ pan_block_add_successor(&end_then_block->base, &ctx->after_block->base);
+ pan_block_add_successor(&end_else_block->base, &ctx->after_block->base);
}
static void
emit_mir_instruction(ctx, br_back);
/* Mark down that branch in the graph. */
- midgard_block_add_successor(start_block, loop_block);
- midgard_block_add_successor(ctx->current_block, loop_block);
+ pan_block_add_successor(&start_block->base, &loop_block->base);
+ pan_block_add_successor(&ctx->current_block->base, &loop_block->base);
/* Find the index of the block about to follow us (note: we don't add
* one; blocks are 0-indexed so we get a fencepost problem) */
* now that we can allocate a block number for them */
ctx->after_block = create_empty_block(ctx);
- list_for_each_entry_from(struct midgard_block, block, start_block, &ctx->blocks, link) {
- mir_foreach_instr_in_block(block, ins) {
+ mir_foreach_block_from(ctx, start_block, _block) {
+ mir_foreach_instr_in_block(((midgard_block *) _block), ins) {
if (ins->type != TAG_ALU_4) continue;
if (!ins->compact_branch) continue;
ins->branch.target_type = TARGET_GOTO;
ins->branch.target_block = break_block_idx;
- midgard_block_add_successor(block, ctx->after_block);
+ pan_block_add_successor(_block, &ctx->after_block->base);
}
}
{
midgard_block *initial_block = mir_get_block(ctx, block_idx);
- unsigned first_tag = 0;
-
- mir_foreach_block_from(ctx, initial_block, v) {
+ mir_foreach_block_from(ctx, initial_block, _v) {
+ midgard_block *v = (midgard_block *) _v;
if (v->quadword_count) {
midgard_bundle *initial_bundle =
util_dynarray_element(&v->bundles, midgard_bundle, 0);
- first_tag = initial_bundle->tag;
- break;
+ return initial_bundle->tag;
}
}
- return first_tag;
+ /* Default to a tag 1 which will break from the shader, in case we jump
+ * to the exit block (i.e. `return` in a compute shader) */
+
+ return 1;
}
static unsigned
MALI_NR_CHANNELS(4);
}
+/* For each fragment writeout instruction, generate a writeout loop to
+ * associate with it */
+
+static void
+mir_add_writeout_loops(compiler_context *ctx)
+{
+ for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) {
+ midgard_instruction *br = ctx->writeout_branch[rt];
+ if (!br) continue;
+
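+ /* emit_fragment_store pointed the branch back at its own
+ * block; remember that so we can both patch in the epilogue
+ * target and resume emission there afterwards */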
+ unsigned popped = br->branch.target_block;
+ pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base), &ctx->current_block->base);
+ br->branch.target_block = emit_fragment_epilogue(ctx, rt);
+
+ /* If we have more RTs, we'll need to restore back after our
+ * loop terminates */
+
+ if ((rt + 1) < ARRAY_SIZE(ctx->writeout_branch) && ctx->writeout_branch[rt + 1]) {
+ midgard_instruction uncond = v_branch(false, false);
+ uncond.branch.target_block = popped;
+ emit_mir_instruction(ctx, uncond);
+ pan_block_add_successor(&ctx->current_block->base, &(mir_get_block(ctx, popped)->base));
+ schedule_barrier(ctx);
+ } else {
+ /* We're last, so we can terminate here */
+ br->last_writeout = true;
+ }
+ }
+}
+
int
-midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend, unsigned blend_rt, unsigned gpu_id, bool shaderdb)
+midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_blend, unsigned blend_rt, unsigned gpu_id, bool shaderdb)
{
struct util_dynarray *compiled = &program->compiled;
ctx->stage = nir->info.stage;
ctx->is_blend = is_blend;
ctx->alpha_ref = program->alpha_ref;
- ctx->blend_rt = blend_rt;
+ ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt;
ctx->quirks = midgard_get_quirks(gpu_id);
/* Start off with a safe cutoff, allowing usage of all 16 work
ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
- ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
/* Record the varying mapping for the command stream's bookkeeping */
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+ NIR_PASS_V(nir, nir_lower_ssbo);
+ NIR_PASS_V(nir, midgard_nir_lower_zs_store);
/* Optimisation passes */
/* Assign sysvals and counts, now that we're sure
* (post-optimisation) */
- midgard_nir_assign_sysvals(ctx, nir);
+ panfrost_nir_assign_sysvals(&ctx->sysvals, nir);
program->uniform_count = nir->num_uniforms;
- program->sysval_count = ctx->sysval_count;
- memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count);
+ program->sysval_count = ctx->sysvals.sysval_count;
+ memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
nir_foreach_function(func, nir) {
if (!func->impl)
/* Per-block lowering before opts */
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
inline_alu_constants(ctx, block);
midgard_opt_promote_fmov(ctx, block);
embedded_to_inline_constant(ctx, block);
do {
progress = false;
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
progress |= midgard_opt_pos_propagate(ctx, block);
progress |= midgard_opt_copy_prop(ctx, block);
progress |= midgard_opt_dead_code_eliminate(ctx, block);
}
} while (progress);
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
midgard_lower_invert(ctx, block);
midgard_lower_derivatives(ctx, block);
}
/* Nested control-flow can result in dead branches at the end of the
* block. This messes with our analysis and is just dead code, so cull
* them */
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
midgard_opt_cull_dead_branch(ctx, block);
}
assert(!ins->invert);
}
+ if (ctx->stage == MESA_SHADER_FRAGMENT)
+ mir_add_writeout_loops(ctx);
+
/* Schedule! */
- schedule_program(ctx);
+ midgard_schedule_program(ctx);
mir_ra(ctx);
/* Now that all the bundles are scheduled and we can calculate block
int br_block_idx = 0;
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
util_dynarray_foreach(&block->bundles, midgard_bundle, bundle) {
for (int c = 0; c < bundle->instruction_count; ++c) {
midgard_instruction *ins = bundle->instructions[c];
/* Cache _all_ bundles in source order for lookahead across failed branches */
int bundle_count = 0;
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
bundle_count += block->bundles.size / sizeof(midgard_bundle);
}
midgard_bundle **source_order_bundles = malloc(sizeof(midgard_bundle *) * bundle_count);
int bundle_idx = 0;
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
util_dynarray_foreach(&block->bundles, midgard_bundle, bundle) {
source_order_bundles[bundle_idx++] = bundle;
}
/* Midgard prefetches instruction types, so during emission we
* need to lookahead. Unless this is the last instruction, in
- * which we return 1. Or if this is the second to last and the
- * last is an ALU, then it's also 1... */
+ * which case we return 1. */
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
mir_foreach_bundle_in_block(block, bundle) {
int lookahead = 1;
- if (current_bundle + 1 < bundle_count) {
- uint8_t next = source_order_bundles[current_bundle + 1]->tag;
-
- if (!(current_bundle + 2 < bundle_count) && IS_ALU(next)) {
- lookahead = 1;
- } else {
- lookahead = next;
- }
- }
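+ /* Bundles ending a writeout loop keep the default lookahead
+ * of 1, i.e. "break from the shader", so the final writeout
+ * really terminates */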
+ if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
+ lookahead = source_order_bundles[current_bundle + 1]->tag;
emit_binary_bundle(ctx, bundle, compiled, lookahead);
++current_bundle;
program->tls_size = ctx->tls_size;
if (midgard_debug & MIDGARD_DBG_SHADERS)
- disassemble_midgard(program->compiled.data, program->compiled.size, gpu_id, ctx->stage);
+ disassemble_midgard(stdout, program->compiled.data, program->compiled.size, gpu_id, ctx->stage);
if (midgard_debug & MIDGARD_DBG_SHADERDB || shaderdb) {
unsigned nr_bundles = 0, nr_ins = 0;
/* Count instructions and bundles */
- mir_foreach_block(ctx, block) {
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
nr_bundles += util_dynarray_num_elements(
&block->bundles, midgard_bundle);