M_STORE(st_int4, nir_type_uint32);
M_LOAD(ld_color_buffer_32u, nir_type_uint32);
M_LOAD(ld_color_buffer_as_fp16, nir_type_float16);
+M_LOAD(ld_color_buffer_as_fp32, nir_type_float32);
M_STORE(st_vary_32, nir_type_uint32);
M_LOAD(ld_cubemap_coords, nir_type_uint32);
M_LOAD(ld_compute_id, nir_type_uint32);
var->data.location < FRAG_RESULT_DATA0)
continue;
+ if (var->data.index)
+ continue;
+
assert(nir_src_is_const(intr->src[1]) && "no indirect outputs");
nir_builder b;
return progress;
}
+/* Real writeout stores, which break execution, need to be moved to after
+ * dual-source stores, which are just standard register writes.
+ *
+ * Each block is walked backwards. The first store_output met whose output
+ * variable has a non-zero data.index (a dual-source store) becomes the anchor,
+ * i.e. it is the last such store in program order. Every other store_output
+ * found earlier in the block is unlinked and re-inserted directly after the
+ * anchor; because the walk runs in reverse, the moved stores keep their
+ * relative order. Stores that already follow the anchor are left in place.
+ *
+ * Returns true if at least one instruction was moved. */
+static bool
+midgard_nir_reorder_writeout(nir_shader *nir)
+{
+        bool progress = false;
+
+        nir_foreach_function(function, nir) {
+                if (!function->impl) continue;
+
+                nir_foreach_block(block, function->impl) {
+                        /* Last dual-source store of this block, once seen */
+                        nir_instr *last_writeout = NULL;
+
+                        nir_foreach_instr_reverse_safe(instr, block) {
+                                if (instr->type != nir_instr_type_intrinsic)
+                                        continue;
+
+                                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+                                if (intr->intrinsic != nir_intrinsic_store_output)
+                                        continue;
+
+                                const nir_variable *var =
+                                        search_var(&nir->outputs, nir_intrinsic_base(intr));
+
+                                /* Same guard as the store_output emission
+                                 * path: the lookup must succeed before the
+                                 * variable is dereferenced below. */
+                                assert(var);
+
+                                if (var->data.index) {
+                                        if (!last_writeout)
+                                                last_writeout = instr;
+                                        continue;
+                                }
+
+                                /* No dual-source store later in the block, so
+                                 * this store is already in a valid position */
+                                if (!last_writeout)
+                                        continue;
+
+                                /* This is a real store, so move it to after dual-source stores */
+                                exec_node_remove(&instr->node);
+                                exec_node_insert_after(&last_writeout->node, &instr->node);
+
+                                progress = true;
+                        }
+                }
+        }
+
+        return progress;
+}
+
/* Flushes undefined values to zero */
static void
* fsat alone.
*/
- if (!is_int && !(opcode_props & OP_TYPE_CONVERT)) {
+ if (!midgard_is_integer_out_op(op)) {
bool fpos = mir_accept_dest_mod(ctx, &dest, nir_op_fclamp_pos);
bool fsat = mir_accept_dest_mod(ctx, &dest, nir_op_fsat);
bool ssat = mir_accept_dest_mod(ctx, &dest, nir_op_fsat_signed);
emit_attr_read(ctx, reg, vertex_builtin_arg(instr->intrinsic), 1, nir_type_int);
}
+/* Emits the load used for an MSAA builtin; it is called for
+ * nir_intrinsic_load_sample_id. The result goes to the intrinsic's
+ * destination index. The instruction is built as a 32-bit unsigned colour
+ * buffer load and then switched to the "_old" opcode, with fixed address and
+ * arg_2 values. The first four swizzle lanes of source 0 all select X.
+ *
+ * NOTE(review): the meaning of the constants 97 and 0x1E is not documented
+ * here -- confirm against the hardware notes before changing them. */
+static void
+emit_msaa_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
+{
+        midgard_instruction load =
+                m_ld_color_buffer_32u(nir_dest_index(&instr->dest), 0);
+
+        /* Broadcast the X component across the first four lanes */
+        for (unsigned lane = 0; lane < 4; ++lane)
+                load.swizzle[0][lane] = COMPONENT_X;
+
+        /* Override the opcode and fill in the fixed load/store fields */
+        load.load_store.arg_2 = 0x1E;
+        load.load_store.address = 97;
+        load.load_store.op = midgard_op_ld_color_buffer_32u_old;
+
+        emit_mir_instruction(ctx, load);
+}
+
static void
emit_control_barrier(compiler_context *ctx)
{
if (loc == FRAG_RESULT_STENCIL)
return 0x1E;
- assert(0);
+ unreachable("Invalid RT to load from");
}
static void
} else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
emit_varying_read(ctx, reg, offset, nr_comp, component, indirect_offset, t | nir_dest_bit_size(instr->dest), is_flat);
} else if (ctx->is_blend) {
- /* ctx->blend_input will be precoloured to r0, where
+ /* ctx->blend_input will be precoloured to r0/r2, where
* the input is preloaded */
- if (ctx->blend_input == ~0)
- ctx->blend_input = reg;
+ unsigned *input = offset ? &ctx->blend_src1 : &ctx->blend_input;
+
+ if (*input == ~0)
+ *input = reg;
else
- emit_mir_instruction(ctx, v_mov(ctx->blend_input, reg));
+ emit_mir_instruction(ctx, v_mov(*input, reg));
} else if (ctx->stage == MESA_SHADER_VERTEX) {
emit_attr_read(ctx, reg, offset, nr_comp, t);
} else {
case nir_intrinsic_load_output: {
reg = nir_dest_index(&instr->dest);
- midgard_instruction ld = m_ld_color_buffer_as_fp16(reg, 0);
+ unsigned bits = nir_dest_bit_size(instr->dest);
+
+ midgard_instruction ld;
+ if (bits == 16)
+ ld = m_ld_color_buffer_as_fp16(reg, 0);
+ else
+ ld = m_ld_color_buffer_as_fp32(reg, 0);
ld.load_store.arg_2 = output_load_rt_addr(ctx->nir, instr);
ld.swizzle[0][c] = 0;
if (ctx->quirks & MIDGARD_OLD_BLEND) {
- ld.load_store.op = midgard_op_ld_color_buffer_as_fp16_old;
+ if (bits == 16)
+ ld.load_store.op = midgard_op_ld_color_buffer_as_fp16_old;
+ else
+ ld.load_store.op = midgard_op_ld_color_buffer_as_fp32_old;
ld.load_store.address = 1;
ld.load_store.arg_2 = 0x1E;
}
nir_intrinsic_store_combined_output_pan;
const nir_variable *var;
- enum midgard_rt_id rt;
-
var = search_var(&ctx->nir->outputs,
nir_intrinsic_base(instr));
assert(var);
+
+ /* Dual-source blend writeout is done by leaving the
+ * value in r2 for the blend shader to use. */
+ if (var->data.index) {
+ if (instr->src[0].is_ssa) {
+ emit_explicit_constant(ctx, reg, reg);
+
+ unsigned out = make_compiler_temp(ctx);
+
+ midgard_instruction ins = v_mov(reg, out);
+ emit_mir_instruction(ctx, ins);
+
+ ctx->blend_src1 = out;
+ } else {
+ ctx->blend_src1 = reg;
+ }
+
+ break;
+ }
+
+ enum midgard_rt_id rt;
if (var->data.location == FRAG_RESULT_COLOR)
rt = MIDGARD_COLOR_RT0;
else if (var->data.location >= FRAG_RESULT_DATA0)
emit_vertex_builtin(ctx, instr);
break;
+ case nir_intrinsic_load_sample_id:
+ emit_msaa_builtin(ctx, instr);
+ break;
+
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_shared:
break;
ctx->alpha_ref = program->alpha_ref;
ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt;
ctx->blend_input = ~0;
+ ctx->blend_src1 = ~0;
ctx->quirks = midgard_get_quirks(gpu_id);
/* Start off with a safe cutoff, allowing usage of all 16 work
optimise_nir(nir, ctx->quirks, is_blend);
+ NIR_PASS_V(nir, midgard_nir_reorder_writeout);
+
if (midgard_debug & MIDGARD_DBG_SHADERS) {
nir_print_shader(nir, stdout);
}