do { if (midgard_debug & MIDGARD_DBG_MSGS) \
fprintf(stderr, "%s:%d: "fmt, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
-
-static bool
-midgard_is_branch_unit(unsigned unit)
-{
- return (unit == ALU_ENAB_BRANCH) || (unit == ALU_ENAB_BR_COMPACT);
-}
-
static midgard_block *
create_empty_block(compiler_context *ctx)
{
M_LOAD(ld_ubo_int4);
M_LOAD(ld_int4);
M_STORE(st_int4);
-M_LOAD(ld_color_buffer_8);
+M_LOAD(ld_color_buffer_32u);
//M_STORE(st_vary_16);
M_STORE(st_vary_32);
M_LOAD(ld_cubemap_coords);
M_LOAD(ld_compute_id);
+M_LOAD(pack_colour);
static midgard_instruction
v_branch(bool conditional, bool invert)
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
NIR_PASS(progress, nir, nir_copy_prop);
+ NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
* optimized) versions of UBO #0 */
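+/* (e.g. a uniform in vec4 slot 3 is just a read of UBO #0 at byte offset
+ * 3 * 16 = 48, matching the `uniform * 16` scaling at the call site below) */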
-midgard_instruction *
+static midgard_instruction *
emit_ubo_read(
compiler_context *ctx,
nir_instr *instr,
unsigned dest,
unsigned offset,
nir_src *indirect_offset,
+ unsigned indirect_shift,
unsigned index)
{
/* TODO: half-floats */
if (indirect_offset) {
ins.src[2] = nir_src_index(ctx, indirect_offset);
- ins.load_store.arg_2 = 0x80;
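+ /* The upper bits of arg_2 appear to encode a right-shift applied
+ * to the indirect offset; the old hard-coded 0x80 is just 4 << 5,
+ * i.e. a shift into 16-byte units */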
+ ins.load_store.arg_2 = (indirect_shift << 5);
} else {
ins.load_store.arg_2 = 0x1E;
}
/* Emit the read itself -- this is never indirect */
midgard_instruction *ins =
- emit_ubo_read(ctx, instr, dest, uniform * 16, NULL, 0);
+ emit_ubo_read(ctx, instr, dest, uniform * 16, NULL, 0, 0);
ins->mask = mask_of(nr_components);
}
}
}
-/* Emit store for a fragment shader, which is encoded via a fancy branch. TODO:
- * Handle MRT here */
-static void
-emit_fragment_epilogue(compiler_context *ctx, unsigned rt);
-
static void
emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
{
/* Emit the branch */
midgard_instruction *br = emit_mir_instruction(ctx, ins);
schedule_barrier(ctx);
- br->branch.target_block = ctx->block_count - 1;
- emit_fragment_epilogue(ctx, rt);
+ assert(rt < ARRAY_SIZE(ctx->writeout_branch));
+ assert(!ctx->writeout_branch[rt]);
+ ctx->writeout_branch[rt] = br;
+
+ /* "Push" our current location (block count - 1), i.e. the point the
+ * writeout loop will eventually jump back to. Maybe a bit too clever
+ * for my own good */
+
+ br->branch.target_block = ctx->block_count - 1;
}
static void
reg = nir_dest_index(ctx, &instr->dest);
if (is_uniform && !ctx->is_blend) {
- emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysval_count + offset) * 16, indirect_offset, 0);
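+ /* Sysvals are stashed at the front of the uniform buffer, hence
+ * the sysval_count bias; each slot is one 16-byte vec4 */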
+ emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysval_count + offset) * 16, indirect_offset, 4, 0);
} else if (is_ubo) {
nir_src index = instr->src[0];
- /* We don't yet support indirect UBOs. For indirect
- * block numbers (if that's possible), we don't know
- * enough about the hardware yet. For indirect sources,
- * we know what we need but we need to add some NIR
- * support for lowering correctly with respect to
- * 128-bit reads */
-
+ /* TODO: Is indirect block number possible? */
assert(nir_src_is_const(index));
- assert(nir_src_is_const(*src_offset));
uint32_t uindex = nir_src_as_uint(index) + 1;
- emit_ubo_read(ctx, &instr->instr, reg, offset, NULL, uindex);
+ emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex);
} else if (is_ssbo) {
nir_src index = instr->src[0];
assert(nir_src_is_const(index));
/* T720 and below use different blend opcodes with slightly
* different semantics than T760 and up */
- midgard_instruction ld = m_ld_color_buffer_8(reg, 0);
+ midgard_instruction ld = m_ld_color_buffer_32u(reg, 0);
bool old_blend = ctx->quirks & MIDGARD_OLD_BLEND;
if (instr->intrinsic == nir_intrinsic_load_output_u8_as_fp16_pan) {
reg = nir_src_index(ctx, &instr->src[0]);
if (ctx->stage == MESA_SHADER_FRAGMENT) {
- /* Determine number of render targets */
emit_fragment_store(ctx, reg, offset);
} else if (ctx->stage == MESA_SHADER_VERTEX) {
/* We should have been vectorized, though we don't
emit_explicit_constant(ctx, reg, reg);
- unsigned component = nir_intrinsic_component(instr);
+ unsigned dst_component = nir_intrinsic_component(instr);
unsigned nr_comp = nir_src_num_components(instr->src[0]);
midgard_instruction st = m_st_vary_32(reg, offset);
break;
}
- for (unsigned i = 0; i < ARRAY_SIZE(st.swizzle[0]); ++i)
- st.swizzle[0][i] = MIN2(i + component, nr_comp);
+ /* nir_intrinsic_component(store_intr) encodes the
+ * destination component start. Source component offset
+ * adjustment is taken care of in
+ * install_registers_instr(), when offset_swizzle() is
+ * called.
+ */
+ unsigned src_component = COMPONENT_X;
+
+ assert(nr_comp > 0);
+ for (unsigned i = 0; i < ARRAY_SIZE(st.swizzle[0]); ++i) {
+ st.swizzle[0][i] = src_component;
+ if (i >= dst_component && i < dst_component + nr_comp - 1)
+ src_component++;
+ }
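+ /* Worked example: dst_component = 1 (y) and nr_comp = 2 yield
+ * swizzle (x, x, y, y): the source's xy lands in the
+ * destination's yz, with the ends clamped much as the old
+ * MIN2 did */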
emit_mir_instruction(ctx, st);
} else {
unsigned coord_mask = mask_of(instr->coord_components);
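+ /* For 2D array textures, NIR supplies the array index in z, but
+ * the hardware reads it from w (see the 2D case below), so swap
+ * z and w whenever we build a temporary coordinate */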
+ bool flip_zw = (instr->sampler_dim == GLSL_SAMPLER_DIM_2D) && (coord_mask & (1 << COMPONENT_Z));
+
+ if (flip_zw)
+ coord_mask ^= ((1 << COMPONENT_Z) | (1 << COMPONENT_W));
+
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
/* texelFetch is undefined on samplerCube */
assert(midgard_texop != TEXTURE_OP_TEXEL_FETCH);
/* mov coord_temp, coords */
midgard_instruction mov = v_mov(index, coords);
mov.mask = coord_mask;
+
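+ /* Copy the array index from source z into the temp's w */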
+ if (flip_zw)
+ mov.swizzle[1][COMPONENT_W] = COMPONENT_Z;
+
emit_mir_instruction(ctx, mov);
} else {
coords = index;
}
if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D) {
- /* Array component in w but NIR wants it in z */
+ /* The hardware wants the array component in w while NIR
+ * keeps it in z; if we went through a temp coord above,
+ * the z/w flip already happened */
+
if (nr_components == 3) {
ins.swizzle[1][2] = COMPONENT_Z;
- ins.swizzle[1][3] = COMPONENT_Z;
+ ins.swizzle[1][3] = needs_temp_coord ? COMPONENT_W : COMPONENT_Z;
} else if (nr_components == 2) {
ins.swizzle[1][2] =
instr->is_shadow ? COMPONENT_Z : COMPONENT_X;
return progress;
}
-static void
+static unsigned
emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
{
- /* Include a move to specify the render target */
-
- if (rt > 0) {
- midgard_instruction rt_move = v_mov(SSA_FIXED_REGISTER(1),
- SSA_FIXED_REGISTER(1));
- rt_move.mask = 1 << COMPONENT_Z;
- rt_move.unit = UNIT_SADD;
- emit_mir_instruction(ctx, rt_move);
- }
-
/* Loop to ourselves */
struct midgard_instruction ins = v_branch(false, false);
ins.writeout = true;
ins.branch.target_block = ctx->block_count - 1;
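+ /* Select the render target via the branch's inline constant; going
+ * by the 0x100 scaling, the RT index lives in the second byte (this
+ * patch's packing, not documented hardware behaviour) */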
+ ins.constants[0] = rt * 0x100;
emit_mir_instruction(ctx, ins);
ctx->current_block->epilogue = true;
schedule_barrier(ctx);
+ return ins.branch.target_block;
}
static midgard_block *
MALI_NR_CHANNELS(4);
}
+/* For each fragment writeout instruction, generate a writeout loop to
+ * associate with it */
+
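+/* Rough shape of the result (a sketch of this pass's intent): each writeout
+ * branch is retargeted to a fresh epilogue block that loops on itself to
+ * perform the writeout, then either branches back to the saved continuation
+ * point (more RTs pending) or terminates the program (last_writeout) */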
+static void
+mir_add_writeout_loops(compiler_context *ctx)
+{
+ for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) {
+ midgard_instruction *br = ctx->writeout_branch[rt];
+ if (!br) continue;
+
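+ /* "Pop" the continuation point pushed in emit_fragment_store */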
+ unsigned popped = br->branch.target_block;
+ midgard_block_add_successor(mir_get_block(ctx, popped - 1), ctx->current_block);
+ br->branch.target_block = emit_fragment_epilogue(ctx, rt);
+
+ /* If more RTs follow, we'll need to branch back into the
+ * shader body once our loop terminates */
+
+ if ((rt + 1) < ARRAY_SIZE(ctx->writeout_branch) && ctx->writeout_branch[rt + 1]) {
+ midgard_instruction uncond = v_branch(false, false);
+ uncond.branch.target_block = popped;
+ emit_mir_instruction(ctx, uncond);
+ midgard_block_add_successor(ctx->current_block, mir_get_block(ctx, popped));
+ schedule_barrier(ctx);
+ } else {
+ /* We're last, so we can terminate here */
+ br->last_writeout = true;
+ }
+ }
+}
+
int
midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend, unsigned blend_rt, unsigned gpu_id, bool shaderdb)
{
progress |= midgard_opt_fuse_dest_invert(ctx, block);
progress |= midgard_opt_csel_invert(ctx, block);
progress |= midgard_opt_drop_cmp_invert(ctx, block);
+ progress |= midgard_opt_invert_branch(ctx, block);
}
} while (progress);
assert(!ins->invert);
}
+ if (ctx->stage == MESA_SHADER_FRAGMENT)
+ mir_add_writeout_loops(ctx);
+
/* Schedule! */
- schedule_program(ctx);
+ midgard_schedule_program(ctx);
mir_ra(ctx);
/* Now that all the bundles are scheduled and we can calculate block
/* Midgard prefetches instruction types, so during emission we
* need to lookahead. Unless this is the last instruction, in
- * which we return 1. Or if this is the second to last and the
- * last is an ALU, then it's also 1... */
+ * which case we return 1. */
mir_foreach_block(ctx, block) {
mir_foreach_bundle_in_block(block, bundle) {
int lookahead = 1;
- if (current_bundle + 1 < bundle_count) {
- uint8_t next = source_order_bundles[current_bundle + 1]->tag;
-
- if (!(current_bundle + 2 < bundle_count) && IS_ALU(next)) {
- lookahead = 1;
- } else {
- lookahead = next;
- }
- }
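+ /* A bundle marked last_writeout ends the program for its
+ * render target, so there is nothing to prefetch and we keep
+ * the end-of-shader tag of 1 */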
+ if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
+ lookahead = source_order_bundles[current_bundle + 1]->tag;
emit_binary_bundle(ctx, bundle, compiled, lookahead);
++current_bundle;