diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 46f0725741b..67c5c848214 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -78,8 +78,20 @@ midgard_is_branch_unit(unsigned unit)
 static void
 midgard_block_add_successor(midgard_block *block, midgard_block *successor)
 {
+        assert(block);
+        assert(successor);
+
+        /* Deduplicate */
+        for (unsigned i = 0; i < block->nr_successors; ++i) {
+                if (block->successors[i] == successor)
+                        return;
+        }
+
         block->successors[block->nr_successors++] = successor;
         assert(block->nr_successors <= ARRAY_SIZE(block->successors));
+
+        /* Note the predecessor in the other direction */
+        _mesa_set_add(successor->predecessors, block);
 }
 
 /* Helpers to generate midgard_instruction's using macro magic, since every
@@ -87,15 +99,14 @@ midgard_block_add_successor(midgard_block *block, midgard_block *successor)
 
 #define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
 
-#define M_LOAD_STORE(name, rname, uname) \
+#define M_LOAD_STORE(name, store) \
         static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
                 midgard_instruction i = { \
                         .type = TAG_LOAD_STORE_4, \
                         .mask = 0xF, \
                         .ssa_args = { \
-                                .rname = ssa, \
-                                .uname = -1, \
-                                .src1 = -1 \
+                                .dest = -1, \
+                                .src = { -1, -1, -1 }, \
                         }, \
                         .load_store = { \
                                 .op = midgard_op_##name, \
@@ -103,12 +114,17 @@ midgard_block_add_successor(midgard_block *block, midgard_block *successor)
                                 .address = address \
                         } \
                 }; \
+                \
+                if (store) \
+                        i.ssa_args.src[0] = ssa; \
+                else \
+                        i.ssa_args.dest = ssa; \
                 \
                 return i; \
         }
 
-#define M_LOAD(name) M_LOAD_STORE(name, dest, src0)
-#define M_STORE(name) M_LOAD_STORE(name, src0, dest)
+#define M_LOAD(name) M_LOAD_STORE(name, false)
+#define M_STORE(name) M_LOAD_STORE(name, true)
 
 /* Inputs a NIR ALU source, with modifiers attached if necessary, and outputs
  * the corresponding Midgard source */
@@ -168,12 +184,14 @@ vector_alu_modifiers(nir_alu_src *src, bool is_int, unsigned broadcast_count,
 M_LOAD(ld_attr_32);
 //M_LOAD(ld_vary_16);
 M_LOAD(ld_vary_32);
-//M_LOAD(ld_uniform_16);
-M_LOAD(ld_uniform_32);
+M_LOAD(ld_ubo_int4);
+M_LOAD(ld_int4);
+M_STORE(st_int4);
 M_LOAD(ld_color_buffer_8);
 //M_STORE(st_vary_16);
 M_STORE(st_vary_32);
 M_LOAD(st_cubemap_coords);
+M_LOAD(ld_compute_id);
 
 static midgard_instruction
 v_alu_br_compact_cond(midgard_jmp_writeout_op op, unsigned tag, signed offset, unsigned cond)
@@ -193,7 +211,11 @@ v_alu_br_compact_cond(midgard_jmp_writeout_op op, unsigned tag, signed offset, u
                 .unit = ALU_ENAB_BR_COMPACT,
                 .prepacked_branch = true,
                 .compact_branch = true,
-                .br_compact = compact
+                .br_compact = compact,
+                .ssa_args = {
+                        .dest = -1,
+                        .src = { -1, -1, -1 },
+                }
         };
 
         if (op == midgard_jmp_writeout_op_writeout)
@@ -212,6 +234,10 @@ v_branch(bool conditional, bool invert)
                 .branch = {
                         .conditional = conditional,
                         .invert_conditional = invert
+                },
+                .ssa_args = {
+                        .dest = -1,
+                        .src = { -1, -1, -1 },
                 }
         };
 
@@ -224,7 +250,11 @@ midgard_create_branch_extended( midgard_condition cond,
                                unsigned dest_tag,
                                signed quadword_offset)
 {
-        /* For unclear reasons, the condition code is repeated 8 times */
+        /* The condition code is actually a LUT describing a function to
+         * combine multiple 
condition codes. However, we only support a single
+         * condition code at the moment, so we just duplicate it eight
+         * times. */
+
         uint16_t duplicated_cond =
                 (cond << 14) |
                 (cond << 12) |
@@ -280,6 +310,20 @@ midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));
 }
 
+static int
+midgard_sysval_for_ssbo(nir_intrinsic_instr *instr)
+{
+        /* This is way too meta */
+        bool is_store = instr->intrinsic == nir_intrinsic_store_ssbo;
+        unsigned idx_idx = is_store ? 1 : 0;
+
+        nir_src index = instr->src[idx_idx];
+        assert(nir_src_is_const(index));
+        uint32_t uindex = nir_src_as_uint(index);
+
+        return PAN_SYSVAL(SSBO, uindex);
+}
+
 static int
 midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
 {
@@ -288,28 +332,16 @@ midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
                 return PAN_SYSVAL_VIEWPORT_SCALE;
         case nir_intrinsic_load_viewport_offset:
                 return PAN_SYSVAL_VIEWPORT_OFFSET;
+        case nir_intrinsic_load_num_work_groups:
+                return PAN_SYSVAL_NUM_WORK_GROUPS;
+        case nir_intrinsic_load_ssbo:
+        case nir_intrinsic_store_ssbo:
+                return midgard_sysval_for_ssbo(instr);
         default:
                 return -1;
         }
 }
 
-static unsigned
-nir_dest_index(compiler_context *ctx, nir_dest *dst)
-{
-        if (dst->is_ssa)
-                return (dst->ssa.index << 1) | 0;
-        else {
-                assert(!dst->reg.indirect);
-                return (dst->reg.reg->index << 1) | IS_REG;
-        }
-}
-
-static unsigned
-make_compiler_temp(compiler_context *ctx)
-{
-        return ctx->func->impl->ssa_alloc + ctx->func->impl->reg_alloc + ctx->temp_alloc++;
-}
-
 static int sysval_for_instr(compiler_context *ctx, nir_instr *instr,
                             unsigned *dest)
 {
@@ -318,11 +350,14 @@ static int sysval_for_instr(compiler_context *ctx, nir_instr *instr,
         nir_tex_instr *tex;
         int sysval = -1;
 
+        bool is_store = false;
+
         switch (instr->type) {
         case nir_instr_type_intrinsic:
                 intr = nir_instr_as_intrinsic(instr);
                 sysval = midgard_nir_sysval_for_intrinsic(intr);
                 dst = &intr->dest;
+                is_store |= intr->intrinsic == nir_intrinsic_store_ssbo;
                 break;
         case nir_instr_type_tex:
                 tex = nir_instr_as_tex(instr);
@@ -340,7 +375,7 @@ static int sysval_for_instr(compiler_context *ctx, nir_instr *instr,
                 break;
         }
 
-        if (dest && dst)
+        if (dest && dst && !is_store)
                 *dest = nir_dest_index(ctx, dst);
 
         return sysval;
@@ -428,17 +463,12 @@ optimise_nir(nir_shader *nir)
         NIR_PASS(progress, nir, midgard_nir_lower_fdot2);
         NIR_PASS(progress, nir, nir_lower_idiv);
 
-        nir_lower_tex_options lower_tex_1st_pass_options = {
-                .lower_rect = true,
-                .lower_txp = ~0
-        };
-
-        nir_lower_tex_options lower_tex_2nd_pass_options = {
+        nir_lower_tex_options lower_tex_options = {
                 .lower_txs_lod = true,
+                .lower_txp = ~0
         };
 
-        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_1st_pass_options);
-        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_2nd_pass_options);
+        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
 
         do {
                 progress = false;
@@ -553,23 +583,6 @@ emit_explicit_constant(compiler_context *ctx, unsigned node, unsigned to)
         }
 }
 
-static unsigned
-nir_src_index(compiler_context *ctx, nir_src *src)
-{
-        if (src->is_ssa)
-                return (src->ssa->index << 1) | 0;
-        else {
-                assert(!src->reg.indirect);
-                return (src->reg.reg->index << 1) | IS_REG;
-        }
-}
-
-static unsigned
-nir_alu_src_index(compiler_context *ctx, nir_alu_src *src)
-{
-        return nir_src_index(ctx, &src->src);
-}
-
 static bool
 nir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components)
 {
@@ -609,8 +622,7 @@ emit_condition(compiler_context *ctx, nir_src *src, bool for_branch, unsigned co
                 .mask = 1 << 
COMPONENT_W, .ssa_args = { - .src0 = condition, - .src1 = condition, + .src = { condition, condition, -1 }, .dest = SSA_FIXED_REGISTER(31), }, @@ -649,8 +661,7 @@ emit_condition_mixed(compiler_context *ctx, nir_alu_src *src, unsigned nr_comp) .precede_break = true, .mask = mask_of(nr_comp), .ssa_args = { - .src0 = condition, - .src1 = condition, + .src = { condition, condition, -1 }, .dest = SSA_FIXED_REGISTER(31), }, .alu = { @@ -666,37 +677,6 @@ emit_condition_mixed(compiler_context *ctx, nir_alu_src *src, unsigned nr_comp) emit_mir_instruction(ctx, ins); } - - -/* Likewise, indirect offsets are put in r27.w. TODO: Allow componentwise - * pinning to eliminate this move in all known cases */ - -static void -emit_indirect_offset(compiler_context *ctx, nir_src *src) -{ - int offset = nir_src_index(ctx, src); - - midgard_instruction ins = { - .type = TAG_ALU_4, - .mask = 1 << COMPONENT_W, - .ssa_args = { - .src0 = SSA_UNUSED_1, - .src1 = offset, - .dest = SSA_FIXED_REGISTER(REGISTER_OFFSET), - }, - .alu = { - .op = midgard_alu_op_imov, - .outmod = midgard_outmod_int_wrap, - .reg_mode = midgard_reg_mode_32, - .dest_override = midgard_dest_override_none, - .src1 = vector_alu_srco_unsigned(zero_alu_src), - .src2 = vector_alu_srco_unsigned(blank_alu_src_xxxx) - }, - }; - - emit_mir_instruction(ctx, ins); -} - #define ALU_CASE(nir, _op) \ case nir_op_##nir: \ op = midgard_alu_op_##_op; \ @@ -748,6 +728,14 @@ reg_mode_for_nir(nir_alu_instr *instr) static void emit_alu(compiler_context *ctx, nir_alu_instr *instr) { + /* Derivatives end up emitted on the texture pipe, not the ALUs. This + * is handled elsewhere */ + + if (instr->op == nir_op_fddx || instr->op == nir_op_fddy) { + midgard_emit_derivatives(ctx, instr); + return; + } + bool is_ssa = instr->dest.dest.is_ssa; unsigned dest = nir_dest_index(ctx, &instr->dest.dest); @@ -859,8 +847,8 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ALU_CASE(fsin, fsin); ALU_CASE(fcos, fcos); - /* Second op implicit #0 */ - ALU_CASE(inot, inor); + /* We'll set invert */ + ALU_CASE(inot, imov); ALU_CASE(iand, iand); ALU_CASE(ior, ior); ALU_CASE(ixor, ixor); @@ -1041,8 +1029,11 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) midgard_instruction ins = { .type = TAG_ALU_4, .ssa_args = { - .src0 = quirk_flipped_r24 ? SSA_UNUSED_1 : src0, - .src1 = quirk_flipped_r24 ? src0 : src1, + .src = { + quirk_flipped_r24 ? SSA_UNUSED_1 : src0, + quirk_flipped_r24 ? 
src0 : src1,
+                                -1
+                        },
                         .dest = dest,
                 }
         };
 
@@ -1100,7 +1091,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
          * constants) */
 
         ins.ssa_args.inline_constant = false;
-        ins.ssa_args.src1 = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
+        ins.ssa_args.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
         ins.has_constants = true;
 
         if (instr->op == nir_op_b2f32) {
@@ -1115,14 +1106,12 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
         } else if (nr_inputs == 1 && !quirk_flipped_r24) {
                 /* Lots of instructions need a 0 plonked in */
                 ins.ssa_args.inline_constant = false;
-                ins.ssa_args.src1 = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
+                ins.ssa_args.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
                 ins.has_constants = true;
                 ins.constants[0] = 0.0f;
                 ins.alu.src2 = vector_alu_srco_unsigned(blank_alu_src_xxxx);
         } else if (instr->op == nir_op_inot) {
-                /* ~b = ~(b & b), so duplicate the source */
-                ins.ssa_args.src1 = ins.ssa_args.src0;
-                ins.alu.src2 = ins.alu.src1;
+                ins.invert = true;
         }
 
         if ((opcode_props & UNITS_ALL) == UNIT_VLUT) {
@@ -1157,12 +1146,24 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 
 #undef ALU_CASE
 
+static unsigned
+mir_mask_for_intr(nir_instr *instr, bool is_read)
+{
+        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+        if (is_read)
+                return mask_of(nir_intrinsic_dest_components(intr));
+        else
+                return nir_intrinsic_write_mask(intr);
+}
+
 /* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
  * optimized) versions of UBO #0 */
 
-void
+midgard_instruction *
 emit_ubo_read(
         compiler_context *ctx,
+        nir_instr *instr,
         unsigned dest,
         unsigned offset,
         nir_src *indirect_offset,
@@ -1170,19 +1171,94 @@
 {
         /* TODO: half-floats */
 
-        midgard_instruction ins = m_ld_uniform_32(dest, offset);
+        midgard_instruction ins = m_ld_ubo_int4(dest, offset);
+
+        assert((offset & 0xF) == 0);
+        offset /= 16;
 
         /* TODO: Don't split */
         ins.load_store.varying_parameters = (offset & 7) << 7;
         ins.load_store.address = offset >> 3;
+        ins.mask = mir_mask_for_intr(instr, true);
 
         if (indirect_offset) {
-                emit_indirect_offset(ctx, indirect_offset);
-                ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
+                ins.ssa_args.src[1] = nir_src_index(ctx, indirect_offset);
+                ins.load_store.arg_2 = 0x80;
         } else {
-                ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
+                ins.load_store.arg_2 = 0x1E;
         }
 
+        ins.load_store.arg_1 = index;
+
+        return emit_mir_instruction(ctx, ins);
+}
+
+/* SSBO reads are like UBO reads if you squint */
+
+static void
+emit_ssbo_access(
+        compiler_context *ctx,
+        nir_instr *instr,
+        bool is_read,
+        unsigned srcdest,
+        unsigned offset,
+        nir_src *indirect_offset,
+        unsigned index)
+{
+        /* TODO: types */
+
+        midgard_instruction ins;
+
+        if (is_read)
+                ins = m_ld_int4(srcdest, offset);
+        else
+                ins = m_st_int4(srcdest, offset);
+
+        /* SSBO reads use a generic memory read interface, so we need the
+         * address of the SSBO as the first argument. This is a sysval. */
+
+        unsigned addr = make_compiler_temp(ctx);
+        emit_sysval_read(ctx, instr, addr, 2);
+
+        /* The source array is a bit of a leaky abstraction for SSBOs.
+         * Nevertheless, for loads:
+         *
+         *  src[0] = arg_1
+         *  src[1] = arg_2
+         *  src[2] = unused
+         *
+         * Whereas for stores:
+         *
+         *  src[0] = value
+         *  src[1] = arg_1
+         *  src[2] = arg_2
+         *
+         * We would like arg_1 = the address and
+         * arg_2 = the offset.
+         */
+
+        ins.ssa_args.src[is_read ? 0 : 1] = addr;
+
+        /* TODO: What is this? It looks superficially like a shift << 5, but
+         * arg_1 doesn't take a shift. Should it be E0 or A0? */
+
+        if (indirect_offset)
+                ins.load_store.arg_1 |= 0xE0;
+
+        /* We also need to emit the indirect offset */
+
+        if (indirect_offset)
+                ins.ssa_args.src[is_read ? 1 : 2] = nir_src_index(ctx, indirect_offset);
+        else
+                ins.load_store.arg_2 = 0x7E;
+
+        /* TODO: Bounds check */
+
+        /* Finally, we emit the direct offset */
+
+        ins.load_store.varying_parameters = (offset & 0x1FF) << 1;
+        ins.load_store.address = (offset >> 9);
+        ins.mask = mir_mask_for_intr(instr, is_read);
+
         emit_mir_instruction(ctx, ins);
 }
 
@@ -1210,14 +1286,12 @@ emit_varying_read(
         memcpy(&u, &p, sizeof(p));
         ins.load_store.varying_parameters = u;
 
-        if (indirect_offset) {
-                /* We need to add in the dynamic index, moved to r27.w */
-                emit_indirect_offset(ctx, indirect_offset);
-                ins.load_store.unknown = 0x79e; /* xxx: what is this? */
-        } else {
-                /* Just a direct load */
-                ins.load_store.unknown = 0x1e9e; /* xxx: what is this? */
-        }
+        if (indirect_offset)
+                ins.ssa_args.src[1] = nir_src_index(ctx, indirect_offset);
+        else
+                ins.load_store.arg_2 = 0x1E;
+
+        ins.load_store.arg_1 = 0x9E;
 
         /* Use the type appropriate load */
         switch (type) {
@@ -1239,8 +1313,9 @@ emit_varying_read(
         emit_mir_instruction(ctx, ins);
 }
 
-static void
-emit_sysval_read(compiler_context *ctx, nir_instr *instr)
+void
+emit_sysval_read(compiler_context *ctx, nir_instr *instr, signed dest_override,
+                 unsigned nr_components)
 {
         unsigned dest = 0;
 
@@ -1248,13 +1323,99 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr)
         int sysval = sysval_for_instr(ctx, instr, &dest);
         void *val = _mesa_hash_table_u64_search(ctx->sysval_to_id, sysval);
 
+        if (dest_override >= 0)
+                dest = dest_override;
+
         /* Sysvals are prefix uniforms */
         unsigned uniform = ((uintptr_t) val) - 1;
 
         /* Emit the read itself -- this is never indirect */
-        emit_ubo_read(ctx, dest, uniform, NULL, 0);
+        midgard_instruction *ins =
+                emit_ubo_read(ctx, instr, dest, uniform * 16, NULL, 0);
+
+        ins->mask = mask_of(nr_components);
+}
+
+static unsigned
+compute_builtin_arg(nir_intrinsic_op op)
+{
+        switch (op) {
+        case nir_intrinsic_load_work_group_id:
+                return 0x14;
+        case nir_intrinsic_load_local_invocation_id:
+                return 0x10;
+        default:
+                unreachable("Invalid compute parameter loaded");
+        }
+}
+
+/* Emit store for a fragment shader, which is encoded via a fancy branch. TODO:
+ * Handle MRT here */
+
+static void
+emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
+{
+        /* First, move in whatever we're outputting */
+        midgard_instruction move = v_mov(src, blank_alu_src, SSA_FIXED_REGISTER(0));
+        if (rt != 0) {
+                /* Force a tight schedule. TODO: Make the scheduler MRT aware */
+                move.unit = UNIT_VMUL;
+                move.precede_break = true;
+                move.dont_eliminate = true;
+        }
+
+        emit_mir_instruction(ctx, move);
+
+        /* If we're doing MRT, we need to specify the render target */
+
+        midgard_instruction rt_move = {
+                .ssa_args = {
+                        .dest = -1
+                }
+        };
+
+        if (rt != 0) {
+                /* We'll write to r1.z */
+                rt_move = v_mov(-1, blank_alu_src, SSA_FIXED_REGISTER(1));
+                rt_move.mask = 1 << COMPONENT_Z;
+                rt_move.unit = UNIT_SADD;
+
+                /* r1.z = (rt * 0x100) */
+                rt_move.ssa_args.inline_constant = true;
+                rt_move.inline_constant = (rt * 0x100);
+
+                /* r1 */
+                ctx->work_registers = MAX2(ctx->work_registers, 1);
+
+                /* Do the write */
+                emit_mir_instruction(ctx, rt_move);
+        }
+
+        /* Next, generate the branch. 
For R render targets in the writeout, the + * i'th render target jumps to pseudo-offset [2(R-1) + i] */ + + unsigned offset = (2 * (ctx->nir->num_outputs - 1)) + rt; + + struct midgard_instruction ins = + v_alu_br_compact_cond(midgard_jmp_writeout_op_writeout, TAG_ALU_4, offset, midgard_condition_always); + + /* Add dependencies */ + ins.ssa_args.src[0] = move.ssa_args.dest; + ins.ssa_args.src[1] = rt_move.ssa_args.dest; + + /* Emit the branch */ + emit_mir_instruction(ctx, ins); } +static void +emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) +{ + unsigned reg = nir_dest_index(ctx, &instr->dest); + midgard_instruction ins = m_ld_compute_id(reg, 0); + ins.mask = mask_of(3); + ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic); + emit_mir_instruction(ctx, ins); +} static void emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) { @@ -1276,17 +1437,19 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) case nir_intrinsic_load_uniform: case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: case nir_intrinsic_load_input: { bool is_uniform = instr->intrinsic == nir_intrinsic_load_uniform; bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo; + bool is_ssbo = instr->intrinsic == nir_intrinsic_load_ssbo; /* Get the base type of the intrinsic */ /* TODO: Infer type? Does it matter? */ nir_alu_type t = - is_ubo ? nir_type_uint : nir_intrinsic_type(instr); + (is_ubo || is_ssbo) ? nir_type_uint : nir_intrinsic_type(instr); t = nir_alu_type_get_base_type(t); - if (!is_ubo) { + if (!(is_ubo || is_ssbo)) { offset = nir_intrinsic_base(instr); } @@ -1295,6 +1458,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) nir_src *src_offset = nir_get_io_offset_src(instr); bool direct = nir_src_is_const(*src_offset); + nir_src *indirect_offset = direct ? NULL : src_offset; if (direct) offset += nir_src_as_uint(*src_offset); @@ -1305,7 +1469,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) reg = nir_dest_index(ctx, &instr->dest); if (is_uniform && !ctx->is_blend) { - emit_ubo_read(ctx, reg, ctx->sysval_count + offset, !direct ? &instr->src[0] : NULL, 0); + emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysval_count + offset) * 16, indirect_offset, 0); } else if (is_ubo) { nir_src index = instr->src[0]; @@ -1319,11 +1483,14 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) assert(nir_src_is_const(index)); assert(nir_src_is_const(*src_offset)); - /* TODO: Alignment */ - assert((offset & 0xF) == 0); - uint32_t uindex = nir_src_as_uint(index) + 1; - emit_ubo_read(ctx, reg, offset / 16, NULL, uindex); + emit_ubo_read(ctx, &instr->instr, reg, offset, NULL, uindex); + } else if (is_ssbo) { + nir_src index = instr->src[0]; + assert(nir_src_is_const(index)); + uint32_t uindex = nir_src_as_uint(index); + + emit_ssbo_access(ctx, &instr->instr, true, reg, offset, indirect_offset, uindex); } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) { emit_varying_read(ctx, reg, offset, nr_comp, component, !direct ? &instr->src[0] : NULL, t); } else if (ctx->is_blend) { @@ -1334,7 +1501,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) emit_mir_instruction(ctx, move); } else if (ctx->stage == MESA_SHADER_VERTEX) { midgard_instruction ins = m_ld_attr_32(reg, offset); - ins.load_store.unknown = 0x1E1E; /* XXX: What is this? 
*/ + ins.load_store.arg_1 = 0x1E; + ins.load_store.arg_2 = 0x1E; ins.mask = mask_of(nr_comp); /* Use the type appropriate load */ @@ -1395,19 +1563,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) reg = nir_src_index(ctx, &instr->src[0]); if (ctx->stage == MESA_SHADER_FRAGMENT) { - /* gl_FragColor is not emitted with load/store - * instructions. Instead, it gets plonked into - * r0 at the end of the shader and we do the - * framebuffer writeout dance. TODO: Defer - * writes */ - - midgard_instruction move = v_mov(reg, blank_alu_src, SSA_FIXED_REGISTER(0)); - emit_mir_instruction(ctx, move); - - /* Save the index we're writing to for later reference - * in the epilogue */ - - ctx->fragment_output = reg; + /* Determine number of render targets */ + emit_fragment_store(ctx, reg, offset); } else if (ctx->stage == MESA_SHADER_VERTEX) { /* We should have been vectorized, though we don't * currently check that st_vary is emitted only once @@ -1422,7 +1579,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) unsigned component = nir_intrinsic_component(instr); midgard_instruction st = m_st_vary_32(reg, offset); - st.load_store.unknown = 0x1E9E; /* XXX: What is this? */ + st.load_store.arg_1 = 0x9E; + st.load_store.arg_2 = 0x1E; st.load_store.swizzle = SWIZZLE_XYZW << (2*component); emit_mir_instruction(ctx, st); } else { @@ -1436,11 +1594,22 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) case nir_intrinsic_store_raw_output_pan: assert (ctx->stage == MESA_SHADER_FRAGMENT); reg = nir_src_index(ctx, &instr->src[0]); + emit_fragment_store(ctx, reg, 0); + + break; + + case nir_intrinsic_store_ssbo: + assert(nir_src_is_const(instr->src[1])); - midgard_instruction move = v_mov(reg, blank_alu_src, SSA_FIXED_REGISTER(0)); - emit_mir_instruction(ctx, move); - ctx->fragment_output = reg; + bool direct_offset = nir_src_is_const(instr->src[2]); + offset = direct_offset ? nir_src_as_uint(instr->src[2]) : 0; + nir_src *indirect_offset = direct_offset ? 
NULL : &instr->src[2];
+                reg = nir_src_index(ctx, &instr->src[0]);
+                uint32_t uindex = nir_src_as_uint(instr->src[1]);
+
+                emit_explicit_constant(ctx, reg, reg);
+                emit_ssbo_access(ctx, &instr->instr, false, reg, offset, indirect_offset, uindex);
 
                 break;
 
         case nir_intrinsic_load_alpha_ref_float:
 
                 float ref_value = ctx->alpha_ref;
 
+                /* See emit_load_const */
                 float *v = ralloc_array(NULL, float, 4);
                 memcpy(v, &ref_value, sizeof(float));
-                _mesa_hash_table_u64_insert(ctx->ssa_constants, instr->dest.ssa.index + 1, v);
+                _mesa_hash_table_u64_insert(ctx->ssa_constants, (instr->dest.ssa.index << 1) + 1, v);
                 break;
 
         case nir_intrinsic_load_viewport_scale:
         case nir_intrinsic_load_viewport_offset:
-                emit_sysval_read(ctx, &instr->instr);
+        case nir_intrinsic_load_num_work_groups:
+                emit_sysval_read(ctx, &instr->instr, -1, 3);
+                break;
+
+        case nir_intrinsic_load_work_group_id:
+        case nir_intrinsic_load_local_invocation_id:
+                emit_compute_builtin(ctx, instr);
                 break;
 
         default:
@@ -1475,6 +1651,7 @@ midgard_tex_format(enum glsl_sampler_dim dim)
 
         case GLSL_SAMPLER_DIM_2D:
         case GLSL_SAMPLER_DIM_EXTERNAL:
+        case GLSL_SAMPLER_DIM_RECT:
                 return MALI_TEX_2D;
 
         case GLSL_SAMPLER_DIM_3D:
@@ -1555,8 +1732,7 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr,
                 .mask = 0xF,
                 .ssa_args = {
                         .dest = nir_dest_index(ctx, &instr->dest),
-                        .src0 = -1,
-                        .src1 = -1,
+                        .src = { -1, -1, -1 },
                 },
                 .texture = {
                         .op = midgard_texop,
@@ -1614,17 +1790,16 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr,
                          * texture register */
 
                         unsigned temp = make_compiler_temp(ctx);
-
                         midgard_instruction st = m_st_cubemap_coords(temp, 0);
-                        st.ssa_args.src0 = index;
-                        st.load_store.unknown = 0x24; /* XXX: What is this? */
+                        st.ssa_args.src[0] = index;
                         st.mask = 0x3; /* xy */
+                        st.load_store.arg_1 = 0x20;
                         st.load_store.swizzle = alu_src.swizzle;
                         emit_mir_instruction(ctx, st);
 
-                        ins.ssa_args.src0 = temp;
+                        ins.ssa_args.src[0] = temp;
                 } else {
-                        ins.ssa_args.src0 = index;
+                        ins.ssa_args.src[0] = index;
                 }
 
                 if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D) {
@@ -1644,7 +1819,7 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr,
                                 break;
 
                         ins.texture.lod_register = true;
-                        ins.ssa_args.src1 = index;
+                        ins.ssa_args.src[1] = index;
                         emit_explicit_constant(ctx, index, index);
 
                         break;
@@ -1684,7 +1859,7 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
                 emit_texop_native(ctx, instr, TEXTURE_OP_TEXEL_FETCH);
                 break;
         case nir_texop_txs:
-                emit_sysval_read(ctx, &instr->instr);
+                emit_sysval_read(ctx, &instr->instr, -1, 4);
                 break;
         default:
                 unreachable("Unhandled texture op");
         }
 }
@@ -1767,13 +1942,10 @@ inline_alu_constants(compiler_context *ctx)
                 /* If there is already a constant here, we can do nothing */
                 if (alu->has_constants) continue;
 
-                /* It makes no sense to inline constants on a branch */
-
                 if (alu->compact_branch || alu->prepacked_branch) continue;
-
-                CONDITIONAL_ATTACH(src0);
+                CONDITIONAL_ATTACH(src[0]);
 
                 if (!alu->has_constants) {
-                        CONDITIONAL_ATTACH(src1)
+                        CONDITIONAL_ATTACH(src[1])
                 } else if (!alu->inline_constant) {
                         /* Corner case: _two_ vec4 constants, for instance with a
                          * csel. For this case, we can only use a constant
@@ -1785,18 +1957,18 @@ inline_alu_constants(compiler_context *ctx)
                          * to the destination register. 
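For instance,
                         * csel(cond, #A, #B) roughly becomes
                         *
                         *    mov dest, #B
                         *    csel(cond, #A, dest)
                         *
                         * keeping a single embedded constant per instruction
                         * (an illustrative sketch; the actual emission is
                         * below). 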
*/
 
-                        void *entry = _mesa_hash_table_u64_search(ctx->ssa_constants, alu->ssa_args.src1 + 1);
+                        void *entry = _mesa_hash_table_u64_search(ctx->ssa_constants, alu->ssa_args.src[1] + 1);
                         unsigned scratch = alu->ssa_args.dest;
 
                         if (entry) {
                                 midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), blank_alu_src, scratch);
-                                attach_constants(ctx, &ins, entry, alu->ssa_args.src1 + 1);
+                                attach_constants(ctx, &ins, entry, alu->ssa_args.src[1] + 1);
 
                                 /* Force a break XXX Defer r31 writes */
                                 ins.unit = UNIT_VLUT;
 
                                 /* Set the source */
-                                alu->ssa_args.src1 = scratch;
+                                alu->ssa_args.src[1] = scratch;
 
                                 /* Inject us -before- the last instruction which set r31 */
                                 mir_insert_instruction_before(mir_prev_op(alu), ins);
                         }
                 }
         }
 }
 
+/* Being a little silly with the names, but returns the op that is the bitwise
+ * inverse of the op with the arguments switched. I.e. (f and g are
+ * contrapositives):
+ *
+ *      f(a, b) = ~g(b, a)
+ *
+ * Corollary: if g is the contrapositive of f, f is the contrapositive of g:
+ *
+ *      f(a, b) = ~g(b, a)
+ *      ~f(a, b) = g(b, a)
+ *      ~f(a, b) = ~h(a, b) where h is the contrapositive of g
+ *      f(a, b) = h(a, b)
+ *
+ * Thus we define this function in pairs.
+ */
+
+static inline midgard_alu_op
+mir_contrapositive(midgard_alu_op op)
+{
+        switch (op) {
+        case midgard_alu_op_flt:
+                return midgard_alu_op_fle;
+        case midgard_alu_op_fle:
+                return midgard_alu_op_flt;
+
+        case midgard_alu_op_ilt:
+                return midgard_alu_op_ile;
+        case midgard_alu_op_ile:
+                return midgard_alu_op_ilt;
+
+        default:
+                unreachable("No known contrapositive");
+        }
+}
+
 /* Midgard supports two types of constants, embedded constants (128-bit) and
  * inline constants (16-bit). Sometimes, especially with scalar ops, embedded
  * constants can be demoted to inline constants, for space savings and
@@ -1834,14 +2041,20 @@ embedded_to_inline_constant(compiler_context *ctx)
 
                 int op = ins->alu.op;
 
-                if (ins->ssa_args.src0 == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) {
+                if (ins->ssa_args.src[0] == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) {
+                        bool flip = alu_opcode_props[op].props & OP_COMMUTES;
+
                         switch (op) {
-                        /* These ops require an operational change to flip
-                         * their arguments TODO */
+                        /* Conditionals can be inverted */
                         case midgard_alu_op_flt:
-                        case midgard_alu_op_fle:
                         case midgard_alu_op_ilt:
+                        case midgard_alu_op_fle:
                         case midgard_alu_op_ile:
+                                ins->alu.op = mir_contrapositive(ins->alu.op);
+                                ins->invert = true;
+                                flip = true;
+                                break;
+
                         case midgard_alu_op_fcsel:
                         case midgard_alu_op_icsel:
                                 DBG("Missed non-commutative flip (%s)\n", alu_opcode_props[op].name);
@@ -1849,10 +2062,10 @@ embedded_to_inline_constant(compiler_context *ctx)
                                 break;
                         }
 
-                        if (alu_opcode_props[op].props & OP_COMMUTES) {
+                        if (flip) {
                                 /* Flip the SSA numbers */
-                                ins->ssa_args.src0 = ins->ssa_args.src1;
-                                ins->ssa_args.src1 = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
+                                ins->ssa_args.src[0] = ins->ssa_args.src[1];
+                                ins->ssa_args.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
 
                                 /* And flip the modifiers */
 
@@ -1864,7 +2077,7 @@ embedded_to_inline_constant(compiler_context *ctx)
                         }
                 }
 
-                if (ins->ssa_args.src1 == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) {
+                if (ins->ssa_args.src[1] == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) {
                         /* Extract the source information */
 
                         midgard_vector_alu_src *src;
@@ -1904,8 +2117,7 @@ embedded_to_inline_constant(compiler_context *ctx)
 
                         /* We don't know how to handle these with a constant */
 
-                        bool is_int = midgard_is_integer_op(ins->alu.op);
-                        if (mir_nontrivial_raw_mod(*src, 
is_int) || src->half || src->rep_low || src->rep_high) { + if (mir_nontrivial_source2_mod_simple(ins) || src->rep_low || src->rep_high) { DBG("Bailing inline constant...\n"); continue; } @@ -1938,7 +2150,7 @@ embedded_to_inline_constant(compiler_context *ctx) /* Get rid of the embedded constant */ ins->has_constants = false; - ins->ssa_args.src1 = SSA_UNUSED_0; + ins->ssa_args.src[1] = -1; ins->ssa_args.inline_constant = true; ins->inline_constant = scaled_constant; } @@ -2004,7 +2216,7 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block) if (ins->alu.outmod != midgard_outmod_pos) continue; /* TODO: Registers? */ - unsigned src = ins->ssa_args.src1; + unsigned src = ins->ssa_args.src[1]; if (src & IS_REG) continue; assert(!mir_has_multiple_writes(ctx, src)); @@ -2039,20 +2251,33 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block) static void emit_fragment_epilogue(compiler_context *ctx) { - emit_explicit_constant(ctx, ctx->fragment_output, SSA_FIXED_REGISTER(0)); + /* Just emit the last chunk with the branch */ + EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, -1, midgard_condition_always); +} + +static midgard_block * +create_empty_block(compiler_context *ctx) +{ + midgard_block *blk = rzalloc(ctx, midgard_block); - /* Perform the actual fragment writeout. We have two writeout/branch - * instructions, forming a loop until writeout is successful as per the - * docs. TODO: gl_FragDepth */ + blk->predecessors = _mesa_set_create(blk, + _mesa_hash_pointer, + _mesa_key_pointer_equal); - EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, 0, midgard_condition_always); - EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, -1, midgard_condition_always); + blk->source_id = ctx->block_source_count++; + + return blk; } static midgard_block * emit_block(compiler_context *ctx, nir_block *block) { - midgard_block *this_block = calloc(sizeof(midgard_block), 1); + midgard_block *this_block = ctx->after_block; + ctx->after_block = NULL; + + if (!this_block) + this_block = create_empty_block(ctx); + list_addtail(&this_block->link, &ctx->blocks); this_block->is_scheduled = false; @@ -2061,10 +2286,6 @@ emit_block(compiler_context *ctx, nir_block *block) ctx->texture_index[0] = -1; ctx->texture_index[1] = -1; - /* Add us as a successor to the block we are following */ - if (ctx->current_block) - midgard_block_add_successor(ctx->current_block, this_block); - /* Set up current block */ list_inithead(&this_block->instructions); ctx->current_block = this_block; @@ -2084,19 +2305,10 @@ emit_block(compiler_context *ctx, nir_block *block) } } - if (block == nir_start_block(ctx->func->impl)) - ctx->initial_block = this_block; - - if (block == nir_impl_last_block(ctx->func->impl)) - ctx->final_block = this_block; - /* Allow the next control flow to access us retroactively, for * branching etc */ ctx->current_block = this_block; - /* Document the fallthrough chain */ - ctx->previous_source_block = this_block; - return this_block; } @@ -2105,6 +2317,8 @@ static midgard_block *emit_cf_list(struct compiler_context *ctx, struct exec_lis static void emit_if(struct compiler_context *ctx, nir_if *nif) { + midgard_block *before_block = ctx->current_block; + /* Conditional branches expect the condition in r31.w; emit a move for * that in the _previous_ block (which is the current block). 
*/ emit_condition(ctx, &nif->condition, true, COMPONENT_X); @@ -2113,8 +2327,9 @@ emit_if(struct compiler_context *ctx, nir_if *nif) EMIT(branch, true, true); midgard_instruction *then_branch = mir_last_in_block(ctx->current_block); - /* Emit the two subblocks */ + /* Emit the two subblocks. */ midgard_block *then_block = emit_cf_list(ctx, &nif->then_list); + midgard_block *end_then_block = ctx->current_block; /* Emit a jump from the end of the then block to the end of the else */ EMIT(branch, false, false); @@ -2125,6 +2340,7 @@ emit_if(struct compiler_context *ctx, nir_if *nif) int else_idx = ctx->block_count; int count_in = ctx->instruction_count; midgard_block *else_block = emit_cf_list(ctx, &nif->else_list); + midgard_block *end_else_block = ctx->current_block; int after_else_idx = ctx->block_count; /* Now that we have the subblocks emitted, fix up the branches */ @@ -2140,6 +2356,16 @@ emit_if(struct compiler_context *ctx, nir_if *nif) then_branch->branch.target_block = else_idx; then_exit->branch.target_block = after_else_idx; } + + /* Wire up the successors */ + + ctx->after_block = create_empty_block(ctx); + + midgard_block_add_successor(before_block, then_block); + midgard_block_add_successor(before_block, else_block); + + midgard_block_add_successor(end_then_block, ctx->after_block); + midgard_block_add_successor(end_else_block, ctx->after_block); } static void @@ -2155,17 +2381,16 @@ emit_loop(struct compiler_context *ctx, nir_loop *nloop) int start_idx = ctx->block_count; /* Emit the body itself */ - emit_cf_list(ctx, &nloop->body); + midgard_block *loop_block = emit_cf_list(ctx, &nloop->body); /* Branch back to loop back */ struct midgard_instruction br_back = v_branch(false, false); br_back.branch.target_block = start_idx; emit_mir_instruction(ctx, br_back); - /* Mark down that branch in the graph. Note that we're really branching - * to the block *after* we started in. TODO: Why doesn't the branch - * itself have an off-by-one then...? */ - midgard_block_add_successor(ctx->current_block, start_block->successors[0]); + /* Mark down that branch in the graph. 
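The body
+         * is reachable both by falling through from the block before the
+         * loop and via the back-branch just emitted, hence the two
+         * successor edges recorded below. 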
*/ + midgard_block_add_successor(start_block, loop_block); + midgard_block_add_successor(ctx->current_block, loop_block); /* Find the index of the block about to follow us (note: we don't add * one; blocks are 0-indexed so we get a fencepost problem) */ @@ -2173,6 +2398,7 @@ emit_loop(struct compiler_context *ctx, nir_loop *nloop) /* Fix up the break statements we emitted to point to the right place, * now that we can allocate a block number for them */ + ctx->after_block = create_empty_block(ctx); list_for_each_entry_from(struct midgard_block, block, start_block, &ctx->blocks, link) { mir_foreach_instr_in_block(block, ins) { @@ -2191,6 +2417,8 @@ emit_loop(struct compiler_context *ctx, nir_loop *nloop) ins->branch.target_type = TARGET_GOTO; ins->branch.target_block = break_block_idx; + + midgard_block_add_successor(block, ctx->after_block); } } @@ -2246,19 +2474,16 @@ midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx) unsigned first_tag = 0; - do { - midgard_bundle *initial_bundle = util_dynarray_element(&initial_block->bundles, midgard_bundle, 0); + mir_foreach_block_from(ctx, initial_block, v) { + midgard_bundle *initial_bundle = + util_dynarray_element(&v->bundles, midgard_bundle, 0); if (initial_bundle) { first_tag = initial_bundle->tag; break; } + } - /* Initial block is empty, try the next block */ - initial_block = list_first_entry(&(initial_block->link), midgard_block, link); - } while(initial_block != NULL); - - assert(first_tag); return first_tag; } @@ -2269,19 +2494,14 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga midgard_debug = debug_get_option_midgard_debug(); - compiler_context ictx = { - .nir = nir, - .screen = screen, - .stage = nir->info.stage, - .temp_alloc = 0, + /* TODO: Bound against what? */ + compiler_context *ctx = rzalloc(NULL, compiler_context); - .is_blend = is_blend, - .blend_constant_offset = 0, - - .alpha_ref = program->alpha_ref - }; - - compiler_context *ctx = &ictx; + ctx->nir = nir; + ctx->screen = screen; + ctx->stage = nir->info.stage; + ctx->is_blend = is_blend; + ctx->alpha_ref = program->alpha_ref; /* Start off with a safe cutoff, allowing usage of all 16 work * registers. Later, we'll promote uniform reads to uniform registers @@ -2357,7 +2577,15 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga ctx->func = func; emit_cf_list(ctx, &func->impl->body); - emit_block(ctx, func->impl->end_block); + + /* Emit empty exit block with successor */ + + struct midgard_block *semi_end = ctx->current_block; + + struct midgard_block *end = + emit_block(ctx, func->impl->end_block); + + midgard_block_add_successor(semi_end, end); break; /* TODO: Multi-function shaders */ } @@ -2377,9 +2605,17 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga progress |= midgard_opt_dead_code_eliminate(ctx, block); progress |= midgard_opt_combine_projection(ctx, block); progress |= midgard_opt_varying_projection(ctx, block); + progress |= midgard_opt_not_propagate(ctx, block); + progress |= midgard_opt_fuse_src_invert(ctx, block); + progress |= midgard_opt_fuse_dest_invert(ctx, block); } } while (progress); + mir_foreach_block(ctx, block) { + midgard_lower_invert(ctx, block); + midgard_lower_derivatives(ctx, block); + } + /* Nested control-flow can result in dead branches at the end of the * block. 
This messes with our analysis and is just dead code, so cull * them */ @@ -2387,6 +2623,11 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga midgard_opt_cull_dead_branch(ctx, block); } + /* Ensure we were lowered */ + mir_foreach_instr_global(ctx, ins) { + assert(!ins->invert); + } + /* Schedule! */ schedule_program(ctx); @@ -2423,23 +2664,7 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga int quadword_offset = 0; if (is_discard) { - /* Jump to the end of the shader. We - * need to include not only the - * following blocks, but also the - * contents of our current block (since - * discard can come in the middle of - * the block) */ - - midgard_block *blk = mir_get_block(ctx, br_block_idx + 1); - - for (midgard_bundle *bun = bundle + 1; bun < (midgard_bundle *)((char*) block->bundles.data + block->bundles.size); ++bun) { - quadword_offset += quadword_size(bun->tag); - } - - mir_foreach_block_from(ctx, blk, b) { - quadword_offset += b->quadword_count; - } - + /* Ignored */ } else if (target_number > br_block_idx) { /* Jump forward */ @@ -2576,7 +2801,7 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga program->tls_size = ctx->tls_size; if (midgard_debug & MIDGARD_DBG_SHADERS) - disassemble_midgard(program->compiled.data, program->compiled.size); + disassemble_midgard(program->compiled.data, program->compiled.size, false, 0, ""); if (midgard_debug & MIDGARD_DBG_SHADERDB) { unsigned nr_bundles = 0, nr_ins = 0, nr_quadwords = 0; @@ -2618,6 +2843,7 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga ctx->spills, ctx->fills); } + ralloc_free(ctx); return 0; }
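

A note on the recurring ssa_args change in this patch: the separate
src0/src1 fields become a uniform three-entry src[] array, with -1 marking
an unused slot, so passes can iterate over sources instead of special-casing
each field. A minimal sketch of the access pattern this enables (hypothetical
helper for illustration only, not part of the patch; ARRAY_SIZE is Mesa's
existing util macro, already used above):

static bool
mir_reads_node(midgard_instruction *ins, int node)
{
        /* Scan all three source slots; -1 entries are unused and never
         * match a valid node index (illustrative only) */
        for (unsigned i = 0; i < ARRAY_SIZE(ins->ssa_args.src); ++i) {
                if (ins->ssa_args.src[i] == node)
                        return true;
        }

        return false;
}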