X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fpanfrost%2Fmidgard%2Fmidgard_compile.c;h=5dcb9c55c748a60576f09fd7c19d2c3315b8df73;hp=c67e3dc61c6a5d34d06bc3b53d0e42d67a038031;hb=f6e19dd3f45de7bc9edfe0aa9254abc3412c2610;hpb=bea6a652db5f7d46545a4a91dcd11a2984c72c77 diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index c67e3dc61c6..5dcb9c55c74 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -160,37 +160,6 @@ v_branch(bool conditional, bool invert) return ins; } -static midgard_branch_extended -midgard_create_branch_extended( midgard_condition cond, - midgard_jmp_writeout_op op, - unsigned dest_tag, - signed quadword_offset) -{ - /* The condition code is actually a LUT describing a function to - * combine multiple condition codes. However, we only support a single - * condition code at the moment, so we just duplicate over a bunch of - * times. */ - - uint16_t duplicated_cond = - (cond << 14) | - (cond << 12) | - (cond << 10) | - (cond << 8) | - (cond << 6) | - (cond << 4) | - (cond << 2) | - (cond << 0); - - midgard_branch_extended branch = { - .op = op, - .dest_tag = dest_tag, - .offset = quadword_offset, - .cond = duplicated_cond - }; - - return branch; -} - static void attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constants, int name) { @@ -492,6 +461,23 @@ midgard_nir_reorder_writeout(nir_shader *nir) return progress; } +static bool +mdg_is_64(const nir_instr *instr, const void *_unused) +{ + const nir_alu_instr *alu = nir_instr_as_alu(instr); + + if (nir_dest_bit_size(alu->dest.dest) == 64) + return true; + + switch (alu->op) { + case nir_op_umul_high: + case nir_op_imul_high: + return true; + default: + return false; + } +} + /* Flushes undefined values to zero */ static void @@ -574,6 +560,8 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend) NIR_PASS(progress, nir, nir_opt_vectorize); } while (progress); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_is_64, NULL); + /* Run after opts so it can hit more */ if (!is_blend) NIR_PASS(progress, nir, nir_fuse_io_16); @@ -889,8 +877,8 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) /* Should we swap arguments? 
*/ bool flip_src12 = false; - unsigned src_bitsize = nir_src_bit_size(instr->src[0].src); - unsigned dst_bitsize = nir_dest_bit_size(*dest); + ASSERTED unsigned src_bitsize = nir_src_bit_size(instr->src[0].src); + ASSERTED unsigned dst_bitsize = nir_dest_bit_size(*dest); enum midgard_roundmode roundmode = MIDGARD_RTE; @@ -912,6 +900,8 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ALU_CASE(iadd, iadd); ALU_CASE(isub, isub); ALU_CASE(imul, imul); + ALU_CASE(imul_high, imul); + ALU_CASE(umul_high, imul); /* Zero shoved as second-arg */ ALU_CASE(iabs, iabsdiff); @@ -919,7 +909,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ALU_CASE(mov, imov); ALU_CASE_CMP(feq32, feq, false); - ALU_CASE_CMP(fne32, fne, false); + ALU_CASE_CMP(fneu32, fne, false); ALU_CASE_CMP(flt32, flt, false); ALU_CASE_CMP(ieq32, ieq, true); ALU_CASE_CMP(ine32, ine, true); @@ -1090,7 +1080,9 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) unsigned outmod = 0; bool is_int = midgard_is_integer_op(op); - if (midgard_is_integer_out_op(op)) { + if (instr->op == nir_op_umul_high || instr->op == nir_op_imul_high) { + outmod = midgard_outmod_int_high; + } else if (midgard_is_integer_out_op(op)) { outmod = midgard_outmod_int_wrap; } else if (instr->op == nir_op_fsat) { outmod = midgard_outmod_sat; @@ -1365,6 +1357,15 @@ emit_global( mir_set_offset(ctx, &ins, offset, is_shared); mir_set_intr_mask(instr, &ins, is_read); + /* Set a valid swizzle for masked out components */ + assert(ins.mask); + unsigned first_component = __builtin_ffs(ins.mask) - 1; + + for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) { + if (!(ins.mask & (1 << i))) + ins.swizzle[0][i] = first_component; + } + emit_mir_instruction(ctx, ins); } @@ -1578,13 +1579,13 @@ emit_vertex_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) } static void -emit_msaa_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) +emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx) { unsigned reg = nir_dest_index(&instr->dest); midgard_instruction ld = m_ld_color_buffer_32u(reg, 0); ld.op = midgard_op_ld_color_buffer_32u_old; - ld.load_store.address = 97; + ld.load_store.address = idx; ld.load_store.arg_2 = 0x1E; for (int i = 0; i < 4; ++i) @@ -1601,11 +1602,6 @@ emit_control_barrier(compiler_context *ctx) .dest = ~0, .src = { ~0, ~0, ~0, ~0 }, .op = TEXTURE_OP_BARRIER, - .texture = { - /* TODO: optimize */ - .out_of_order = MIDGARD_BARRIER_BUFFER | - MIDGARD_BARRIER_SHARED , - } }; emit_mir_instruction(ctx, ins); @@ -1872,7 +1868,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) else if (combined) rt = MIDGARD_ZS_RT; else - assert(0); + unreachable("bad rt"); unsigned reg_z = ~0, reg_s = ~0; if (combined) { @@ -1983,8 +1979,12 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) emit_vertex_builtin(ctx, instr); break; + case nir_intrinsic_load_sample_mask_in: + emit_special(ctx, instr, 96); + break; + case nir_intrinsic_load_sample_id: - emit_msaa_builtin(ctx, instr); + emit_special(ctx, instr, 97); break; case nir_intrinsic_memory_barrier_buffer: @@ -2004,25 +2004,26 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) } } +/* Returns dimension with 0 special casing cubemaps */ static unsigned midgard_tex_format(enum glsl_sampler_dim dim) { switch (dim) { case GLSL_SAMPLER_DIM_1D: case GLSL_SAMPLER_DIM_BUF: - return MALI_TEX_1D; + return 1; case GLSL_SAMPLER_DIM_2D: case GLSL_SAMPLER_DIM_MS: case GLSL_SAMPLER_DIM_EXTERNAL: case GLSL_SAMPLER_DIM_RECT: - return 
MALI_TEX_2D; + return 2; case GLSL_SAMPLER_DIM_3D: - return MALI_TEX_3D; + return 3; case GLSL_SAMPLER_DIM_CUBE: - return MALI_TEX_CUBE; + return 0; default: DBG("Unknown sampler dim type\n"); @@ -2064,6 +2065,15 @@ pan_attach_constant_bias( return true; } +static enum mali_texture_mode +mdg_texture_mode(nir_tex_instr *instr) +{ + if (instr->is_shadow) + return TEXTURE_SHADOW; + else + return TEXTURE_NORMAL; +} + static void emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, unsigned midgard_texop) @@ -2098,7 +2108,7 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, .format = midgard_tex_format(instr->sampler_dim), .texture_handle = texture_index, .sampler_handle = sampler_index, - .shadow = instr->is_shadow, + .mode = mdg_texture_mode(instr) } }; @@ -2427,6 +2437,13 @@ max_bitsize_for_alu(midgard_instruction *ins) break; } + /* High implies computing at a higher bitsize, e.g umul_high of 32-bit + * requires computing at 64-bit */ + if (midgard_is_integer_out_op(ins->op) && ins->outmod == midgard_outmod_int_high) { + max_bitsize *= 2; + assert(max_bitsize <= 64); + } + return max_bitsize; } @@ -2811,7 +2828,7 @@ emit_cf_list(struct compiler_context *ctx, struct exec_list *list) * stream and in branch targets. An initial block might be empty, so iterate * until we find one that 'works' */ -static unsigned +unsigned midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx) { midgard_block *initial_block = mir_get_block(ctx, block_idx); @@ -2877,7 +2894,6 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b ctx->nir = nir; ctx->stage = nir->info.stage; ctx->is_blend = is_blend; - ctx->alpha_ref = program->alpha_ref; ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt; ctx->blend_input = ~0; ctx->blend_src1 = ~0; @@ -2891,7 +2907,6 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b /* Initialize at a global (not block) level hash tables */ ctx->ssa_constants = _mesa_hash_table_u64_create(NULL); - ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL); /* Lower gl_Position pre-optimisation, but after lowering vars to ssa * (so we don't accidentally duplicate the epilogue since mesa/st has @@ -2934,7 +2949,7 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b /* Assign sysvals and counts, now that we're sure * (post-optimisation) */ - panfrost_nir_assign_sysvals(&ctx->sysvals, nir); + panfrost_nir_assign_sysvals(&ctx->sysvals, ctx, nir); program->sysval_count = ctx->sysvals.sysval_count; memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count); @@ -3007,124 +3022,6 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b midgard_schedule_program(ctx); mir_ra(ctx); - /* Now that all the bundles are scheduled and we can calculate block - * sizes, emit actual branch instructions rather than placeholders */ - - int br_block_idx = 0; - - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - util_dynarray_foreach(&block->bundles, midgard_bundle, bundle) { - for (int c = 0; c < bundle->instruction_count; ++c) { - midgard_instruction *ins = bundle->instructions[c]; - - if (!midgard_is_branch_unit(ins->unit)) continue; - - /* Parse some basic branch info */ - bool is_compact = ins->unit == ALU_ENAB_BR_COMPACT; - bool is_conditional = ins->branch.conditional; - bool is_inverted = ins->branch.invert_conditional; - bool is_discard = ins->branch.target_type == 
TARGET_DISCARD; - bool is_tilebuf_wait = ins->branch.target_type == TARGET_TILEBUF_WAIT; - bool is_special = is_discard || is_tilebuf_wait; - bool is_writeout = ins->writeout; - - /* Determine the block we're jumping to */ - int target_number = ins->branch.target_block; - - /* Report the destination tag */ - int dest_tag = is_discard ? 0 : - is_tilebuf_wait ? bundle->tag : - midgard_get_first_tag_from_block(ctx, target_number); - - /* Count up the number of quadwords we're - * jumping over = number of quadwords until - * (br_block_idx, target_number) */ - - int quadword_offset = 0; - - if (is_discard) { - /* Ignored */ - } else if (is_tilebuf_wait) { - quadword_offset = -1; - } else if (target_number > br_block_idx) { - /* Jump forward */ - - for (int idx = br_block_idx + 1; idx < target_number; ++idx) { - midgard_block *blk = mir_get_block(ctx, idx); - assert(blk); - - quadword_offset += blk->quadword_count; - } - } else { - /* Jump backwards */ - - for (int idx = br_block_idx; idx >= target_number; --idx) { - midgard_block *blk = mir_get_block(ctx, idx); - assert(blk); - - quadword_offset -= blk->quadword_count; - } - } - - /* Unconditional extended branches (far jumps) - * have issues, so we always use a conditional - * branch, setting the condition to always for - * unconditional. For compact unconditional - * branches, cond isn't used so it doesn't - * matter what we pick. */ - - midgard_condition cond = - !is_conditional ? midgard_condition_always : - is_inverted ? midgard_condition_false : - midgard_condition_true; - - midgard_jmp_writeout_op op = - is_discard ? midgard_jmp_writeout_op_discard : - is_tilebuf_wait ? midgard_jmp_writeout_op_tilebuffer_pending : - is_writeout ? midgard_jmp_writeout_op_writeout : - (is_compact && !is_conditional) ? midgard_jmp_writeout_op_branch_uncond : - midgard_jmp_writeout_op_branch_cond; - - if (!is_compact) { - midgard_branch_extended branch = - midgard_create_branch_extended( - cond, op, - dest_tag, - quadword_offset); - - memcpy(&ins->branch_extended, &branch, sizeof(branch)); - } else if (is_conditional || is_special) { - midgard_branch_cond branch = { - .op = op, - .dest_tag = dest_tag, - .offset = quadword_offset, - .cond = cond - }; - - assert(branch.offset == quadword_offset); - - memcpy(&ins->br_compact, &branch, sizeof(branch)); - } else { - assert(op == midgard_jmp_writeout_op_branch_uncond); - - midgard_branch_uncond branch = { - .op = op, - .dest_tag = dest_tag, - .offset = quadword_offset, - .unknown = 1 - }; - - assert(branch.offset == quadword_offset); - - memcpy(&ins->br_compact, &branch, sizeof(branch)); - } - } - } - - ++br_block_idx; - } - /* Emit flat binary from the instruction arrays. Iterate each block in * sequence. Save instruction boundaries such that lookahead tags can * be assigned easily */
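
Two of the removed hunks read more easily with a reference model in hand. First, the condition-code replication that midgard_create_branch_extended performed before being dropped from this file: the 2-bit condition is copied into all eight 2-bit slots of the 16-bit LUT field, because the hardware expects a LUT combining multiple condition codes while the compiler only tracks one. A minimal sketch of the same computation, assuming cond occupies only its low two bits:

#include <assert.h>
#include <stdint.h>

/* Replicating a 2-bit condition into eight 2-bit slots is equivalent to
 * multiplying by 0x5555 (binary 0101010101010101), matching the chain of
 * shifted ORs in the deleted hunk. */
static uint16_t
duplicate_cond(uint16_t cond)
{
        assert(cond < 4);
        return cond * 0x5555;
}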
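
Second, the semantics behind the new imul_high/umul_high support: both ops are emitted as a plain imul, the midgard_outmod_int_high output modifier selects the high half of the product, and max_bitsize_for_alu doubles the computation width so that half exists. The mdg_is_64 filter also routes them through nir_lower_alu_to_scalar, since they compute at 64-bit internally. A reference model in plain C for the 32-bit case:

#include <stdint.h>

/* umul_high/imul_high at 32-bit: the high 32 bits of the full 64-bit
 * product. On Midgard this becomes imul computed at double width with
 * outmod = midgard_outmod_int_high. */
static uint32_t
umul_high_ref(uint32_t a, uint32_t b)
{
        return (uint32_t) (((uint64_t) a * (uint64_t) b) >> 32);
}

static int32_t
imul_high_ref(int32_t a, int32_t b)
{
        return (int32_t) (((int64_t) a * (int64_t) b) >> 32);
}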
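
Finally, the branch-lowering loop deleted at the end of this diff presumably moves out of this file rather than disappearing, consistent with midgard_get_first_tag_from_block losing its static qualifier. The part worth keeping in mind when reviewing the new location is the quadword offset: forward branches skip the blocks strictly between source and target, while backward branches step back over every block from the source down to and including the target. A standalone sketch, with a hypothetical block_quadwords[] array standing in for mir_get_block(ctx, idx)->quadword_count:

/* Hypothetical per-block sizes; in the real code this is
 * mir_get_block(ctx, idx)->quadword_count. */
static const int block_quadwords[] = { 3, 1, 4, 2 };

static int
branch_quadword_offset(int br_block_idx, int target_number)
{
        int quadword_offset = 0;

        if (target_number > br_block_idx) {
                /* Jump forward: count only the blocks in between */
                for (int idx = br_block_idx + 1; idx < target_number; ++idx)
                        quadword_offset += block_quadwords[idx];
        } else {
                /* Jump backward: count from the source block down to and
                 * including the target block */
                for (int idx = br_block_idx; idx >= target_number; --idx)
                        quadword_offset -= block_quadwords[idx];
        }

        return quadword_offset;
}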