From 60097cc840e33af8506d7d4d621fefdca1a77695 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jan 2020 14:18:56 -0600 Subject: [PATCH 1/1] nir: Add a new memory_barrier_tcs_patch intrinsic Right now, it's implemented as a no-op for everyone. For most drivers, it's a switch case in the NIR -> whatever which just breaks. For ir3, they already have code to delete tessellation barriers so we just add a case to also delete memory_barrier_tcs_patch. Reviewed-by: Caio Marcelo de Oliveira Filho Reviewed-by: Eric Anholt Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 2 ++ src/amd/llvm/ac_nir_to_llvm.c | 2 ++ src/broadcom/compiler/nir_to_vir.c | 1 + src/compiler/nir/nir_intrinsics.py | 3 +++ src/compiler/nir/nir_opt_combine_stores.c | 4 ++++ src/compiler/nir/nir_opt_copy_prop_vars.c | 6 ++++++ src/compiler/nir/nir_opt_dead_write_vars.c | 4 ++++ src/freedreno/ir3/ir3_nir_lower_tess.c | 1 + src/gallium/auxiliary/gallivm/lp_bld_nir.c | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 2 ++ src/intel/compiler/brw_fs_nir.cpp | 3 +++ src/intel/compiler/brw_vec4_tcs.cpp | 3 +++ 12 files changed, 32 insertions(+) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index abcfe572a38..29dea6e6cd3 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5729,6 +5729,8 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr) case nir_intrinsic_memory_barrier_shared: emit_memory_barrier(ctx, instr); break; + case nir_intrinsic_memory_barrier_tcs_patch: + break; case nir_intrinsic_load_num_work_groups: { Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.num_work_groups))); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index d43f4e9fd19..fb042ac1d4a 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3553,6 +3553,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_memory_barrier_shared: emit_membar(&ctx->ac, instr); break; + case nir_intrinsic_memory_barrier_tcs_patch: + break; case nir_intrinsic_barrier: ac_emit_barrier(&ctx->ac, ctx->stage); break; diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index e2de77ddf05..401958e1471 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2247,6 +2247,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_memory_barrier_buffer: case nir_intrinsic_memory_barrier_image: case nir_intrinsic_memory_barrier_shared: + case nir_intrinsic_memory_barrier_tcs_patch: case nir_intrinsic_group_memory_barrier: /* We don't do any instruction scheduling of these NIR * instructions between each other, so we just need to make diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 4aa616d269e..04f58b0172b 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -255,6 +255,9 @@ barrier("memory_barrier_shared") barrier("begin_invocation_interlock") barrier("end_invocation_interlock") +# Memory barrier for synchronizing TCS patch outputs +barrier("memory_barrier_tcs_patch") + # A conditional discard/demote, with a single boolean source. intrinsic("discard_if", src_comp=[1]) intrinsic("demote_if", src_comp=[1]) diff --git a/src/compiler/nir/nir_opt_combine_stores.c b/src/compiler/nir/nir_opt_combine_stores.c index 6bd2c534971..508833b5b46 100644 --- a/src/compiler/nir/nir_opt_combine_stores.c +++ b/src/compiler/nir/nir_opt_combine_stores.c @@ -320,6 +320,10 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block) combine_stores_with_modes(state, nir_var_mem_shared); break; + case nir_intrinsic_memory_barrier_tcs_patch: + combine_stores_with_modes(state, nir_var_shader_out); + break; + case nir_intrinsic_scoped_memory_barrier: if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) { combine_stores_with_modes(state, diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index fdbf62c3599..26ca61969af 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -820,6 +820,12 @@ copy_prop_vars_block(struct copy_prop_var_state *state, apply_barrier_for_modes(copies, nir_var_mem_shared); break; + case nir_intrinsic_memory_barrier_tcs_patch: + if (debug) dump_instr(instr); + + apply_barrier_for_modes(copies, nir_var_shader_out); + break; + case nir_intrinsic_scoped_memory_barrier: if (debug) dump_instr(instr); diff --git a/src/compiler/nir/nir_opt_dead_write_vars.c b/src/compiler/nir/nir_opt_dead_write_vars.c index ffe0de771da..ecbe77895d3 100644 --- a/src/compiler/nir/nir_opt_dead_write_vars.c +++ b/src/compiler/nir/nir_opt_dead_write_vars.c @@ -148,6 +148,10 @@ remove_dead_write_vars_local(void *mem_ctx, nir_block *block) clear_unused_for_modes(&unused_writes, nir_var_mem_shared); break; + case nir_intrinsic_memory_barrier_tcs_patch: + clear_unused_for_modes(&unused_writes, nir_var_shader_out); + break; + case nir_intrinsic_scoped_memory_barrier: { if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) { clear_unused_for_modes(&unused_writes, diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c index 5066f022729..b29903537b8 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tess.c +++ b/src/freedreno/ir3/ir3_nir_lower_tess.c @@ -349,6 +349,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) break; case nir_intrinsic_barrier: + case nir_intrinsic_memory_barrier_tcs_patch: /* Hull shaders dispatch 32 wide so an entire patch will always * fit in a single warp and execute in lock-step. Consequently, * we don't need to do anything for TCS barriers so just remove diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index ce503645071..72a2721e030 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -1359,6 +1359,7 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base, case nir_intrinsic_memory_barrier_shared: case nir_intrinsic_memory_barrier_buffer: case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_tcs_patch: break; case nir_intrinsic_load_kernel_input: visit_load_kernel_input(bld_base, instr, result); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 0f2b3e12d00..13b60dd4e86 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -2663,6 +2663,8 @@ Converter::visit(nir_intrinsic_instr *insn) bar->subOp = getSubOp(op); break; } + case nir_intrinsic_memory_barrier_tcs_patch: + break; case nir_intrinsic_shader_clock: { const DataType dType = getDType(insn); LValues &newDefs = convert(&insn->dest); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 3b05d424cbb..a104df497cd 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4332,6 +4332,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_memory_barrier_tcs_patch: + break; + case nir_intrinsic_shader_clock: { /* We cannot do anything if there is an event, so ignore it for now */ const fs_reg shader_clock = get_timestamp(bld); diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index 3ce5e268e27..e539657335c 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -315,6 +315,9 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } + case nir_intrinsic_memory_barrier_tcs_patch: + break; + default: vec4_visitor::nir_emit_intrinsic(instr); } -- 2.30.2