From cd2f26090a3857703bc85bd58ac5922800b06bc0 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Mon, 12 Aug 2013 21:18:58 +0200 Subject: [PATCH] gallivm: fix exec_mask interaction with geometry shader after end of main Because we must maintain an exec_mask even if there's currently nothing on the mask stack, we can still have an exec_mask at the end of the program. Effectively, this mask should be set back to default when returning from main. Without relying on END/RET opcode (I think it's valid to have neither) it is actually difficult to do this, as there doesn't seem any reasonable place to do it, so instead let's just say the exec_mask is invalid outside main (which it really is effectively). The problem is that geometry shader called end_primitive outside the shader (in the epilogue), and as a result used a bogus mask, leading to bugs if we had to set the (somewhat misnamed) ret_in_main bit anywhere. So just avoid the mask combining function when called from outside the shader. Reviewed-by: Zack Rusin --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 2 +- .../auxiliary/gallivm/lp_bld_tgsi_soa.c | 28 +++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index 495940caf80..5a9e8d0a1a2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -466,7 +466,7 @@ lp_build_tgsi_llvm( while (bld_base->pc != -1) { struct tgsi_full_instruction *instr = bld_base->instructions + - bld_base->pc; + bld_base->pc; const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(instr->Instruction.Opcode); if (!lp_build_tgsi_inst_llvm(bld_base, instr)) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 589ea4f16fe..db8e9979b4e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -2691,11 +2691,21 @@ end_primitive_masked(struct lp_build_tgsi_context * bld_base, LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; if (bld->gs_iface->end_primitive) { + struct lp_build_context *uint_bld = &bld_base->uint_bld; LLVMValueRef emitted_vertices_vec = LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""); LLVMValueRef emitted_prims_vec = LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""); + LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, + emitted_vertices_vec, + uint_bld->zero); + /* We need to combine the current execution mask with the mask + telling us which, if any, execution slots actually have + unemitted primitives, this way we make sure that end_primitives + executes only on the paths that have unflushed vertices */ + mask = LLVMBuildAnd(builder, mask, emitted_mask, ""); + bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base, emitted_vertices_vec, emitted_prims_vec); @@ -2735,20 +2745,7 @@ end_primitive( struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); if (bld->gs_iface->end_primitive) { - LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef mask = mask_vec(bld_base); - struct lp_build_context *uint_bld = &bld_base->uint_bld; - LLVMValueRef emitted_verts = LLVMBuildLoad( - builder, bld->emitted_vertices_vec_ptr, ""); - LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, - emitted_verts, - uint_bld->zero); - /* We need to combine the current execution mask with the mask - telling us which, if any, execution slots actually have - unemitted primitives, this way we make sure that end_primitives - executes only on the paths that have unflushed vertices */ - mask = LLVMBuildAnd(builder, mask, emitted_mask, ""); - end_primitive_masked(bld_base, mask); } } @@ -3148,8 +3145,9 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base) LLVMValueRef total_emitted_vertices_vec; LLVMValueRef emitted_prims_vec; /* implicit end_primitives, needed in case there are any unflushed - vertices in the cache */ - end_primitive(NULL, bld_base, NULL); + vertices in the cache. Note must not call end_primitive here + since the exec_mask is not valid at this point. */ + end_primitive_masked(bld_base, lp_build_mask_value(bld->mask)); total_emitted_vertices_vec = LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); -- 2.30.2