draw/gs: use mask to limit vertex emission.
author Dave Airlie <airlied@redhat.com>
Mon, 6 Jul 2020 02:06:46 +0000 (12:06 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 7 Jul 2020 20:06:14 +0000 (06:06 +1000)
When executing for a single primitive, the mask has only one active
lane; however, the vertex emit writes output for all the lanes. Pass in
the active mask and write the excess (inactive) lanes to the overflow slot.

Fixes:
glsl-1.50-gs-max-output -scan 1 20

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5555>

src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
src/gallium/drivers/swr/swr_shader.cpp

index 862c570efbfb52ccab19d3c8399e109ff11cd485..845ed6242b1031a8a2595adc084734d3f3601f42 100644 (file)
@@ -1785,7 +1785,7 @@ draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                          struct lp_build_context * bld,
                          LLVMValueRef (*outputs)[4],
                          LLVMValueRef emitted_vertices_vec,
-                         LLVMValueRef stream_id)
+                         LLVMValueRef mask_vec, LLVMValueRef stream_id)
 {
    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
    struct draw_gs_llvm_variant *variant = gs_iface->variant;
@@ -1801,12 +1801,15 @@ draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
    unsigned i;
    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
 
+   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
    for (i = 0; i < gs_type.length; ++i) {
       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
       LLVMValueRef currently_emitted =
          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
+      indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],
+                                   lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");
    }
 
    LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");
index a363571a2cd015bcf51fd4ecf38cf46c87f5c304..6af022a8bca0f22b89fac943128008481de745c1 100644 (file)
@@ -1604,6 +1604,7 @@ static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_i
    bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                               bld->outputs,
                               total_emitted_vertices_vec,
+                              mask,
                               lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
 
    increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
index 42a01ec6a7701280182101ed830a79df0048e09d..b059c1a8a4eb65eecaace3d266a331b45fc7918d 100644 (file)
@@ -429,7 +429,7 @@ struct lp_build_gs_iface
                        struct lp_build_context * bld,
                        LLVMValueRef (*outputs)[4],
                        LLVMValueRef emitted_vertices_vec,
-                       LLVMValueRef stream_id);
+                       LLVMValueRef mask_vec, LLVMValueRef stream_id);
    void (*end_primitive)(const struct lp_build_gs_iface *gs_iface,
                          struct lp_build_context * bld,
                          LLVMValueRef total_emitted_vertices_vec,
index 9e00726b35de93e8b358862def32abea74efb6ab..f3f339923dbc246957fb073b3f8217bc571e997e 100644 (file)
@@ -3969,6 +3969,7 @@ emit_vertex(
       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                                  bld->outputs,
                                  total_emitted_vertices_vec,
+                                 mask,
                                  stream_id);
       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                 mask);
index ea3701346aaf9def67d99feb5b07a660f12a6d01..cc3bf717653fd6c09a1eb109818123e45c34b6dc 100644 (file)
@@ -504,6 +504,7 @@ swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                            struct lp_build_context * bld,
                            LLVMValueRef (*outputs)[4],
                            LLVMValueRef emitted_vertices_vec,
+                           LLVMValueRef mask_vec,
                            LLVMValueRef stream_id)
 {
     swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;