radeonsi: add si_vs_prolog_bits::unpack_instance_id_from_vertex_id:1
author Marek Olšák <marek.olsak@amd.com>
Tue, 12 Feb 2019 20:26:41 +0000 (15:26 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 16 May 2019 17:10:07 +0000 (13:10 -0400)
The prim discard compute shader bakes InstanceID into the output index buffer.

Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h

index e05bbfde62329b240d745bda57e08835a596d0ba..c31e94fe3516eef291230030580050b195328bda 100644 (file)
@@ -5664,6 +5664,8 @@ static void si_dump_shader_key_vs(const struct si_shader_key *key,
                prefix, prolog->instance_divisor_is_one);
        fprintf(f, "  %s.instance_divisor_is_fetched = %u\n",
                prefix, prolog->instance_divisor_is_fetched);
+       fprintf(f, "  %s.unpack_instance_id_from_vertex_id = %u\n",
+               prefix, prolog->unpack_instance_id_from_vertex_id);
        fprintf(f, "  %s.ls_vgpr_fix = %u\n",
                prefix, prolog->ls_vgpr_fix);
 
@@ -7099,8 +7101,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
                }
        }
 
-       ctx->abi.vertex_id = input_vgprs[first_vs_vgpr];
-       ctx->abi.instance_id = input_vgprs[first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1)];
+       unsigned vertex_id_vgpr = first_vs_vgpr;
+       unsigned instance_id_vgpr = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
+
+       ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr];
+       ctx->abi.instance_id = input_vgprs[instance_id_vgpr];
+
+       /* InstanceID = VertexID >> 16;
+        * VertexID   = VertexID & 0xffff;
+        */
+       if (key->vs_prolog.states.unpack_instance_id_from_vertex_id) {
+               ctx->abi.instance_id = LLVMBuildLShr(ctx->ac.builder, ctx->abi.vertex_id,
+                                                    LLVMConstInt(ctx->i32, 16, 0), "");
+               ctx->abi.vertex_id = LLVMBuildAnd(ctx->ac.builder, ctx->abi.vertex_id,
+                                                 LLVMConstInt(ctx->i32, 0xffff, 0), "");
+       }
 
        /* Copy inputs to outputs. This should be no-op, as the registers match,
         * but it will prevent the compiler from overwriting them unintentionally.
@@ -7112,6 +7127,12 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
        }
        for (i = 0; i < num_input_vgprs; i++) {
                LLVMValueRef p = input_vgprs[i];
+
+               if (i == vertex_id_vgpr)
+                       p = ctx->abi.vertex_id;
+               else if (i == instance_id_vgpr)
+                       p = ctx->abi.instance_id;
+
                p = ac_to_float(&ctx->ac, p);
                ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p,
                                           key->vs_prolog.num_input_sgprs + i, "");
index 16b78fbf43efed18381f66888e8d8cff211b8883..3a63292658b2570013bf81d50cc823089dc68f75 100644 (file)
@@ -409,6 +409,7 @@ struct si_vs_prolog_bits {
        uint16_t        instance_divisor_is_one;     /* bitmask of inputs */
        uint16_t        instance_divisor_is_fetched; /* bitmask of inputs */
        unsigned        ls_vgpr_fix:1;
+       unsigned        unpack_instance_id_from_vertex_id:1;
 };
 
 /* Common TCS bits between the shader key and the epilog key. */