ac: unify primitive export code
author: Marek Olšák <marek.olsak@amd.com>
Tue, 24 Dec 2019 00:26:46 +0000 (19:26 -0500)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 8 Jan 2020 21:00:38 +0000 (16:00 -0500)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
src/amd/llvm/ac_llvm_build.c
src/amd/llvm/ac_llvm_build.h
src/amd/vulkan/radv_nir_to_llvm.c
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c

index 3df941b1f591edbb5777d7c799d40b4377365880..a66272873f00da297223674bb54ab63fca35c4d8 100644 (file)
@@ -4748,6 +4748,59 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
        ac_build_endif(ctx, 5020);
 }
 
+LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx,
+                                const struct ac_ngg_prim *prim)
+{
+       /* The prim export format is:
+        *  - bits 0..8: index 0
+        *  - bit 9: edge flag 0
+        *  - bits 10..18: index 1
+        *  - bit 19: edge flag 1
+        *  - bits 20..28: index 2
+        *  - bit 29: edge flag 2
+        *  - bit 31: null primitive (skip)
+        */
+       LLVMBuilderRef builder = ctx->builder;
+       LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, "");
+       LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), "");
+
+       for (unsigned i = 0; i < prim->num_vertices; ++i) {
+               tmp = LLVMBuildShl(builder, prim->index[i],
+                                  LLVMConstInt(ctx->i32, 10 * i, false), "");
+               result = LLVMBuildOr(builder, result, tmp, "");
+               tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->i32, "");
+               tmp = LLVMBuildShl(builder, tmp,
+                                  LLVMConstInt(ctx->i32, 10 * i + 9, false), "");
+               result = LLVMBuildOr(builder, result, tmp, "");
+       }
+       return result;
+}
+
+void ac_build_export_prim(struct ac_llvm_context *ctx,
+                         const struct ac_ngg_prim *prim)
+{
+       struct ac_export_args args;
+
+       if (prim->passthrough) {
+               args.out[0] = prim->passthrough;
+       } else {
+               args.out[0] = ac_pack_prim_export(ctx, prim);
+       }
+
+       args.out[0] = LLVMBuildBitCast(ctx->builder, args.out[0], ctx->f32, "");
+       args.out[1] = LLVMGetUndef(ctx->f32);
+       args.out[2] = LLVMGetUndef(ctx->f32);
+       args.out[3] = LLVMGetUndef(ctx->f32);
+
+       args.target = V_008DFC_SQ_EXP_PRIM;
+       args.enabled_channels = 1;
+       args.done = true;
+       args.valid_mask = false;
+       args.compr = false;
+
+       ac_build_export(ctx, &args);
+}
+
 static LLVMTypeRef
 arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx)
 {
index 9e216a80956c6ca42aad3627129c3d38d3c80ee4..8672977bd6795f5815385f2721884903b6454292 100644 (file)
@@ -750,6 +750,19 @@ ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
 void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id,
                                   LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt);
 
+struct ac_ngg_prim {
+       unsigned num_vertices;
+       LLVMValueRef isnull;
+       LLVMValueRef index[3];
+       LLVMValueRef edgeflag[3];
+       LLVMValueRef passthrough;
+};
+
+LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx,
+                                const struct ac_ngg_prim *prim);
+void ac_build_export_prim(struct ac_llvm_context *ctx,
+                         const struct ac_ngg_prim *prim);
+
 static inline LLVMValueRef
 ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg)
 {
index 3aeba1783166d1e4f63594317e27fbd5ddcd870a..5a321a0677f6377dfbb1c6a6b56a80414a32a743 100644 (file)
@@ -2371,47 +2371,6 @@ ngg_gs_emit_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef gsthread,
        return ngg_gs_vertex_ptr(ctx, vertexidx);
 }
 
-struct ngg_prim {
-       unsigned num_vertices;
-       LLVMValueRef isnull;
-       LLVMValueRef index[3];
-       LLVMValueRef edgeflag[3];
-};
-
-static void build_export_prim(struct radv_shader_context *ctx,
-                             const struct ngg_prim *prim)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-       struct ac_export_args args;
-       LLVMValueRef tmp;
-
-       tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
-       args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
-
-       for (unsigned i = 0; i < prim->num_vertices; ++i) {
-               tmp = LLVMBuildShl(builder, prim->index[i],
-                                  LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
-               args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
-               tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
-               tmp = LLVMBuildShl(builder, tmp,
-                                  LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
-               args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
-       }
-
-       args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, "");
-       args.out[1] = LLVMGetUndef(ctx->ac.f32);
-       args.out[2] = LLVMGetUndef(ctx->ac.f32);
-       args.out[3] = LLVMGetUndef(ctx->ac.f32);
-
-       args.target = V_008DFC_SQ_EXP_PRIM;
-       args.enabled_channels = 1;
-       args.done = true;
-       args.valid_mask = false;
-       args.compr = false;
-
-       ac_build_export(&ctx->ac, &args);
-}
-
 static struct radv_stream_output *
 radv_get_stream_output_by_loc(struct radv_streamout_info *so, unsigned location)
 {
@@ -3002,14 +2961,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
                                      ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
 
        /* TODO: streamout queries */
-       /* Export primitive data to the index buffer. Format is:
-        *  - bits 0..8: index 0
-        *  - bit 9: edge flag 0
-        *  - bits 10..18: index 1
-        *  - bit 19: edge flag 1
-        *  - bits 20..28: index 2
-        *  - bit 29: edge flag 2
-        *  - bit 31: null primitive (skip)
+       /* Export primitive data to the index buffer.
         *
         * For the first version, we will always build up all three indices
         * independent of the primitive type. The additional garbage data
@@ -3020,7 +2972,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
         */
        ac_build_ifcc(&ctx->ac, is_gs_thread, 6001);
        {
-               struct ngg_prim prim = {};
+               struct ac_ngg_prim prim = {};
 
                prim.num_vertices = num_vertices;
                prim.isnull = ctx->ac.i1false;
@@ -3033,7 +2985,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
                        prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
                }
 
-               build_export_prim(ctx, &prim);
+               ac_build_export_prim(&ctx->ac, &prim);
        }
        ac_build_endif(&ctx->ac, 6001);
 
@@ -3323,7 +3275,7 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
        ac_build_ifcc(&ctx->ac, tmp, 5140);
        {
                LLVMValueRef flags;
-               struct ngg_prim prim = {};
+               struct ac_ngg_prim prim = {};
                prim.num_vertices = verts_per_prim;
 
                tmp = ngg_gs_vertex_ptr(ctx, tid);
@@ -3352,7 +3304,7 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
                        LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, "");
                        is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, "");
 
-                       struct ngg_prim in = prim;
+                       struct ac_ngg_prim in = prim;
                        prim.index[0] = in.index[0];
                        prim.index[1] = LLVMBuildSelect(builder, is_odd,
                                                        in.index[2], in.index[1], "");
@@ -3360,7 +3312,7 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
                                                        in.index[1], in.index[2], "");
                }
 
-               build_export_prim(ctx, &prim);
+               ac_build_export_prim(&ctx->ac, &prim);
        }
        ac_build_endif(&ctx->ac, 5140);
 
index d403383b09b402e2a138cc5c85d779b62f04c263..6bc5c99d02b15722bcd0428db4c4a16c8bf747b7 100644 (file)
@@ -71,52 +71,6 @@ static LLVMValueRef ngg_get_query_buf(struct si_shader_context *ctx)
                                     LLVMConstInt(ctx->i32, GFX10_GS_QUERY_BUF, false));
 }
 
-struct ngg_prim {
-       unsigned num_vertices;
-       LLVMValueRef isnull;
-       LLVMValueRef index[3];
-       LLVMValueRef edgeflag[3];
-       LLVMValueRef passthrough;
-};
-
-static void build_export_prim(struct si_shader_context *ctx,
-                             const struct ngg_prim *prim)
-{
-       LLVMBuilderRef builder = ctx->ac.builder;
-       struct ac_export_args args;
-       LLVMValueRef tmp;
-
-       if (prim->passthrough) {
-               args.out[0] = prim->passthrough;
-       } else {
-               tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
-               args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
-
-               for (unsigned i = 0; i < prim->num_vertices; ++i) {
-                       tmp = LLVMBuildShl(builder, prim->index[i],
-                                          LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
-                       args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
-                       tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
-                       tmp = LLVMBuildShl(builder, tmp,
-                                          LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
-                       args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
-               }
-       }
-
-       args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, "");
-       args.out[1] = LLVMGetUndef(ctx->ac.f32);
-       args.out[2] = LLVMGetUndef(ctx->ac.f32);
-       args.out[3] = LLVMGetUndef(ctx->ac.f32);
-
-       args.target = V_008DFC_SQ_EXP_PRIM;
-       args.enabled_channels = 1;
-       args.done = true;
-       args.valid_mask = false;
-       args.compr = false;
-
-       ac_build_export(&ctx->ac, &args);
-}
-
 static void build_streamout_vertex(struct si_shader_context *ctx,
                                   LLVMValueRef *so_buffer, LLVMValueRef *wg_offset_dw,
                                   unsigned stream, LLVMValueRef offset_vtx,
@@ -693,14 +647,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
                ac_build_endif(&ctx->ac, 5029);
        }
 
-       /* Export primitive data to the index buffer. Format is:
-        *  - bits 0..8: index 0
-        *  - bit 9: edge flag 0
-        *  - bits 10..18: index 1
-        *  - bit 19: edge flag 1
-        *  - bits 20..28: index 2
-        *  - bit 29: edge flag 2
-        *  - bit 31: null primitive (skip)
+       /* Build the primitive export.
         *
         * For the first version, we will always build up all three indices
         * independent of the primitive type. The additional garbage data
@@ -711,7 +658,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
         */
        ac_build_ifcc(&ctx->ac, is_gs_thread, 6001);
        {
-               struct ngg_prim prim = {};
+               struct ac_ngg_prim prim = {};
 
                if (gfx10_is_ngg_passthrough(ctx->shader)) {
                        prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
@@ -739,7 +686,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
                        }
                }
 
-               build_export_prim(ctx, &prim);
+               ac_build_export_prim(&ctx->ac, &prim);
        }
        ac_build_endif(&ctx->ac, 6001);
 
@@ -1213,7 +1160,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
        ac_build_ifcc(&ctx->ac, tmp, 5140);
        {
                LLVMValueRef flags;
-               struct ngg_prim prim = {};
+               struct ac_ngg_prim prim = {};
                prim.num_vertices = verts_per_prim;
 
                tmp = ngg_gs_vertex_ptr(ctx, tid);
@@ -1242,7 +1189,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
                                              si_unpack_param(ctx, ctx->vs_state_bits, 4, 2),
                                              ctx->i32_0, "");
 
-                       struct ngg_prim in = prim;
+                       struct ac_ngg_prim in = prim;
                        prim.index[0] = LLVMBuildSelect(builder, flatshade_first,
                                                        in.index[0],
                                                        LLVMBuildSelect(builder, is_odd,
@@ -1258,7 +1205,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
                                                        in.index[2], "");
                }
 
-               build_export_prim(ctx, &prim);
+               ac_build_export_prim(&ctx->ac, &prim);
        }
        ac_build_endif(&ctx->ac, 5140);