radeonsi: add VS epilog
authorMarek Olšák <marek.olsak@amd.com>
Tue, 26 Jan 2016 23:33:07 +0000 (00:33 +0100)
committerMarek Olšák <marek.olsak@amd.com>
Sun, 21 Feb 2016 20:08:57 +0000 (21:08 +0100)
It only exports the primitive ID.
Also used by TES when it's compiled as VS.

The VS input location of the primitive ID input is v2.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h

index f7bb954e742f4f79bfe040e41724edc42b5689cb..3956e9802e1aa9b02841ad84f5626505d6022624 100644 (file)
@@ -540,7 +540,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
        struct si_screen *sscreen = (struct si_screen *)pscreen;
        struct si_shader_part *parts[] = {
                sscreen->vs_prologs,
-               /* this will be filled with other shader parts */
+               sscreen->vs_epilogs,
        };
        unsigned i;
 
index f4bafc271ef190e5c418f81445c320adde1cd7b0..8d98779307cf98eadf047b330ec7faacfb34f48c 100644 (file)
@@ -90,6 +90,7 @@ struct si_screen {
 
        pipe_mutex                      shader_parts_mutex;
        struct si_shader_part           *vs_prologs;
+       struct si_shader_part           *vs_epilogs;
 };
 
 struct si_blend_color {
index 14f0f9e2ed88097676c653a4d60573edef55c7b3..ea1a658eddc910cf0d8857e5f5587ed5d58beacf 100644 (file)
@@ -129,6 +129,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
                               LLVMTargetMachineRef tm,
                               struct tgsi_shader_info *info);
 
+/* The VS location of the PrimitiveID input is the same in the epilog,
+ * so that the main shader part doesn't have to move it.
+ */
+#define VS_EPILOG_PRIMID_LOC 2
 
 #define PERSPECTIVE_BASE 0
 #define LINEAR_BASE 9
@@ -2230,16 +2234,26 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
                                              "");
        }
 
-       /* Export PrimitiveID when PS needs it. */
-       if (si_vs_exports_prim_id(ctx->shader)) {
-               outputs[i].name = TGSI_SEMANTIC_PRIMID;
-               outputs[i].sid = 0;
-               outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                              get_primitive_id(bld_base, 0));
-               outputs[i].values[1] = bld_base->base.undef;
-               outputs[i].values[2] = bld_base->base.undef;
-               outputs[i].values[3] = bld_base->base.undef;
-               i++;
+       if (ctx->is_monolithic) {
+               /* Export PrimitiveID when PS needs it. */
+               if (si_vs_exports_prim_id(ctx->shader)) {
+                       outputs[i].name = TGSI_SEMANTIC_PRIMID;
+                       outputs[i].sid = 0;
+                       outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+                                                      get_primitive_id(bld_base, 0));
+                       outputs[i].values[1] = bld_base->base.undef;
+                       outputs[i].values[2] = bld_base->base.undef;
+                       outputs[i].values[3] = bld_base->base.undef;
+                       i++;
+               }
+       } else {
+               /* Return the primitive ID from the LLVM function. */
+               ctx->return_value =
+                       LLVMBuildInsertValue(gallivm->builder,
+                                            ctx->return_value,
+                                            bitcast(bld_base, TGSI_TYPE_FLOAT,
+                                                    get_primitive_id(bld_base, 0)),
+                                            VS_EPILOG_PRIMID_LOC, "");
        }
 
        si_llvm_export_vs(bld_base, outputs, i);
@@ -3724,6 +3738,11 @@ static void create_function(struct si_shader_context *ctx)
 
                        for (i = 0; i < shader->selector->info.num_inputs; i++)
                                params[num_params++] = ctx->i32;
+
+                       /* PrimitiveID output. */
+                       if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
+                               for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+                                       returns[num_returns++] = ctx->f32;
                }
                break;
 
@@ -3758,6 +3777,11 @@ static void create_function(struct si_shader_context *ctx)
                params[ctx->param_tes_v = num_params++] = ctx->f32;
                params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32;
                params[ctx->param_tes_patch_id = num_params++] = ctx->i32;
+
+               /* PrimitiveID output. */
+               if (!ctx->is_monolithic && !shader->key.tes.as_es)
+                       for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+                               returns[num_returns++] = ctx->f32;
                break;
 
        case TGSI_PROCESSOR_GEOMETRY:
@@ -4864,6 +4888,111 @@ static bool si_compile_vs_prolog(struct si_screen *sscreen,
        return status;
 }
 
+/**
+ * Compile the vertex shader epilog. This is also used by the tessellation
+ * evaluation shader compiled as VS.
+ *
+ * The input is PrimitiveID.
+ *
+ * If PrimitiveID is required by the pixel shader, export it.
+ * Otherwise, do nothing.
+ */
+static bool si_compile_vs_epilog(struct si_screen *sscreen,
+                                LLVMTargetMachineRef tm,
+                                struct pipe_debug_callback *debug,
+                                struct si_shader_part *out)
+{
+       union si_shader_part_key *key = &out->key;
+       struct si_shader_context ctx;
+       struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+       struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+       LLVMTypeRef params[5];
+       int num_params, i;
+       bool status = true;
+
+       si_init_shader_ctx(&ctx, sscreen, NULL, tm, NULL);
+       ctx.type = TGSI_PROCESSOR_VERTEX;
+
+       /* Declare input VGPRs. */
+       num_params = key->vs_epilog.states.export_prim_id ?
+                          (VS_EPILOG_PRIMID_LOC + 1) : 0;
+       assert(num_params <= ARRAY_SIZE(params));
+
+       for (i = 0; i < num_params; i++)
+               params[i] = ctx.f32;
+
+       /* Create the function. */
+       si_create_function(&ctx, NULL, 0, params, num_params,
+                          -1, -1);
+
+       /* Emit exports. */
+       if (key->vs_epilog.states.export_prim_id) {
+               struct lp_build_context *base = &bld_base->base;
+               struct lp_build_context *uint = &bld_base->uint_bld;
+               LLVMValueRef args[9];
+
+               args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
+               args[1] = uint->zero; /* whether the EXEC mask is valid */
+               args[2] = uint->zero; /* DONE bit */
+               args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_PARAM +
+                                              key->vs_epilog.prim_id_param_offset);
+               args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+               args[5] = LLVMGetParam(ctx.radeon_bld.main_fn,
+                                      VS_EPILOG_PRIMID_LOC); /* X */
+               args[6] = uint->undef; /* Y */
+               args[7] = uint->undef; /* Z */
+               args[8] = uint->undef; /* W */
+
+               lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+                                  LLVMVoidTypeInContext(base->gallivm->context),
+                                  args, 9, 0);
+       }
+
+       /* Compile. */
+       LLVMBuildRet(gallivm->builder, ctx.return_value);
+       radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+       if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+                           gallivm->module, debug, ctx.type,
+                           "Vertex Shader Epilog"))
+               status = false;
+
+       radeon_llvm_dispose(&ctx.radeon_bld);
+       return status;
+}
+
+/**
+ * Create & compile a vertex shader epilog. This a helper used by VS and TES.
+ */
+static bool si_get_vs_epilog(struct si_screen *sscreen,
+                            LLVMTargetMachineRef tm,
+                            struct si_shader *shader,
+                            struct pipe_debug_callback *debug,
+                            struct si_vs_epilog_bits *states)
+{
+       union si_shader_part_key epilog_key;
+
+       memset(&epilog_key, 0, sizeof(epilog_key));
+       epilog_key.vs_epilog.states = *states;
+
+       /* Set up the PrimitiveID output. */
+       if (shader->key.vs.epilog.export_prim_id) {
+               unsigned index = shader->selector->info.num_outputs;
+               unsigned offset = shader->nr_param_exports++;
+
+               epilog_key.vs_epilog.prim_id_param_offset = offset;
+               shader->vs_output_param_offset[index] = offset;
+       }
+
+       shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs,
+                                           &epilog_key, tm, debug,
+                                           si_compile_vs_epilog);
+       return shader->epilog != NULL;
+}
+
+/**
+ * Select and compile (or reuse) vertex shader parts (prolog & epilog).
+ */
 static bool si_shader_select_vs_parts(struct si_screen *sscreen,
                                      LLVMTargetMachineRef tm,
                                      struct si_shader *shader,
@@ -4889,6 +5018,12 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
                        return false;
        }
 
+       /* Get the epilog. */
+       if (!shader->key.vs.as_es && !shader->key.vs.as_ls &&
+           !si_get_vs_epilog(sscreen, tm, shader, debug,
+                             &shader->key.vs.epilog))
+               return false;
+
        /* Set the instanceID flag. */
        for (i = 0; i < info->num_inputs; i++)
                if (prolog_key.vs_prolog.states.instance_divisors[i])
@@ -4897,6 +5032,22 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
        return true;
 }
 
+/**
+ * Select and compile (or reuse) TES parts (epilog).
+ */
+static bool si_shader_select_tes_parts(struct si_screen *sscreen,
+                                      LLVMTargetMachineRef tm,
+                                      struct si_shader *shader,
+                                      struct pipe_debug_callback *debug)
+{
+       if (shader->key.tes.as_es)
+               return true;
+
+       /* TES compiled as VS. */
+       return si_get_vs_epilog(sscreen, tm, shader, debug,
+                               &shader->key.tes.epilog);
+}
+
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug)
@@ -4915,6 +5066,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                        if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
                                return -1;
                        break;
+               case PIPE_SHADER_TESS_EVAL:
+                       if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
+                               return -1;
+                       break;
                }
 
                /* Update SGPR and VGPR counts. */
index e3ba4c7ca04007f2a385b187cba0be1da996ebce..d7388a4ae918a4ee2a34fa8c337d6d9447e8770d 100644 (file)
@@ -274,6 +274,10 @@ union si_shader_part_key {
                unsigned        num_input_sgprs:5;
                unsigned        last_input:4;
        } vs_prolog;
+       struct {
+               struct si_vs_epilog_bits states;
+               unsigned        prim_id_param_offset:5;
+       } vs_epilog;
 };
 
 union si_shader_key {