radeonsi: fix vertex fetches for 2_10_10_10 formats
author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 2 Nov 2016 18:07:40 +0000 (19:07 +0100)
committer: Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 4 Nov 2016 20:30:18 +0000 (21:30 +0100)
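The hardware always treats the 2-bit alpha channel of 2_10_10_10 vertex
formats as unsigned, so add a shader workaround for the signed (SNORM,
SSCALED, SINT) variants. These formats are rare enough that we'll just
build a monolithic vertex shader whenever the workaround is needed.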

The SINT case cannot actually happen in OpenGL, but I've included it for
completeness since it's just a mix of the other cases.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 28a8b1fe9e958d51e99040a8f209c5966d4ad317..b170eb97d485682625526bdf2f9af7c8dc75c3cd 100644 (file)
@@ -369,17 +369,16 @@ static LLVMValueRef get_instance_index_for_fetch(
 }
 
 static void declare_input_vs(
-       struct si_shader_context *radeon_bld,
+       struct si_shader_context *ctx,
        unsigned input_index,
        const struct tgsi_full_declaration *decl,
        LLVMValueRef out[4])
 {
-       struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
+       struct lp_build_context *base = &ctx->soa.bld_base.base;
        struct gallivm_state *gallivm = base->gallivm;
-       struct si_shader_context *ctx =
-               si_shader_context(&radeon_bld->soa.bld_base);
 
        unsigned chan;
+       unsigned fix_fetch;
 
        LLVMValueRef t_list_ptr;
        LLVMValueRef t_offset;
@@ -399,7 +398,7 @@ static void declare_input_vs(
        /* Build the attribute offset */
        attribute_offset = lp_build_const_int32(gallivm, 0);
 
-       buffer_index = LLVMGetParam(radeon_bld->main_fn,
+       buffer_index = LLVMGetParam(ctx->main_fn,
                                    ctx->param_vertex_index0 +
                                    input_index);
 
@@ -416,6 +415,45 @@ static void declare_input_vs(
                out[chan] = LLVMBuildExtractElement(gallivm->builder,
                                                    input, llvm_chan, "");
        }
+
+       fix_fetch = (ctx->shader->key.vs.fix_fetch >> (2 * input_index)) & 3;
+       if (fix_fetch) {
+               /* The hardware returns an unsigned value; convert it to a
+                * signed one.
+                */
+               LLVMValueRef tmp = out[3];
+               LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);
+
+               /* First, recover the sign-extended signed integer value. */
+               if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
+                       tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, "");
+               else
+                       tmp = LLVMBuildBitCast(gallivm->builder, tmp, ctx->i32, "");
+
+               /* For the integer-like cases, do a natural sign extension.
+                *
+                * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+                * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+                * exponent.
+                */
+               tmp = LLVMBuildShl(gallivm->builder, tmp,
+                                  fix_fetch == SI_FIX_FETCH_A2_SNORM ?
+                                  LLVMConstInt(ctx->i32, 7, 0) : c30, "");
+               tmp = LLVMBuildAShr(gallivm->builder, tmp, c30, "");
+
+               /* Convert back to the right type. */
+               if (fix_fetch == SI_FIX_FETCH_A2_SNORM) {
+                       LLVMValueRef clamp;
+                       LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
+                       tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
+                       clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, tmp, neg_one, "");
+                       tmp = LLVMBuildSelect(gallivm->builder, clamp, neg_one, tmp, "");
+               } else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) {
+                       tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
+               }
+
+               out[3] = tmp;
+       }
 }
 
 static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
@@ -8102,11 +8140,15 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 
        /* LS, ES, VS are compiled on demand if the main part hasn't been
         * compiled for that stage.
+        *
+        * Vertex shaders are compiled on demand when a vertex fetch
+        * workaround must be applied.
         */
        if (!mainp ||
            (sel->type == PIPE_SHADER_VERTEX &&
             (shader->key.vs.as_es != mainp->key.vs.as_es ||
-             shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
+             shader->key.vs.as_ls != mainp->key.vs.as_ls ||
+             shader->key.vs.fix_fetch)) ||
            (sel->type == PIPE_SHADER_TESS_EVAL &&
             shader->key.tes.as_es != mainp->key.tes.as_es) ||
            (sel->type == PIPE_SHADER_TESS_CTRL &&
index d8ab2a41c9c815850206092fe8a15bbffcd7a0e6..59e7bfb045747f9e5b5855a281943c05c53ae84a 100644 (file)
@@ -233,6 +233,14 @@ enum {
        TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
 };
 
+/* For VS shader key fix_fetch. */
+enum {
+       SI_FIX_FETCH_NONE = 0,
+       SI_FIX_FETCH_A2_SNORM = 1,
+       SI_FIX_FETCH_A2_SSCALED = 2,
+       SI_FIX_FETCH_A2_SINT = 3,
+};
+
 struct si_shader;
 
 /* A shader selector is a gallium CSO and contains shader variants and
@@ -400,6 +408,9 @@ union si_shader_key {
                struct si_vs_epilog_bits epilog;
                unsigned        as_es:1; /* export shader */
                unsigned        as_ls:1; /* local shader */
+
+               /* One pair of bits for every input: SI_FIX_FETCH_* enums. */
+               uint32_t        fix_fetch;
        } vs;
        struct {
                struct si_tcs_epilog_bits epilog;
index 642ce79ab446c3733cb619da86b2651084fec159..24c7b100feadeb15ffc36d233c56f7906396149f 100644 (file)
@@ -3281,6 +3281,20 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                                   S_008F0C_NUM_FORMAT(num_format) |
                                   S_008F0C_DATA_FORMAT(data_format);
                v->format_size[i] = desc->block.bits / 8;
+
+               /* The hardware always treats the 2-bit alpha channel as
+                * unsigned, so a shader workaround is needed.
+                */
+               if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
+                       if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
+                               v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i);
+                       } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
+                               v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i);
+                       } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
+                               /* This isn't actually used in OpenGL. */
+                               v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
+                       }
+               }
        }
        memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
 
index 3ebf578e59313aed43763761fc87f6e75a6cbb12..c444a699ed65907ede43a3bfed914ac051eb1543 100644 (file)
@@ -99,6 +99,7 @@ struct si_stencil_ref {
 struct si_vertex_element
 {
        unsigned                        count;
+       uint32_t                        fix_fetch;
        uint32_t                        rsrc_word3[SI_MAX_ATTRIBS];
        uint32_t                        format_size[SI_MAX_ATTRIBS];
        struct pipe_vertex_element      elements[SI_MAX_ATTRIBS];
index 2a41bf1c20a2fb540c08ff053ea252501e87fc06..9e95fea33af5649d9e66afb769c11d1eea6ce8fe 100644 (file)
@@ -872,6 +872,10 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                        for (i = 0; i < count; ++i)
                                key->vs.prolog.instance_divisors[i] =
                                        sctx->vertex_elements->elements[i].instance_divisor;
+
+                       key->vs.fix_fetch =
+                               sctx->vertex_elements->fix_fetch &
+                               u_bit_consecutive(0, 2 * count);
                }
                if (sctx->tes_shader.cso)
                        key->vs.as_ls = 1;