freedreno/a3xx: add support to emulate GL_CLAMP
author Rob Clark <robclark@freedesktop.org>
Mon, 29 Sep 2014 14:44:46 +0000 (10:44 -0400)
committer Rob Clark <robclark@freedesktop.org>
Mon, 29 Sep 2014 22:30:43 +0000 (18:30 -0400)
Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_context.h
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_texture.c
src/gallium/drivers/freedreno/a3xx/fd3_texture.h
src/gallium/drivers/freedreno/ir3/ir3_compiler.c
src/gallium/drivers/freedreno/ir3/ir3_compiler_old.c
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index 7d7663a28bcaad3b13142cca0f157242bb01af1f..48bbb47e9ca08e5ae05fd733f651e57af4fc57ff 100644 (file)
@@ -76,6 +76,16 @@ struct fd3_context {
 
        struct u_upload_mgr *border_color_uploader;
        struct pipe_resource *border_color_buf;
+
+       /* bitmask of samplers which need coords clamped for the vertex
+        * shader:
+        */
+       unsigned vsaturate_s, vsaturate_t, vsaturate_r;
+
+       /* bitmask of samplers which need coords clamped for the frag
+        * shader:
+        */
+       unsigned fsaturate_s, fsaturate_t, fsaturate_r;
 };
 
 static INLINE struct fd3_context *
index 15d2ce4a943d39a8dd3a12bdbeddea794a81f79a..f7a5fcafd9a6a81e12a409d626f66ea6a5f0f76a 100644 (file)
@@ -100,6 +100,7 @@ static void
 fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
 {
        unsigned dirty = ctx->dirty;
+       struct fd3_context *fd3_ctx = fd3_context(ctx);
        struct ir3_shader_key key = {
                        /* do binning pass first: */
                        .binning_pass = true,
@@ -108,7 +109,14 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
                        // TODO set .half_precision based on render target format,
                        // ie. float16 and smaller use half, float32 use full..
                        .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+                       .vsaturate_s = fd3_ctx->vsaturate_s,
+                       .vsaturate_t = fd3_ctx->vsaturate_t,
+                       .vsaturate_r = fd3_ctx->vsaturate_r,
+                       .fsaturate_s = fd3_ctx->fsaturate_s,
+                       .fsaturate_t = fd3_ctx->fsaturate_t,
+                       .fsaturate_r = fd3_ctx->fsaturate_r,
        };
+
        draw_impl(ctx, info, ctx->binning_ring,
                        dirty & ~(FD_DIRTY_BLEND), key);
        /* and now regular (non-binning) pass: */
index 918dcc491f2e3a880cb1a5f65308e86b9a4ad2ee..d70b39e21142865a25b3f77c3728005ecf40992e 100644 (file)
 #include "fd3_util.h"
 
 static enum a3xx_tex_clamp
-tex_clamp(unsigned wrap)
+tex_clamp(unsigned wrap, bool clamp_to_edge)
 {
-       /* hardware probably supports more, but we can't coax all the
-        * wrap/clamp modes out of the GLESv2 blob driver.
-        *
-        * TODO once we have basics working, go back and just try
-        * different values and see what happens
-        */
+       /* Hardware does not support _CLAMP, but we emulate it: */
+       if (wrap == PIPE_TEX_WRAP_CLAMP) {
+               wrap = (clamp_to_edge) ?
+                       PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+       }
+
        switch (wrap) {
        case PIPE_TEX_WRAP_REPEAT:
                return A3XX_TEX_REPEAT;
-       case PIPE_TEX_WRAP_CLAMP:
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return A3XX_TEX_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
                return A3XX_TEX_CLAMP_TO_BORDER;
-       case PIPE_TEX_WRAP_MIRROR_CLAMP:
-       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-               /* these two we should emulate! */
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
                /* only works for PoT.. need to emulate otherwise! */
                return A3XX_TEX_MIRROR_CLAMP;
        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return A3XX_TEX_MIRROR_REPEAT;
+       case PIPE_TEX_WRAP_MIRROR_CLAMP:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+               /* these two we could perhaps emulate, but we currently
+                * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+                */
        default:
                DBG("invalid wrap: %u", wrap);
                return 0;
@@ -86,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx,
 {
        struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj);
        bool miplinear = false;
+       bool clamp_to_edge;
 
        if (!so)
                return NULL;
@@ -95,14 +97,29 @@ fd3_sampler_state_create(struct pipe_context *pctx,
 
        so->base = *cso;
 
+       /*
+        * For nearest filtering, _CLAMP means _CLAMP_TO_EDGE;  for linear
+        * filtering, _CLAMP means _CLAMP_TO_BORDER while additionally
+        * clamping the texture coordinates to [0.0, 1.0].
+        *
+        * The clamping will be taken care of in the shaders.  There are two
+        * filters here, but only the minification one gets a say.
+        */
+       clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+       if (!clamp_to_edge) {
+               so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP);
+               so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP);
+               so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
+       }
+
        so->texsamp0 =
                        COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
                        COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
                        A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
                        A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
-                       A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
-                       A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
-                       A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+                       A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge)) |
+                       A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge)) |
+                       A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge));
 
        if (cso->compare_mode)
                so->texsamp0 |= A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
@@ -124,7 +141,35 @@ fd3_sampler_states_bind(struct pipe_context *pctx,
                unsigned shader, unsigned start,
                unsigned nr, void **hwcso)
 {
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd3_context *fd3_ctx = fd3_context(ctx);
+       unsigned saturate_s = 0, saturate_t = 0, saturate_r = 0;
+       unsigned i;
+
+       for (i = 0; i < nr; i++) {
+               if (hwcso[i]) {
+                       struct fd3_sampler_stateobj *sampler =
+                                       fd3_sampler_stateobj(hwcso[i]);
+                       if (sampler->saturate_s)
+                               saturate_s |= (1 << i);
+                       if (sampler->saturate_t)
+                               saturate_t |= (1 << i);
+                       if (sampler->saturate_r)
+                               saturate_r |= (1 << i);
+               }
+       }
+
        fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
+
+       if (shader == PIPE_SHADER_FRAGMENT) {
+               fd3_ctx->fsaturate_s = saturate_s;
+               fd3_ctx->fsaturate_t = saturate_t;
+               fd3_ctx->fsaturate_r = saturate_r;
+       } else if (shader == PIPE_SHADER_VERTEX) {
+               fd3_ctx->vsaturate_s = saturate_s;
+               fd3_ctx->vsaturate_t = saturate_t;
+               fd3_ctx->vsaturate_r = saturate_r;
+       }
 }
 
 static enum a3xx_tex_type
index a83f527366b1dca0bfc4b770b47da684d38a3dac..0434df37413e931e3d217812930c3be74bbae8d2 100644 (file)
@@ -40,6 +40,7 @@
 struct fd3_sampler_stateobj {
        struct pipe_sampler_state base;
        uint32_t texsamp0, texsamp1;
+       bool saturate_s, saturate_t, saturate_r;
 };
 
 static INLINE struct fd3_sampler_stateobj *
index c2d4942b66d132ea76e276be8bc85a7d5be30c47..07b57b8d049135e00a520f9527fd97be23bb4ad0 100644 (file)
@@ -135,7 +135,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 {
        unsigned ret;
        struct tgsi_shader_info *info = &ctx->info;
-       const struct fd_lowering_config lconfig = {
+       struct fd_lowering_config lconfig = {
                        .color_two_side = so->key.color_two_side,
                        .lower_DST  = true,
                        .lower_XPD  = true,
@@ -153,6 +153,20 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
                        .lower_DP2A = true,
        };
 
+       switch (so->type) {
+       case SHADER_FRAGMENT:
+       case SHADER_COMPUTE:
+               lconfig.saturate_s = so->key.fsaturate_s;
+               lconfig.saturate_t = so->key.fsaturate_t;
+               lconfig.saturate_r = so->key.fsaturate_r;
+               break;
+       case SHADER_VERTEX:
+               lconfig.saturate_s = so->key.vsaturate_s;
+               lconfig.saturate_t = so->key.vsaturate_t;
+               lconfig.saturate_r = so->key.vsaturate_r;
+               break;
+       }
+
        ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info);
        ctx->free_tokens = !!ctx->tokens;
        if (!ctx->tokens) {
index 1e1ca7ad813ecbdb7095a6af80b8529656248ff4..4267feb351fe76679e872abb279cfe92d9b8444a 100644 (file)
@@ -125,7 +125,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 {
        unsigned ret, base = 0;
        struct tgsi_shader_info *info = &ctx->info;
-       const struct fd_lowering_config lconfig = {
+       struct fd_lowering_config lconfig = {
                        .color_two_side = so->key.color_two_side,
                        .lower_DST  = true,
                        .lower_XPD  = true,
@@ -143,6 +143,20 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
                        .lower_DP2A = true,
        };
 
+       switch (so->type) {
+       case SHADER_FRAGMENT:
+       case SHADER_COMPUTE:
+               lconfig.saturate_s = so->key.fsaturate_s;
+               lconfig.saturate_t = so->key.fsaturate_t;
+               lconfig.saturate_r = so->key.fsaturate_r;
+               break;
+       case SHADER_VERTEX:
+               lconfig.saturate_s = so->key.vsaturate_s;
+               lconfig.saturate_t = so->key.vsaturate_t;
+               lconfig.saturate_r = so->key.vsaturate_r;
+               break;
+       }
+
        ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info);
        ctx->free_tokens = !!ctx->tokens;
        if (!ctx->tokens) {
index 6d45597886baadc4140669dad13f23cf71e78b8d..ed7c639c9301cfce22e716839129949c2bc52d44 100644 (file)
@@ -185,11 +185,17 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
         */
        if (shader->type == SHADER_FRAGMENT) {
                key.binning_pass = false;
+               key.vsaturate_s = 0;
+               key.vsaturate_t = 0;
+               key.vsaturate_r = 0;
        }
        if (shader->type == SHADER_VERTEX) {
                key.color_two_side = false;
                key.half_precision = false;
                key.alpha = false;
+               key.fsaturate_s = 0;
+               key.fsaturate_t = 0;
+               key.fsaturate_r = 0;
        }
 
        for (v = shader->variants; v; v = v->next)
index ea8616491768442c97790ea87d49b99acb4f2844..04a737ef19de5e80beb2162a4898f6d42d30fa48 100644 (file)
@@ -52,8 +52,23 @@ static inline uint16_t sem2idx(ir3_semantic sem)
 /* Configuration key used to identify a shader variant.. different
  * shader variants can be used to implement features not supported
  * in hw (two sided color), binning-pass vertex shader, etc.
+ *
+ * TODO since the shader key is starting to get larger (than 32 bits)
+ * we should probably pass it around by pointer rather than by value in
+ * more places.. but watch out in ir3_shader_variant(), where the key
+ * gets normalized: we need to make a copy there.
  */
 struct ir3_shader_key {
+       /* bitmask of samplers which need coords clamped for the vertex
+        * shader:
+        */
+       unsigned vsaturate_s, vsaturate_t, vsaturate_r;
+
+       /* bitmask of samplers which need coords clamped for the frag
+        * shader:
+        */
+       unsigned fsaturate_s, fsaturate_t, fsaturate_r;
+
        /*
         * Vertex shader variant parameters:
         */