radeonsi: fix UINT/SINT clamping for 10-bit formats on <= CIK
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Mon, 20 Feb 2017 11:07:21 +0000 (12:07 +0100)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 21 Feb 2017 09:45:13 +0000 (10:45 +0100)
The same PS epilog workaround as for 8-bit integer formats is required,
since the CB doesn't do clamping.

Fixes GL45-CTS.gtf32.GL3Tests.packed_pixels.packed_pixels*.

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index e8dbf5d6d3c3a7876dcedf8c23c9b41e00e3b7f7..a977dc18eb8fcf15a7e408dd40678d390ff79bc7 100644 (file)
@@ -316,6 +316,7 @@ struct r600_surface {
        bool alphatest_bypass;
        bool export_16bpc;
        bool color_is_int8;
+       bool color_is_int10;
 
        /* Color registers. */
        unsigned cb_color_info;
index bee6881d096a640371d84ca074ce43212172346b..5b9a5a18cd67e9e9480f29d2ee45a935d4f2dc52 100644 (file)
@@ -169,7 +169,8 @@ struct si_framebuffer {
        unsigned                        spi_shader_col_format_alpha;
        unsigned                        spi_shader_col_format_blend;
        unsigned                        spi_shader_col_format_blend_alpha;
-       unsigned                        color_is_int8; /* bitmask */
+       unsigned                        color_is_int8;
+       unsigned                        color_is_int10;
        unsigned                        dirty_cbufs;
        bool                            dirty_zsbuf;
        bool                            any_dst_linear;
index a67ac821421f1caf273c7caf1eebf2ce91f3d330..4075eefe62333616749eaf20a3f1e0e333cf761f 100644 (file)
@@ -1735,7 +1735,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
        LLVMValueRef val[4];
        unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
        unsigned chan;
-       bool is_int8;
+       bool is_int8, is_int10;
 
        /* Default is 0xf. Adjusted below depending on the format. */
        args[0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
@@ -1757,6 +1757,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                assert(cbuf >= 0 && cbuf < 8);
                spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
                is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1;
+               is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1;
        }
 
        args[4] = uint->zero; /* COMPR flag */
@@ -1856,13 +1857,17 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                break;
 
        case V_028714_SPI_SHADER_UINT16_ABGR: {
-               LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ?
-                                                       255 : 65535);
+               LLVMValueRef max_rgb = lp_build_const_int32(gallivm,
+                       is_int8 ? 255 : is_int10 ? 1023 : 65535);
+               LLVMValueRef max_alpha =
+                       !is_int10 ? max_rgb : lp_build_const_int32(gallivm, 3);
+
                /* Clamp. */
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
                        val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN,
-                                                             val[chan], max);
+                                       val[chan],
+                                       chan == 3 ? max_alpha : max_rgb);
                }
 
                args[4] = uint->one; /* COMPR flag */
@@ -1874,19 +1879,24 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
        }
 
        case V_028714_SPI_SHADER_SINT16_ABGR: {
-               LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ?
-                                                       127 : 32767);
-               LLVMValueRef min = lp_build_const_int32(gallivm, is_int8 ?
-                                                       -128 : -32768);
+               LLVMValueRef max_rgb = lp_build_const_int32(gallivm,
+                       is_int8 ? 127 : is_int10 ? 511 : 32767);
+               LLVMValueRef min_rgb = lp_build_const_int32(gallivm,
+                       is_int8 ? -128 : is_int10 ? -512 : -32768);
+               LLVMValueRef max_alpha =
+                       !is_int10 ? max_rgb : lp_build_const_int32(gallivm, 1);
+               LLVMValueRef min_alpha =
+                       !is_int10 ? min_rgb : lp_build_const_int32(gallivm, -2);
+
                /* Clamp. */
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
                        val[chan] = lp_build_emit_llvm_binary(bld_base,
-                                                             TGSI_OPCODE_IMIN,
-                                                             val[chan], max);
+                                       TGSI_OPCODE_IMIN,
+                                       val[chan], chan == 3 ? max_alpha : max_rgb);
                        val[chan] = lp_build_emit_llvm_binary(bld_base,
-                                                             TGSI_OPCODE_IMAX,
-                                                             val[chan], min);
+                                       TGSI_OPCODE_IMAX,
+                                       val[chan], chan == 3 ? min_alpha : min_rgb);
                }
 
                args[4] = uint->one; /* COMPR flag */
@@ -6336,6 +6346,7 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
                fprintf(f, "  part.ps.prolog.bc_optimize_for_linear = %u\n", key->part.ps.prolog.bc_optimize_for_linear);
                fprintf(f, "  part.ps.epilog.spi_shader_col_format = 0x%x\n", key->part.ps.epilog.spi_shader_col_format);
                fprintf(f, "  part.ps.epilog.color_is_int8 = 0x%X\n", key->part.ps.epilog.color_is_int8);
+               fprintf(f, "  part.ps.epilog.color_is_int10 = 0x%X\n", key->part.ps.epilog.color_is_int10);
                fprintf(f, "  part.ps.epilog.last_cbuf = %u\n", key->part.ps.epilog.last_cbuf);
                fprintf(f, "  part.ps.epilog.alpha_func = %u\n", key->part.ps.epilog.alpha_func);
                fprintf(f, "  part.ps.epilog.alpha_to_one = %u\n", key->part.ps.epilog.alpha_to_one);
index 0bb0f18f41aaeef838f6f32f03c68ca87d24a597..579f3ca2df67690ddb0d9d0e265b52d63d75c33c 100644 (file)
@@ -375,6 +375,7 @@ struct si_ps_prolog_bits {
 struct si_ps_epilog_bits {
        unsigned        spi_shader_col_format;
        unsigned        color_is_int8:8;
+       unsigned        color_is_int10:8;
        unsigned        last_cbuf:3;
        unsigned        alpha_func:3;
        unsigned        alpha_to_one:1;
index 81592a7e8587b2210b517be3de22baed294b3ade..6948a74fbdd990118ab483aef5ab247ad3af9470 100644 (file)
@@ -2137,11 +2137,15 @@ static void si_initialize_color_surface(struct si_context *sctx,
                blend_bypass = 1;
        }
 
-       if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
-           (format == V_028C70_COLOR_8 ||
-            format == V_028C70_COLOR_8_8 ||
-            format == V_028C70_COLOR_8_8_8_8))
-               surf->color_is_int8 = true;
+       if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
+               if (format == V_028C70_COLOR_8 ||
+                   format == V_028C70_COLOR_8_8 ||
+                   format == V_028C70_COLOR_8_8_8_8)
+                       surf->color_is_int8 = true;
+               else if (format == V_028C70_COLOR_10_10_10_2 ||
+                        format == V_028C70_COLOR_2_10_10_10)
+                       surf->color_is_int10 = true;
+       }
 
        color_info = S_028C70_FORMAT(format) |
                S_028C70_COMP_SWAP(swap) |
@@ -2405,6 +2409,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        sctx->framebuffer.spi_shader_col_format_blend = 0;
        sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
        sctx->framebuffer.color_is_int8 = 0;
+       sctx->framebuffer.color_is_int10 = 0;
 
        sctx->framebuffer.compressed_cb_mask = 0;
        sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
@@ -2434,6 +2439,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
                if (surf->color_is_int8)
                        sctx->framebuffer.color_is_int8 |= 1 << i;
+               if (surf->color_is_int10)
+                       sctx->framebuffer.color_is_int10 |= 1 << i;
 
                if (rtex->fmask.size) {
                        sctx->framebuffer.compressed_cb_mask |= 1 << i;
index 4a81b566dc140039d2573725d1f8ff27b15e4dc1..d6d4560404f336578f4890fb3b202fc3a93e0a92 100644 (file)
@@ -1051,13 +1051,16 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                 * to the range supported by the type if a channel has less
                 * than 16 bits and the export format is 16_ABGR.
                 */
-               if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
+               if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII) {
                        key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
+                       key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
+               }
 
                /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
                if (!key->part.ps.epilog.last_cbuf) {
                        key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
                        key->part.ps.epilog.color_is_int8 &= sel->info.colors_written;
+                       key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
                }
 
                if (rs) {