radeonsi: add a workaround for clamping unaligned RGB 8 & 16-bit vertex loads
author: Marek Olšák <marek.olsak@amd.com>
Thu, 16 Feb 2017 00:17:48 +0000 (01:17 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Sat, 18 Feb 2017 00:22:08 +0000 (01:22 +0100)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
src/gallium/drivers/radeonsi/si_state.c

index 8b9fed9fb8c266e3d9221b32f410acc63dd36fa5..1829e3ec118be3bb952140a1582dc3ec096dc256 100644 (file)
@@ -368,14 +368,27 @@ static void declare_input_vs(
 
        fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index];
 
-       /* Do multiple loads for double formats. */
-       if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) {
+       /* Do multiple loads for special formats. */
+       switch (fix_fetch) {
+       case SI_FIX_FETCH_RGB_64_FLOAT:
                num_fetches = 3; /* 3 2-dword loads */
                fetch_stride = 8;
-       } else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) {
+               break;
+       case SI_FIX_FETCH_RGBA_64_FLOAT:
                num_fetches = 2; /* 2 4-dword loads */
                fetch_stride = 16;
-       } else {
+               break;
+       case SI_FIX_FETCH_RGB_8:
+       case SI_FIX_FETCH_RGB_8_INT:
+               num_fetches = 3;
+               fetch_stride = 1;
+               break;
+       case SI_FIX_FETCH_RGB_16:
+       case SI_FIX_FETCH_RGB_16_INT:
+               num_fetches = 3;
+               fetch_stride = 2;
+               break;
+       default:
                num_fetches = 1;
                fetch_stride = 0;
        }
@@ -512,6 +525,23 @@ static void declare_input_vs(
                                                            chan % 2);
                }
                break;
+       case SI_FIX_FETCH_RGB_8:
+       case SI_FIX_FETCH_RGB_8_INT:
+       case SI_FIX_FETCH_RGB_16:
+       case SI_FIX_FETCH_RGB_16_INT:
+               for (chan = 0; chan < 3; chan++) {
+                       out[chan] = LLVMBuildExtractElement(gallivm->builder,
+                                                           input[chan],
+                                                           ctx->i32_0, "");
+               }
+               if (fix_fetch == SI_FIX_FETCH_RGB_8 ||
+                   fix_fetch == SI_FIX_FETCH_RGB_16) {
+                       out[3] = LLVMConstReal(ctx->f32, 1);
+               } else {
+                       out[3] = LLVMBuildBitCast(gallivm->builder, ctx->i32_1,
+                                                 ctx->f32, "");
+               }
+               break;
        }
 }
 
index 46161907a679b0d6d71626d4b82551688ce91475..da88df041ca1317a193bbfe7a81133e11b32692f 100644 (file)
@@ -250,6 +250,10 @@ enum {
        SI_FIX_FETCH_RG_64_FLOAT,
        SI_FIX_FETCH_RGB_64_FLOAT,
        SI_FIX_FETCH_RGBA_64_FLOAT,
+       SI_FIX_FETCH_RGB_8,     /* A = 1.0 */
+       SI_FIX_FETCH_RGB_8_INT, /* A = 1 */
+       SI_FIX_FETCH_RGB_16,
+       SI_FIX_FETCH_RGB_16_INT,
 };
 
 struct si_shader;
index 26cc28d80795618129c5c69e81f5eaddbe7009d7..8fde6c25684f6c8045b1aff6cd39743f564ac90d 100644 (file)
@@ -149,6 +149,9 @@ struct si_shader_context {
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
 
+       LLVMValueRef i32_0;
+       LLVMValueRef i32_1;
+
        LLVMValueRef shared_memory;
 };
 
index c7445e037a3906e2cbc28899ab387778bf4e79d8..c7019c1b8b74920766e808c44a95b8e6bf84534a 100644 (file)
@@ -1366,6 +1366,9 @@ void si_llvm_context_init(struct si_shader_context *ctx,
        ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
        ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
        ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+
+       ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
+       ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
 }
 
 void si_llvm_create_func(struct si_shader_context *ctx,
index d9b9f83876397cbd9798a8c30a095bde01388e6c..024de8b035edda0b61c1703552dfb39770eaa548 100644 (file)
@@ -1732,10 +1732,10 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
        case 8:
                switch (desc->nr_channels) {
                case 1:
+               case 3: /* 3 loads */
                        return V_008F0C_BUF_DATA_FORMAT_8;
                case 2:
                        return V_008F0C_BUF_DATA_FORMAT_8_8;
-               case 3:
                case 4:
                        return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
                }
@@ -1743,10 +1743,10 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
        case 16:
                switch (desc->nr_channels) {
                case 1:
+               case 3: /* 3 loads */
                        return V_008F0C_BUF_DATA_FORMAT_16;
                case 2:
                        return V_008F0C_BUF_DATA_FORMAT_16_16;
-               case 3:
                case 4:
                        return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
                }
@@ -3459,6 +3459,20 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                        default:
                                assert(0);
                        }
+               } else if (channel && desc->nr_channels == 3) {
+                       assert(desc->swizzle[0] == PIPE_SWIZZLE_X);
+
+                       if (channel->size == 8) {
+                               if (channel->pure_integer)
+                                       v->fix_fetch[i] = SI_FIX_FETCH_RGB_8_INT;
+                               else
+                                       v->fix_fetch[i] = SI_FIX_FETCH_RGB_8;
+                       } else if (channel->size == 16) {
+                               if (channel->pure_integer)
+                                       v->fix_fetch[i] = SI_FIX_FETCH_RGB_16_INT;
+                               else
+                                       v->fix_fetch[i] = SI_FIX_FETCH_RGB_16;
+                       }
                }
 
                v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) |