v3d: Create separate sampler states for the various blend formats.
author Eric Anholt <eric@anholt.net>
Thu, 27 Dec 2018 04:56:49 +0000 (20:56 -0800)
committer Eric Anholt <eric@anholt.net>
Sun, 27 Jan 2019 16:30:03 +0000 (08:30 -0800)
The sampler border color is encoded in the TMU's blending format (half
floats, 32-bit floats, or integers) and must be clamped to the format's
unorm/snorm/int range by the driver.  Additionally, the TMU doesn't know
how we're abusing the swizzle to support BGRA, A, and LA, so we have to
pre-swizzle the border color for those.

We don't really want to spend half a kb on sampler states in most cases,
so skip generating the variants when the border color is unused or is
0,0,0,0.

src/broadcom/cle/v3d_packet_v33.xml
src/gallium/drivers/v3d/v3d_context.h
src/gallium/drivers/v3d/v3d_uniforms.c
src/gallium/drivers/v3d/v3dx_state.c

index 4f605e7a4d068b70aea0e1a239c0e0c49c330714..754461dc06797a613c986dadb2556444a9432e2b 100644 (file)
   </struct>
 
   <struct name="Sampler State" min_ver="41">
-    <field name="Border color Alpha" size="32" start="160" type="uint"/>
-    <field name="Border color Blue" size="32" start="128" type="uint"/>
-    <field name="Border color Green" size="32" start="96" type="uint"/>
-    <field name="Border color Red" size="32" start="64" type="uint"/>
+    <field name="Border color word 3" size="32" start="160" type="uint"/>
+    <field name="Border color word 2" size="32" start="128" type="uint"/>
+    <field name="Border color word 1" size="32" start="96" type="uint"/>
+    <field name="Border color word 0" size="32" start="64" type="uint"/>
 
     <field name="Maximum Anisotropy" size="2" start="61" type="uint"/>
     <field name="Border Color Mode" size="3" start="58" type="Border Color Mode"/>
index 1b37681c70268855e4678f091b135e7b117beb13..f8f0e64ef9d2a753417c768aa77c29f0b62ce47c 100644 (file)
@@ -88,6 +88,35 @@ void v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo);
 
 #define VC5_MAX_FS_INPUTS 64
 
+enum v3d_sampler_state_variant {
+        V3D_SAMPLER_STATE_BORDER_0, /* Border color mode 0000, no words. */
+        V3D_SAMPLER_STATE_F16, /* Each family: base, _UNORM, _SNORM. */
+        V3D_SAMPLER_STATE_F16_UNORM,
+        V3D_SAMPLER_STATE_F16_SNORM,
+        V3D_SAMPLER_STATE_F16_BGRA,
+        V3D_SAMPLER_STATE_F16_BGRA_UNORM,
+        V3D_SAMPLER_STATE_F16_BGRA_SNORM,
+        V3D_SAMPLER_STATE_F16_A,
+        V3D_SAMPLER_STATE_F16_A_UNORM, /* Must be base + 1, like F16_UNORM. */
+        V3D_SAMPLER_STATE_F16_A_SNORM, /* Must be base + 2, like F16_SNORM. */
+        V3D_SAMPLER_STATE_F16_LA,
+        V3D_SAMPLER_STATE_F16_LA_UNORM,
+        V3D_SAMPLER_STATE_F16_LA_SNORM,
+        V3D_SAMPLER_STATE_32, /* >= this: border words stored as raw 32-bit. */
+        V3D_SAMPLER_STATE_32_UNORM,
+        V3D_SAMPLER_STATE_32_SNORM,
+        V3D_SAMPLER_STATE_32_A,
+        V3D_SAMPLER_STATE_32_A_UNORM,
+        V3D_SAMPLER_STATE_32_A_SNORM,
+        V3D_SAMPLER_STATE_1010102U,
+        V3D_SAMPLER_STATE_16U,
+        V3D_SAMPLER_STATE_16I,
+        V3D_SAMPLER_STATE_8I,
+        V3D_SAMPLER_STATE_8U,
+
+        V3D_SAMPLER_STATE_VARIANT_COUNT, /* Sizes sampler_state_offset[]. */
+};
+
 struct v3d_sampler_view {
         struct pipe_sampler_view base;
         uint32_t p0;
@@ -99,6 +128,8 @@ struct v3d_sampler_view {
         /* V3D 4.x: Texture state struct. */
         struct v3d_bo *bo;
 
+        enum v3d_sampler_state_variant sampler_variant;
+
         /* Actual texture to be read by this sampler view.  May be different
          * from base.texture in the case of having a shadow tiled copy of a
          * raster texture.
@@ -115,7 +146,9 @@ struct v3d_sampler_state {
         uint8_t texture_shader_state[32];
         /* V3D 4.x: Sampler state struct. */
         struct pipe_resource *sampler_state;
-        uint32_t sampler_state_offset;
+        uint32_t sampler_state_offset[V3D_SAMPLER_STATE_VARIANT_COUNT];
+
+        bool border_color_variants;
 };
 
 struct v3d_texture_stateobj {
index 1dd9aba9340c81c694dad82752382079946ab566..307ccaafa3dc24c602bcd39b91ffe23452538cc6 100644 (file)
@@ -213,10 +213,16 @@ write_tmu_p1(struct v3d_job *job,
         uint32_t unit = v3d_tmu_config_data_get_unit(data);
         struct pipe_sampler_state *psampler = texstate->samplers[unit];
         struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
+        struct pipe_sampler_view *psview = texstate->textures[unit];
+        struct v3d_sampler_view *sview = v3d_sampler_view(psview);
+        int variant = 0;
+
+        if (sampler->border_color_variants)
+                variant = sview->sampler_variant;
 
         cl_aligned_reloc(&job->indirect, uniforms,
                          v3d_resource(sampler->sampler_state)->bo,
-                         sampler->sampler_state_offset |
+                         sampler->sampler_state_offset[variant] |
                          v3d_tmu_config_data_get_value(data));
 }
 
index 598a7e870f8e1b446a99e81552fec3426de4a767..f326b5379ba1dca57a3e4d35dc3ec4ae3506e574 100644 (file)
@@ -524,32 +524,13 @@ translate_wrap(uint32_t pipe_wrap, bool using_nearest)
         }
 }
 
-
-static void *
-v3d_create_sampler_state(struct pipe_context *pctx,
-                         const struct pipe_sampler_state *cso)
-{
-        MAYBE_UNUSED struct v3d_context *v3d = v3d_context(pctx);
-        struct v3d_sampler_state *so = CALLOC_STRUCT(v3d_sampler_state);
-
-        if (!so)
-                return NULL;
-
-        memcpy(so, cso, sizeof(*cso));
-
-        bool either_nearest =
-                (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
-                 cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
-
 #if V3D_VERSION >= 40
-        void *map;
-        u_upload_alloc(v3d->state_uploader, 0,
-                       cl_packet_length(SAMPLER_STATE),
-                       32, /* XXX: 8 for unextended samplers. */
-                       &so->sampler_state_offset,
-                       &so->sampler_state,
-                       &map);
-
+static void
+v3d_upload_sampler_state_variant(void *map,
+                                 const struct pipe_sampler_state *cso,
+                                 enum v3d_sampler_state_variant variant,
+                                 bool either_nearest)
+{
         v3dx_pack(map, SAMPLER_STATE, sampler) {
                 sampler.wrap_i_border = false;
 
@@ -595,29 +576,190 @@ v3d_create_sampler_state(struct pipe_context *pctx,
                                 sampler.maximum_anisotropy = 1;
                 }
 
-                sampler.border_color_mode = V3D_BORDER_COLOR_FOLLOWS;
-                /* XXX: The border color field is in the TMU blending format
-                 * (32, f16, or i16), and we need to customize it based on
-                 * that.
-                 *
-                 * XXX: for compat alpha formats, we need the alpha field to
-                 * be in the red channel.
-                 */
-                sampler.border_color_red =
-                        util_float_to_half(cso->border_color.f[0]);
-                sampler.border_color_green =
-                        util_float_to_half(cso->border_color.f[1]);
-                sampler.border_color_blue =
-                        util_float_to_half(cso->border_color.f[2]);
-                sampler.border_color_alpha =
-                        util_float_to_half(cso->border_color.f[3]);
+                if (variant == V3D_SAMPLER_STATE_BORDER_0) {
+                        sampler.border_color_mode = V3D_BORDER_COLOR_0000;
+                } else {
+                        sampler.border_color_mode = V3D_BORDER_COLOR_FOLLOWS;
+
+                        union pipe_color_union border;
+
+                        /* First, reswizzle the border color for any
+                         * mismatching we're doing between the texture's
+                         * channel order in hardware (R) versus what it is at
+                         * the GL level (ALPHA)
+                         */
+                        switch (variant) {
+                        case V3D_SAMPLER_STATE_F16_BGRA:
+                        case V3D_SAMPLER_STATE_F16_BGRA_UNORM:
+                        case V3D_SAMPLER_STATE_F16_BGRA_SNORM:
+                                border.i[0] = cso->border_color.i[2];
+                                border.i[1] = cso->border_color.i[1];
+                                border.i[2] = cso->border_color.i[0];
+                                border.i[3] = cso->border_color.i[3];
+                                break;
+
+                        case V3D_SAMPLER_STATE_F16_A:
+                        case V3D_SAMPLER_STATE_F16_A_UNORM:
+                        case V3D_SAMPLER_STATE_F16_A_SNORM:
+                        case V3D_SAMPLER_STATE_32_A:
+                        case V3D_SAMPLER_STATE_32_A_UNORM:
+                        case V3D_SAMPLER_STATE_32_A_SNORM:
+                                border.i[0] = cso->border_color.i[3];
+                                border.i[1] = 0;
+                                border.i[2] = 0;
+                                border.i[3] = 0;
+                                break;
+
+                        case V3D_SAMPLER_STATE_F16_LA:
+                        case V3D_SAMPLER_STATE_F16_LA_UNORM:
+                        case V3D_SAMPLER_STATE_F16_LA_SNORM:
+                                border.i[0] = cso->border_color.i[0];
+                                border.i[1] = cso->border_color.i[3];
+                                border.i[2] = 0;
+                                border.i[3] = 0;
+                                break;
+
+                        default:
+                                border = cso->border_color;
+                        }
+
+                        /* Perform any clamping. */
+                        switch (variant) {
+                        case V3D_SAMPLER_STATE_F16_UNORM:
+                        case V3D_SAMPLER_STATE_F16_BGRA_UNORM:
+                        case V3D_SAMPLER_STATE_F16_A_UNORM:
+                        case V3D_SAMPLER_STATE_F16_LA_UNORM:
+                        case V3D_SAMPLER_STATE_32_UNORM:
+                        case V3D_SAMPLER_STATE_32_A_UNORM:
+                                for (int i = 0; i < 4; i++)
+                                        border.f[i] = CLAMP(border.f[i], 0, 1);
+                                break;
+
+                        case V3D_SAMPLER_STATE_F16_SNORM:
+                        case V3D_SAMPLER_STATE_F16_BGRA_SNORM:
+                        case V3D_SAMPLER_STATE_F16_A_SNORM:
+                        case V3D_SAMPLER_STATE_F16_LA_SNORM:
+                        case V3D_SAMPLER_STATE_32_SNORM:
+                        case V3D_SAMPLER_STATE_32_A_SNORM:
+                                for (int i = 0; i < 4; i++)
+                                        border.f[i] = CLAMP(border.f[i], -1, 1);
+                                break;
+
+                        case V3D_SAMPLER_STATE_1010102U:
+                                border.ui[0] = CLAMP(border.ui[0],
+                                                     0, (1 << 10) - 1);
+                                border.ui[1] = CLAMP(border.ui[1],
+                                                     0, (1 << 10) - 1);
+                                border.ui[2] = CLAMP(border.ui[2],
+                                                     0, (1 << 10) - 1);
+                                border.ui[3] = CLAMP(border.ui[3],
+                                                     0, 3);
+                                break;
+
+                        case V3D_SAMPLER_STATE_16U:
+                                for (int i = 0; i < 4; i++)
+                                        border.ui[i] = CLAMP(border.ui[i],
+                                                             0, 0xffff);
+                                break;
+
+                        case V3D_SAMPLER_STATE_16I:
+                                for (int i = 0; i < 4; i++)
+                                        border.i[i] = CLAMP(border.i[i],
+                                                            -32768, 32767);
+                                break;
+
+                        case V3D_SAMPLER_STATE_8U:
+                                for (int i = 0; i < 4; i++)
+                                        border.ui[i] = CLAMP(border.ui[i],
+                                                             0, 0xff);
+                                break;
+
+                        case V3D_SAMPLER_STATE_8I:
+                                for (int i = 0; i < 4; i++)
+                                        border.i[i] = CLAMP(border.i[i],
+                                                            -128, 127);
+                                break;
+
+                        default:
+                                break;
+                        }
+
+                        if (variant >= V3D_SAMPLER_STATE_32) {
+                                sampler.border_color_word_0 = border.ui[0];
+                                sampler.border_color_word_1 = border.ui[1];
+                                sampler.border_color_word_2 = border.ui[2];
+                                sampler.border_color_word_3 = border.ui[3];
+                        } else {
+                                sampler.border_color_word_0 =
+                                        util_float_to_half(border.f[0]);
+                                sampler.border_color_word_1 =
+                                        util_float_to_half(border.f[1]);
+                                sampler.border_color_word_2 =
+                                        util_float_to_half(border.f[2]);
+                                sampler.border_color_word_3 =
+                                        util_float_to_half(border.f[3]);
+                        }
+                }
+        }
+}
+#endif
+
+static void *
+v3d_create_sampler_state(struct pipe_context *pctx,
+                         const struct pipe_sampler_state *cso)
+{
+        MAYBE_UNUSED struct v3d_context *v3d = v3d_context(pctx);
+        struct v3d_sampler_state *so = CALLOC_STRUCT(v3d_sampler_state);
+
+        if (!so)
+                return NULL;
+
+        memcpy(so, cso, sizeof(*cso));
+
+        bool either_nearest =
+                (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
+                 cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
+
+        enum V3DX(Wrap_Mode) wrap_s = translate_wrap(cso->wrap_s,
+                                                     either_nearest);
+        enum V3DX(Wrap_Mode) wrap_t = translate_wrap(cso->wrap_t,
+                                                     either_nearest);
+        enum V3DX(Wrap_Mode) wrap_r = translate_wrap(cso->wrap_r,
+                                                     either_nearest);
+
+        bool uses_border_color = (wrap_s == V3D_WRAP_MODE_BORDER ||
+                                  wrap_t == V3D_WRAP_MODE_BORDER ||
+                                  wrap_r == V3D_WRAP_MODE_BORDER);
+        so->border_color_variants = (uses_border_color &&
+                                     (cso->border_color.ui[0] != 0 ||
+                                      cso->border_color.ui[1] != 0 ||
+                                      cso->border_color.ui[2] != 0 ||
+                                      cso->border_color.ui[3] != 0));
+
+#if V3D_VERSION >= 40
+        void *map;
+        int sampler_align = so->border_color_variants ? 32 : 8;
+        int sampler_size = align(cl_packet_length(SAMPLER_STATE), sampler_align);
+        int num_variants = (so->border_color_variants ? ARRAY_SIZE(so->sampler_state_offset) : 1);
+        u_upload_alloc(v3d->state_uploader, 0,
+                       sampler_size * num_variants,
+                       sampler_align,
+                       &so->sampler_state_offset[0],
+                       &so->sampler_state,
+                       &map);
+
+        for (int i = 0; i < num_variants; i++) {
+                so->sampler_state_offset[i] =
+                        so->sampler_state_offset[0] + i * sampler_size;
+                v3d_upload_sampler_state_variant(map + i * sampler_size,
+                                                 cso, i, either_nearest);
         }
 
 #else /* V3D_VERSION < 40 */
         v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) {
-                p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest);
-                p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest);
-                p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest);
+                p0.s_wrap_mode = wrap_s;
+                p0.t_wrap_mode = wrap_t;
+                p0.r_wrap_mode = wrap_r;
         }
 
         v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
@@ -787,6 +929,78 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                 prsc = &rsc->base;
         }
 
+        /* If we're sampling depth from depth/stencil, demote the format to
+         * just depth.  u_format will end up giving the answers for the
+         * stencil channel, otherwise.
+         */
+        enum pipe_format sample_format = cso->format;
+        if (sample_format == PIPE_FORMAT_S8_UINT_Z24_UNORM)
+                sample_format = PIPE_FORMAT_X8Z24_UNORM;
+
+#if V3D_VERSION >= 40
+        const struct util_format_description *desc =
+                util_format_description(sample_format);
+
+        if (util_format_is_pure_integer(sample_format) &&
+            !util_format_has_depth(desc)) {
+                int chan = util_format_get_first_non_void_channel(sample_format);
+                if (util_format_is_pure_uint(sample_format)) {
+                        switch (desc->channel[chan].size) {
+                        case 32:
+                                so->sampler_variant = V3D_SAMPLER_STATE_32;
+                                break;
+                        case 16:
+                                so->sampler_variant = V3D_SAMPLER_STATE_16U;
+                                break;
+                        case 10:
+                                so->sampler_variant = V3D_SAMPLER_STATE_1010102U;
+                                break;
+                        case 8:
+                                so->sampler_variant = V3D_SAMPLER_STATE_8U;
+                                break;
+                        }
+                } else {
+                        switch (desc->channel[chan].size) {
+                        case 32:
+                                so->sampler_variant = V3D_SAMPLER_STATE_32;
+                                break;
+                        case 16:
+                                so->sampler_variant = V3D_SAMPLER_STATE_16I;
+                                break;
+                        case 8:
+                                so->sampler_variant = V3D_SAMPLER_STATE_8I;
+                                break;
+                        }
+                }
+        } else {
+                if (v3d_get_tex_return_size(&screen->devinfo, sample_format,
+                                           PIPE_TEX_COMPARE_NONE) == 32) {
+                        if (util_format_is_alpha(sample_format))
+                                so->sampler_variant = V3D_SAMPLER_STATE_32_A;
+                        else
+                                so->sampler_variant = V3D_SAMPLER_STATE_32;
+                } else {
+                        if (util_format_is_luminance_alpha(sample_format))
+                                so->sampler_variant = V3D_SAMPLER_STATE_F16_LA;
+                        else if (util_format_is_alpha(sample_format))
+                                so->sampler_variant = V3D_SAMPLER_STATE_F16_A;
+                        else if (fmt_swizzle[0] == PIPE_SWIZZLE_Z)
+                                so->sampler_variant = V3D_SAMPLER_STATE_F16_BGRA;
+                        else
+                                so->sampler_variant = V3D_SAMPLER_STATE_F16;
+
+                }
+
+                if (util_format_is_unorm(sample_format)) {
+                        so->sampler_variant += (V3D_SAMPLER_STATE_F16_UNORM -
+                                                V3D_SAMPLER_STATE_F16);
+                } else if (util_format_is_snorm(sample_format)){
+                        so->sampler_variant += (V3D_SAMPLER_STATE_F16_SNORM -
+                                                V3D_SAMPLER_STATE_F16);
+                }
+        }
+#endif
+
         /* V3D still doesn't support sampling from raster textures, so we will
          * have to copy to a temporary tiled texture.
          */