From 3da39f22975703ad3688bfb4b658d219147ce9a1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 1 Nov 2017 14:39:47 -0700
Subject: [PATCH] broadcom/vc5: Introduce a helper for pre-packing our V3DXX
 structs.

This is so much more pleasant to write than the manual
V3D33_whatever_pack() calls, and will be useful for when we start doing
actual per-V3D compiles.
---
 src/gallium/drivers/vc5/vc5_cl.h    |  13 ++
 src/gallium/drivers/vc5/vc5_state.c | 307 +++++++++++++---------------
 2 files changed, 155 insertions(+), 165 deletions(-)

diff --git a/src/gallium/drivers/vc5/vc5_cl.h b/src/gallium/drivers/vc5/vc5_cl.h
index 4c64f084d3d..f62376e296a 100644
--- a/src/gallium/drivers/vc5/vc5_cl.h
+++ b/src/gallium/drivers/vc5/vc5_cl.h
@@ -248,6 +248,19 @@ cl_get_emit_space(struct vc5_cl_out **cl, size_t size)
         cl_advance(&(cl)->next, sizeof(*packet));                \
 } while (0)
 
+#define v3dx_pack(packed, packet, name)                          \
+        for (struct cl_packet_struct(packet) name = {            \
+                cl_packet_header(packet)                         \
+        },                                                       \
+        *_loop_terminate = &name;                                \
+        __builtin_expect(_loop_terminate != NULL, 1);            \
+        ({                                                       \
+                cl_packet_pack(packet)(NULL, (uint8_t *)packed, &name); \
+                VG(VALGRIND_CHECK_MEM_IS_DEFINED((uint8_t *)packed, \
+                                                 cl_packet_length(packet))); \
+                _loop_terminate = NULL;                          \
+        }))                                                      \
+
 /**
  * Helper function called by the XML-generated pack functions for filling in
  * an address field in shader records.
diff --git a/src/gallium/drivers/vc5/vc5_state.c b/src/gallium/drivers/vc5/vc5_state.c
index db921d63f36..ed32919f8ae 100644
--- a/src/gallium/drivers/vc5/vc5_state.c
+++ b/src/gallium/drivers/vc5/vc5_state.c
@@ -253,63 +253,61 @@ vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
                         util_format_description(elem->src_format);
                 uint32_t r_size = desc->channel[0].size;
 
-                struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD attr_unpacked = {
-                        /* vec_size == 0 means 4 */
-                        .vec_size = desc->nr_channels & 3,
-                        .signed_int_type = (desc->channel[0].type ==
-                                            UTIL_FORMAT_TYPE_SIGNED),
-
-                        .normalized_int_type = desc->channel[0].normalized,
-                        .read_as_int_uint = desc->channel[0].pure_integer,
-                        .instance_divisor = elem->instance_divisor,
-                };
-
-                switch (desc->channel[0].type) {
-                case UTIL_FORMAT_TYPE_FLOAT:
-                        if (r_size == 32) {
-                                attr_unpacked.type = ATTRIBUTE_FLOAT;
-                        } else {
-                                assert(r_size == 16);
-                                attr_unpacked.type = ATTRIBUTE_HALF_FLOAT;
-                        }
-                        break;
+                const uint32_t size =
+                        cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
 
-                case UTIL_FORMAT_TYPE_SIGNED:
-                case UTIL_FORMAT_TYPE_UNSIGNED:
-                        switch (r_size) {
-                        case 32:
-                                attr_unpacked.type = ATTRIBUTE_INT;
-                                break;
-                        case 16:
-                                attr_unpacked.type = ATTRIBUTE_SHORT;
-                                break;
-                        case 10:
-                                attr_unpacked.type = ATTRIBUTE_INT2_10_10_10;
+                v3dx_pack(&so->attrs[i * size],
+                          GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+                        /* vec_size == 0 means 4 */
+                        attr.vec_size = desc->nr_channels & 3;
+                        attr.signed_int_type = (desc->channel[0].type ==
+                                                UTIL_FORMAT_TYPE_SIGNED);
+
+                        attr.normalized_int_type = desc->channel[0].normalized;
+                        attr.read_as_int_uint = desc->channel[0].pure_integer;
+                        attr.instance_divisor = elem->instance_divisor;
+
+                        switch (desc->channel[0].type) {
+                        case UTIL_FORMAT_TYPE_FLOAT:
+                                if (r_size == 32) {
+                                        attr.type = ATTRIBUTE_FLOAT;
+                                } else {
+                                        assert(r_size == 16);
+                                        attr.type = ATTRIBUTE_HALF_FLOAT;
+                                }
                                 break;
-                        case 8:
-                                attr_unpacked.type = ATTRIBUTE_BYTE;
+
+                        case UTIL_FORMAT_TYPE_SIGNED:
+                        case UTIL_FORMAT_TYPE_UNSIGNED:
+                                switch (r_size) {
+                                case 32:
+                                        attr.type = ATTRIBUTE_INT;
+                                        break;
+                                case 16:
+                                        attr.type = ATTRIBUTE_SHORT;
+                                        break;
+                                case 10:
+                                        attr.type = ATTRIBUTE_INT2_10_10_10;
+                                        break;
+                                case 8:
+                                        attr.type = ATTRIBUTE_BYTE;
+                                        break;
+                                default:
+                                        fprintf(stderr,
+                                                "format %s unsupported\n",
+                                                desc->name);
+                                        attr.type = ATTRIBUTE_BYTE;
+                                        abort();
+                                }
                                 break;
+
                         default:
                                 fprintf(stderr,
                                         "format %s unsupported\n",
                                         desc->name);
-                                attr_unpacked.type = ATTRIBUTE_BYTE;
                                 abort();
                         }
-                        break;
-
-                default:
-                        fprintf(stderr,
-                                "format %s unsupported\n",
-                                desc->name);
-                        abort();
                 }
-
-                const uint32_t size =
-                        cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
-                V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_pack(NULL,
-                                                            (uint8_t *)&so->attrs[i * size],
-                                                            &attr_unpacked);
         }
 
         /* Set up the default attribute values in case any of the vertex
@@ -462,26 +460,17 @@ vc5_create_sampler_state(struct pipe_context *pctx,
                 (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
                  cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
 
-        struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = {
-                .s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest),
-                .t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest),
-                .r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest),
-        };
-        V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
-                                                         (uint8_t *)&so->p0,
-                                                         &p0_unpacked);
-
-        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
-                cl_packet_header(TEXTURE_SHADER_STATE),
+        v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) {
+                p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest);
+                p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest);
+                p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest);
+        }
 
-                .min_level_of_detail = MAX2(cso->min_lod, 0.0),
-                .depth_compare_function = cso->compare_func,
-                .fixed_bias = cso->lod_bias,
-        };
-        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
-                      cl_packet_length(TEXTURE_SHADER_STATE));
-        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
-                                             &state_unpacked);
+        v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
+                tex.min_level_of_detail = MAX2(cso->min_lod, 0.0);
+                tex.depth_compare_function = cso->compare_func;
+                tex.fixed_bias = cso->lod_bias;
+        }
 
         return so;
 }
@@ -543,27 +532,19 @@ vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
 
         pipe_reference(NULL, &prsc->reference);
 
-        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
-        };
+        v3dx_pack(&so->p1, TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1, p1) {
+                p1.return_word_0_of_texture_data = true;
+                if (vc5_get_tex_return_size(cso->format) == 16) {
+                        p1.return_word_1_of_texture_data = true;
+                } else {
+                        int chans = vc5_get_tex_return_channels(cso->format);
 
-        unpacked.return_word_0_of_texture_data = true;
-        if (vc5_get_tex_return_size(cso->format) == 16) {
-                unpacked.return_word_1_of_texture_data = true;
-        } else {
-                int chans = vc5_get_tex_return_channels(cso->format);
-
-                if (chans > 1)
-                        unpacked.return_word_1_of_texture_data = true;
-                if (chans > 2)
-                        unpacked.return_word_2_of_texture_data = true;
-                if (chans > 3)
-                        unpacked.return_word_3_of_texture_data = true;
+                        p1.return_word_1_of_texture_data = chans > 1;
+                        p1.return_word_2_of_texture_data = chans > 2;
+                        p1.return_word_3_of_texture_data = chans > 3;
+                }
         }
 
-        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL,
-                                                         (uint8_t *)&so->p1,
-                                                         &unpacked);
-
         /* Compute the sampler view's swizzle up front. This will be plugged
          * into either the sampler (for 16-bit returns) or the shader's
          * texture key (for 32)
@@ -583,94 +564,90 @@ vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
 
         int msaa_scale = prsc->nr_samples > 1 ? 2 : 1;
 
-        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
-                cl_packet_header(TEXTURE_SHADER_STATE),
-
-                .image_width = prsc->width0 * msaa_scale,
-                .image_height = prsc->height0 * msaa_scale,
-                .image_depth = prsc->depth0,
+        v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
+                tex.image_width = prsc->width0 * msaa_scale;
+                tex.image_height = prsc->height0 * msaa_scale;
+                tex.image_depth = prsc->depth0;
+
+                tex.srgb = util_format_is_srgb(cso->format);
+
+                tex.base_level = cso->u.tex.first_level;
+                tex.array_stride_64_byte_aligned = rsc->cube_map_stride / 64;
+
+                if (prsc->nr_samples > 1) {
+                        /* Using texture views to reinterpret formats on our
+                         * MSAA textures won't work, because we don't lay out
+                         * the bits in memory as it's expected -- for example,
+                         * RGBA8 and RGB10_A2 are compatible in the
+                         * ARB_texture_view spec, but in HW we lay them out as
+                         * 32bpp RGBA8 and 64bpp RGBA16F.  Just assert for now
+                         * to catch failures.
+                         */
+                        assert(util_format_linear(cso->format) ==
+                               util_format_linear(prsc->format));
+                        uint32_t output_image_format =
+                                vc5_get_rt_format(cso->format);
+                        uint32_t internal_type;
+                        uint32_t internal_bpp;
+                        vc5_get_internal_type_bpp_for_output_format(output_image_format,
+                                                                    &internal_type,
+                                                                    &internal_bpp);
+
+                        switch (internal_type) {
+                        case INTERNAL_TYPE_8:
+                                tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8;
+                                break;
+                        case INTERNAL_TYPE_16F:
+                                tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F;
+                                break;
+                        default:
+                                unreachable("Bad MSAA texture type");
+                        }
 
-                .srgb = util_format_is_srgb(cso->format),
+                        /* sRGB was stored in the tile buffer as linear and
+                         * would have been encoded to sRGB on resolved tile
+                         * buffer store.  Note that this means we would need
+                         * shader code if we wanted to read an MSAA sRGB
+                         * texture without sRGB decode.
+                         */
+                        tex.srgb = false;
+                } else {
+                        tex.texture_type = vc5_get_tex_format(cso->format);
+                }
 
-                .base_level = cso->u.tex.first_level,
-                .array_stride_64_byte_aligned = rsc->cube_map_stride / 64,
-        };
+                /* Note: Contrary to the docs, the swizzle still applies even
+                 * if the return size is 32.  It's just that you probably want
+                 * to swizzle in the shader, because you need the Y/Z/W
+                 * channels to be defined.
+                 */
+                if (vc5_get_tex_return_size(cso->format) != 32) {
+                        tex.swizzle_r = translate_swizzle(so->swizzle[0]);
+                        tex.swizzle_g = translate_swizzle(so->swizzle[1]);
+                        tex.swizzle_b = translate_swizzle(so->swizzle[2]);
+                        tex.swizzle_a = translate_swizzle(so->swizzle[3]);
+                } else {
+                        tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
+                        tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
+                        tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
+                        tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
+                }
 
-        if (prsc->nr_samples > 1) {
-                /* Using texture views to reinterpret formats on our MSAA
-                 * textures won't work, because we don't lay out the bits in
-                 * memory as it's expected -- for example, RGBA8 and RGB10_A2
-                 * are compatible in the ARB_texture_view spec, but in HW we
-                 * lay them out as 32bpp RGBA8 and 64bpp RGBA16F.  Just assert
-                 * for now to catch failures.
+                /* XXX: While we need to use this flag to enable tiled
+                 * resource sharing (even a small shared buffer should be UIF,
+                 * not UBLINEAR or raster), this is also at the moment
+                 * patching up the fact that our resource layout's decisions
+                 * about XOR don't quite match the HW's.
                  */
-                assert(util_format_linear(cso->format) ==
-                       util_format_linear(prsc->format));
-                uint32_t output_image_format = vc5_get_rt_format(cso->format);
-                uint32_t internal_type;
-                uint32_t internal_bpp;
-                vc5_get_internal_type_bpp_for_output_format(output_image_format,
-                                                            &internal_type,
-                                                            &internal_bpp);
-
-                switch (internal_type) {
-                case INTERNAL_TYPE_8:
-                        state_unpacked.texture_type = TEXTURE_DATA_FORMAT_RGBA8;
-                        break;
-                case INTERNAL_TYPE_16F:
-                        state_unpacked.texture_type = TEXTURE_DATA_FORMAT_RGBA16F;
+                switch (rsc->slices[0].tiling) {
+                case VC5_TILING_UIF_NO_XOR:
+                case VC5_TILING_UIF_XOR:
+                        tex.level_0_is_strictly_uif = true;
+                        tex.level_0_xor_enable = false;
                         break;
                 default:
-                        unreachable("Bad MSAA texture type");
+                        break;
                 }
-
-                /* sRGB was stored in the tile buffer as linear and would have
-                 * been encoded to sRGB on resolved tile buffer store.  Note
-                 * that this means we would need shader code if we wanted to
-                 * read an MSAA sRGB texture without sRGB decode.
-                 */
-                state_unpacked.srgb = false;
-        } else {
-                state_unpacked.texture_type = vc5_get_tex_format(cso->format);
-        }
-
-        /* Note: Contrary to the docs, the swizzle still applies even
-         * if the return size is 32.  It's just that you probably want
-         * to swizzle in the shader, because you need the Y/Z/W
-         * channels to be defined.
-         */
-        if (vc5_get_tex_return_size(cso->format) != 32) {
-                state_unpacked.swizzle_r = translate_swizzle(so->swizzle[0]);
-                state_unpacked.swizzle_g = translate_swizzle(so->swizzle[1]);
-                state_unpacked.swizzle_b = translate_swizzle(so->swizzle[2]);
-                state_unpacked.swizzle_a = translate_swizzle(so->swizzle[3]);
-        } else {
-                state_unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
-                state_unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
-                state_unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
-                state_unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
-        }
-
-        /* XXX: While we need to use this flag to enable tiled
-         * resource sharing (even a small shared buffer should be UIF,
-         * not UBLINEAR or raster), this is also at the moment
-         * patching up the fact that our resource layout's decisions
-         * about XOR don't quite match the HW's.
-         */
-        switch (rsc->slices[0].tiling) {
-        case VC5_TILING_UIF_NO_XOR:
-        case VC5_TILING_UIF_XOR:
-                state_unpacked.level_0_is_strictly_uif = true;
-                state_unpacked.level_0_xor_enable = false;
-                break;
-        default:
-                break;
-        }
-
-        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
-                      cl_packet_length(TEXTURE_SHADER_STATE));
-        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
-                                             &state_unpacked);
+        };
 
         return &so->base;
 }
-- 
2.30.2