From 3da39f22975703ad3688bfb4b658d219147ce9a1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 1 Nov 2017 14:39:47 -0700 Subject: [PATCH] broadcom/vc5: Introduce a helper for pre-packing our V3DXX structs. This is so much more pleasant to write than the manual V3D33_whatever_pack() calls, and will be useful for when we start doing actual per-V3D compiles. --- src/gallium/drivers/vc5/vc5_cl.h | 13 ++ src/gallium/drivers/vc5/vc5_state.c | 307 +++++++++++++--------------- 2 files changed, 155 insertions(+), 165 deletions(-) diff --git a/src/gallium/drivers/vc5/vc5_cl.h b/src/gallium/drivers/vc5/vc5_cl.h index 4c64f084d3d..f62376e296a 100644 --- a/src/gallium/drivers/vc5/vc5_cl.h +++ b/src/gallium/drivers/vc5/vc5_cl.h @@ -248,6 +248,19 @@ cl_get_emit_space(struct vc5_cl_out **cl, size_t size) cl_advance(&(cl)->next, sizeof(*packet)); \ } while (0) +#define v3dx_pack(packed, packet, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + cl_packet_pack(packet)(NULL, (uint8_t *)packed, &name); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED((uint8_t *)packed, \ + cl_packet_length(packet))); \ + _loop_terminate = NULL; \ + })) \ + /** * Helper function called by the XML-generated pack functions for filling in * an address field in shader records. diff --git a/src/gallium/drivers/vc5/vc5_state.c b/src/gallium/drivers/vc5/vc5_state.c index db921d63f36..ed32919f8ae 100644 --- a/src/gallium/drivers/vc5/vc5_state.c +++ b/src/gallium/drivers/vc5/vc5_state.c @@ -253,63 +253,61 @@ vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, util_format_description(elem->src_format); uint32_t r_size = desc->channel[0].size; - struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD attr_unpacked = { - /* vec_size == 0 means 4 */ - .vec_size = desc->nr_channels & 3, - .signed_int_type = (desc->channel[0].type == - UTIL_FORMAT_TYPE_SIGNED), - - .normalized_int_type = desc->channel[0].normalized, - .read_as_int_uint = desc->channel[0].pure_integer, - .instance_divisor = elem->instance_divisor, - }; - - switch (desc->channel[0].type) { - case UTIL_FORMAT_TYPE_FLOAT: - if (r_size == 32) { - attr_unpacked.type = ATTRIBUTE_FLOAT; - } else { - assert(r_size == 16); - attr_unpacked.type = ATTRIBUTE_HALF_FLOAT; - } - break; + const uint32_t size = + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); - case UTIL_FORMAT_TYPE_SIGNED: - case UTIL_FORMAT_TYPE_UNSIGNED: - switch (r_size) { - case 32: - attr_unpacked.type = ATTRIBUTE_INT; - break; - case 16: - attr_unpacked.type = ATTRIBUTE_SHORT; - break; - case 10: - attr_unpacked.type = ATTRIBUTE_INT2_10_10_10; + v3dx_pack(&so->attrs[i * size], + GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { + /* vec_size == 0 means 4 */ + attr.vec_size = desc->nr_channels & 3; + attr.signed_int_type = (desc->channel[0].type == + UTIL_FORMAT_TYPE_SIGNED); + + attr.normalized_int_type = desc->channel[0].normalized; + attr.read_as_int_uint = desc->channel[0].pure_integer; + attr.instance_divisor = elem->instance_divisor; + + switch (desc->channel[0].type) { + case UTIL_FORMAT_TYPE_FLOAT: + if (r_size == 32) { + attr.type = ATTRIBUTE_FLOAT; + } else { + assert(r_size == 16); + attr.type = ATTRIBUTE_HALF_FLOAT; + } break; - case 8: - attr_unpacked.type = ATTRIBUTE_BYTE; + + case UTIL_FORMAT_TYPE_SIGNED: + case UTIL_FORMAT_TYPE_UNSIGNED: + switch (r_size) { + case 32: + attr.type = ATTRIBUTE_INT; + break; + case 16: + attr.type = ATTRIBUTE_SHORT; + break; + case 10: + attr.type = ATTRIBUTE_INT2_10_10_10; + break; + case 8: + attr.type = ATTRIBUTE_BYTE; + break; + default: + fprintf(stderr, + "format %s unsupported\n", + desc->name); + attr.type = ATTRIBUTE_BYTE; + abort(); + } break; + default: fprintf(stderr, "format %s unsupported\n", desc->name); - attr_unpacked.type = ATTRIBUTE_BYTE; abort(); } - break; - - default: - fprintf(stderr, - "format %s unsupported\n", - desc->name); - abort(); } - - const uint32_t size = - cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); - V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_pack(NULL, - (uint8_t *)&so->attrs[i * size], - &attr_unpacked); } /* Set up the default attribute values in case any of the vertex @@ -462,26 +460,17 @@ vc5_create_sampler_state(struct pipe_context *pctx, (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); - struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = { - .s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest), - .t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest), - .r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest), - }; - V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL, - (uint8_t *)&so->p0, - &p0_unpacked); - - struct V3D33_TEXTURE_SHADER_STATE state_unpacked = { - cl_packet_header(TEXTURE_SHADER_STATE), + v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) { + p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest); + p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest); + p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest); + } - .min_level_of_detail = MAX2(cso->min_lod, 0.0), - .depth_compare_function = cso->compare_func, - .fixed_bias = cso->lod_bias, - }; - STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) == - cl_packet_length(TEXTURE_SHADER_STATE)); - cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state, - &state_unpacked); + v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { + tex.min_level_of_detail = MAX2(cso->min_lod, 0.0); + tex.depth_compare_function = cso->compare_func; + tex.fixed_bias = cso->lod_bias; + } return so; } @@ -543,27 +532,19 @@ vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, pipe_reference(NULL, &prsc->reference); - struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { - }; + v3dx_pack(&so->p1, TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1, p1) { + p1.return_word_0_of_texture_data = true; + if (vc5_get_tex_return_size(cso->format) == 16) { + p1.return_word_1_of_texture_data = true; + } else { + int chans = vc5_get_tex_return_channels(cso->format); - unpacked.return_word_0_of_texture_data = true; - if (vc5_get_tex_return_size(cso->format) == 16) { - unpacked.return_word_1_of_texture_data = true; - } else { - int chans = vc5_get_tex_return_channels(cso->format); - - if (chans > 1) - unpacked.return_word_1_of_texture_data = true; - if (chans > 2) - unpacked.return_word_2_of_texture_data = true; - if (chans > 3) - unpacked.return_word_3_of_texture_data = true; + p1.return_word_1_of_texture_data = chans > 1; + p1.return_word_2_of_texture_data = chans > 2; + p1.return_word_3_of_texture_data = chans > 3; + } } - V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL, - (uint8_t *)&so->p1, - &unpacked); - /* Compute the sampler view's swizzle up front. This will be plugged * into either the sampler (for 16-bit returns) or the shader's * texture key (for 32) @@ -583,94 +564,90 @@ vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, int msaa_scale = prsc->nr_samples > 1 ? 2 : 1; - struct V3D33_TEXTURE_SHADER_STATE state_unpacked = { - cl_packet_header(TEXTURE_SHADER_STATE), - - .image_width = prsc->width0 * msaa_scale, - .image_height = prsc->height0 * msaa_scale, - .image_depth = prsc->depth0, + v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { + tex.image_width = prsc->width0 * msaa_scale; + tex.image_height = prsc->height0 * msaa_scale; + tex.image_depth = prsc->depth0; + + tex.srgb = util_format_is_srgb(cso->format); + + tex.base_level = cso->u.tex.first_level; + tex.array_stride_64_byte_aligned = rsc->cube_map_stride / 64; + + if (prsc->nr_samples > 1) { + /* Using texture views to reinterpret formats on our + * MSAA textures won't work, because we don't lay out + * the bits in memory as it's expected -- for example, + * RGBA8 and RGB10_A2 are compatible in the + * ARB_texture_view spec, but in HW we lay them out as + * 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now + * to catch failures. + */ + assert(util_format_linear(cso->format) == + util_format_linear(prsc->format)); + uint32_t output_image_format = + vc5_get_rt_format(cso->format); + uint32_t internal_type; + uint32_t internal_bpp; + vc5_get_internal_type_bpp_for_output_format(output_image_format, + &internal_type, + &internal_bpp); + + switch (internal_type) { + case INTERNAL_TYPE_8: + tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8; + break; + case INTERNAL_TYPE_16F: + tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F; + break; + default: + unreachable("Bad MSAA texture type"); + } - .srgb = util_format_is_srgb(cso->format), + /* sRGB was stored in the tile buffer as linear and + * would have been encoded to sRGB on resolved tile + * buffer store. Note that this means we would need + * shader code if we wanted to read an MSAA sRGB + * texture without sRGB decode. + */ + tex.srgb = false; + } else { + tex.texture_type = vc5_get_tex_format(cso->format); + } - .base_level = cso->u.tex.first_level, - .array_stride_64_byte_aligned = rsc->cube_map_stride / 64, - }; + /* Note: Contrary to the docs, the swizzle still applies even + * if the return size is 32. It's just that you probably want + * to swizzle in the shader, because you need the Y/Z/W + * channels to be defined. + */ + if (vc5_get_tex_return_size(cso->format) != 32) { + tex.swizzle_r = translate_swizzle(so->swizzle[0]); + tex.swizzle_g = translate_swizzle(so->swizzle[1]); + tex.swizzle_b = translate_swizzle(so->swizzle[2]); + tex.swizzle_a = translate_swizzle(so->swizzle[3]); + } else { + tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + } - if (prsc->nr_samples > 1) { - /* Using texture views to reinterpret formats on our MSAA - * textures won't work, because we don't lay out the bits in - * memory as it's expected -- for example, RGBA8 and RGB10_A2 - * are compatible in the ARB_texture_view spec, but in HW we - * lay them out as 32bpp RGBA8 and 64bpp RGBA16F. Just assert - * for now to catch failures. + /* XXX: While we need to use this flag to enable tiled + * resource sharing (even a small shared buffer should be UIF, + * not UBLINEAR or raster), this is also at the moment + * patching up the fact that our resource layout's decisions + * about XOR don't quite match the HW's. */ - assert(util_format_linear(cso->format) == - util_format_linear(prsc->format)); - uint32_t output_image_format = vc5_get_rt_format(cso->format); - uint32_t internal_type; - uint32_t internal_bpp; - vc5_get_internal_type_bpp_for_output_format(output_image_format, - &internal_type, - &internal_bpp); - - switch (internal_type) { - case INTERNAL_TYPE_8: - state_unpacked.texture_type = TEXTURE_DATA_FORMAT_RGBA8; - break; - case INTERNAL_TYPE_16F: - state_unpacked.texture_type = TEXTURE_DATA_FORMAT_RGBA16F; + switch (rsc->slices[0].tiling) { + case VC5_TILING_UIF_NO_XOR: + case VC5_TILING_UIF_XOR: + tex.level_0_is_strictly_uif = true; + tex.level_0_xor_enable = false; break; default: - unreachable("Bad MSAA texture type"); + break; } - - /* sRGB was stored in the tile buffer as linear and would have - * been encoded to sRGB on resolved tile buffer store. Note - * that this means we would need shader code if we wanted to - * read an MSAA sRGB texture without sRGB decode. - */ - state_unpacked.srgb = false; - } else { - state_unpacked.texture_type = vc5_get_tex_format(cso->format); - } - - /* Note: Contrary to the docs, the swizzle still applies even - * if the return size is 32. It's just that you probably want - * to swizzle in the shader, because you need the Y/Z/W - * channels to be defined. - */ - if (vc5_get_tex_return_size(cso->format) != 32) { - state_unpacked.swizzle_r = translate_swizzle(so->swizzle[0]); - state_unpacked.swizzle_g = translate_swizzle(so->swizzle[1]); - state_unpacked.swizzle_b = translate_swizzle(so->swizzle[2]); - state_unpacked.swizzle_a = translate_swizzle(so->swizzle[3]); - } else { - state_unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); - state_unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); - state_unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); - state_unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); - } - - /* XXX: While we need to use this flag to enable tiled - * resource sharing (even a small shared buffer should be UIF, - * not UBLINEAR or raster), this is also at the moment - * patching up the fact that our resource layout's decisions - * about XOR don't quite match the HW's. - */ - switch (rsc->slices[0].tiling) { - case VC5_TILING_UIF_NO_XOR: - case VC5_TILING_UIF_XOR: - state_unpacked.level_0_is_strictly_uif = true; - state_unpacked.level_0_xor_enable = false; - break; - default: - break; - } - - STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) == - cl_packet_length(TEXTURE_SHADER_STATE)); - cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state, - &state_unpacked); + }; return &so->base; } -- 2.30.2