From 3d4d388e3929d7948b62d90867357aecbfba5aeb Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 12 May 2018 23:50:04 +0200 Subject: [PATCH] radv: Fix up 2_10_10_10 alpha sign. Pre-Vega HW always interprets the alpha for this format as unsigned, so we have to implement a fixup to do the sign correctly for signed formats. v2: Improve indexing mess. CC: 18.0 18.1 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106480 Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_nir_to_llvm.c | 68 ++++++++++++++++++++++++++----- src/amd/vulkan/radv_pipeline.c | 30 ++++++++++++-- src/amd/vulkan/radv_private.h | 1 + src/amd/vulkan/radv_shader.h | 12 ++++++ 4 files changed, 98 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index f98940f0d8f..47c52dc437b 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -1865,6 +1865,47 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, return ac_build_load_to_sgpr(&ctx->ac, list, index); } +/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. + * so we may need to fix it up. */ +static LLVMValueRef +adjust_vertex_fetch_alpha(struct radv_shader_context *ctx, + unsigned adjustment, + LLVMValueRef alpha) +{ + if (adjustment == RADV_ALPHA_ADJUST_NONE) + return alpha; + + LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0); + + if (adjustment == RADV_ALPHA_ADJUST_SSCALED) + alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, ""); + else + alpha = ac_to_integer(&ctx->ac, alpha); + + /* For the integer-like cases, do a natural sign extension. + * + * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 + * and happen to contain 0, 1, 2, 3 as the two LSBs of the + * exponent. + */ + alpha = LLVMBuildShl(ctx->ac.builder, alpha, + adjustment == RADV_ALPHA_ADJUST_SNORM ? 
+ LLVMConstInt(ctx->ac.i32, 7, 0) : c30, ""); + alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, ""); + + /* Convert back to the right type. */ + if (adjustment == RADV_ALPHA_ADJUST_SNORM) { + LLVMValueRef clamp; + LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0); + alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, ""); + clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, ""); + alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, ""); + } else if (adjustment == RADV_ALPHA_ADJUST_SSCALED) { + alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, ""); + } + + return alpha; +} static void handle_vs_input_decl(struct radv_shader_context *ctx, @@ -1875,18 +1916,19 @@ handle_vs_input_decl(struct radv_shader_context *ctx, LLVMValueRef t_list; LLVMValueRef input; LLVMValueRef buffer_index; - int index = variable->data.location - VERT_ATTRIB_GENERIC0; - int idx = variable->data.location; unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); uint8_t input_usage_mask = ctx->shader_info->info.vs.input_usage_mask[variable->data.location]; unsigned num_channels = util_last_bit(input_usage_mask); - variable->data.driver_location = idx * 4; + variable->data.driver_location = variable->data.location * 4; + + for (unsigned i = 0; i < attrib_count; ++i) { + LLVMValueRef output[4]; + unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0; - for (unsigned i = 0; i < attrib_count; ++i, ++idx) { - if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) { - uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[index + i]; + if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) { + uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index]; if (divisor) { buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, @@ -1910,7 +1952,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx, } else buffer_index = 
LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, ctx->abi.base_vertex, ""); - t_offset = LLVMConstInt(ctx->ac.i32, index + i, false); + t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false); t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); @@ -1923,9 +1965,15 @@ handle_vs_input_decl(struct radv_shader_context *ctx, for (unsigned chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - ctx->inputs[ac_llvm_reg_index_soa(idx, chan)] = - ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, - input, llvm_chan, "")); + output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, ""); + } + + unsigned alpha_adjust = (ctx->options->key.vs.alpha_adjust >> (attrib_index * 2)) & 3; + output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]); + + for (unsigned chan = 0; chan < 4; chan++) { + ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = + ac_to_integer(&ctx->ac, output[chan]); } } } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 7a577dae413..52734a308a9 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1813,13 +1813,36 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline, } for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) { - unsigned binding; - binding = input_state->pVertexAttributeDescriptions[i].binding; + unsigned location = input_state->pVertexAttributeDescriptions[i].location; + unsigned binding = input_state->pVertexAttributeDescriptions[i].binding; if (binding_input_rate & (1u << binding)) { - unsigned location = input_state->pVertexAttributeDescriptions[i].location; key.instance_rate_inputs |= 1u << location; key.instance_rate_divisors[location] = instance_rate_divisors[binding]; } + + if (pipeline->device->physical_device->rad_info.chip_class <= VI && + pipeline->device->physical_device->rad_info.family != CHIP_STONEY) { + VkFormat 
format = input_state->pVertexAttributeDescriptions[i].format; + uint64_t adjust; + switch(format) { + case VK_FORMAT_A2R10G10B10_SNORM_PACK32: + case VK_FORMAT_A2B10G10R10_SNORM_PACK32: + adjust = RADV_ALPHA_ADJUST_SNORM; + break; + case VK_FORMAT_A2R10G10B10_SSCALED_PACK32: + case VK_FORMAT_A2B10G10R10_SSCALED_PACK32: + adjust = RADV_ALPHA_ADJUST_SSCALED; + break; + case VK_FORMAT_A2R10G10B10_SINT_PACK32: + case VK_FORMAT_A2B10G10R10_SINT_PACK32: + adjust = RADV_ALPHA_ADJUST_SINT; + break; + default: + adjust = 0; + break; + } + key.vertex_alpha_adjust |= adjust << (2 * location); + } } if (pCreateInfo->pTessellationState) @@ -1848,6 +1871,7 @@ radv_fill_shader_keys(struct radv_shader_variant_key *keys, nir_shader **nir) { keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs; + keys[MESA_SHADER_VERTEX].vs.alpha_adjust = key->vertex_alpha_adjust; for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i]; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 4805acab280..e3eed887fae 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -352,6 +352,7 @@ struct radv_pipeline_cache { struct radv_pipeline_key { uint32_t instance_rate_inputs; uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; + uint64_t vertex_alpha_adjust; unsigned tess_input_vertices; uint32_t col_format; uint32_t is_int8; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 12878307ecd..b711cba80cf 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -55,9 +55,21 @@ struct radv_shader_module { char data[0]; }; +enum { + RADV_ALPHA_ADJUST_NONE = 0, + RADV_ALPHA_ADJUST_SNORM = 1, + RADV_ALPHA_ADJUST_SINT = 2, + RADV_ALPHA_ADJUST_SSCALED = 3, +}; + struct radv_vs_variant_key { uint32_t instance_rate_inputs; uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; + + /* For 2_10_10_10 formats the 
alpha is handled as unsigned by pre-Vega HW, * so we may need to fix it up. */ + uint64_t alpha_adjust; + uint32_t as_es:1; uint32_t as_ls:1; uint32_t export_prim_id:1; -- 2.30.2