From: Bas Nieuwenhuizen Date: Sun, 8 Apr 2018 08:15:21 +0000 (+0200) Subject: radv: Implement VK_EXT_vertex_attribute_divisor. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6ff98dbf7c1420edeae519b1596bb9d4cca4b065;p=mesa.git radv: Implement VK_EXT_vertex_attribute_divisor. Pretty straight forward, just pass the divisors through the shader key and then do a LLVM divide. Reviewed-by: Samuel Pitoiset --- diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index d912fac8ec0..a4a0ea6dd5f 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1002,6 +1002,12 @@ void radv_GetPhysicalDeviceProperties2( properties->vgprAllocationGranularity = 4; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { + VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties = + (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext; + properties->maxVertexAttribDivisor = UINT32_MAX; + break; + } default: break; } diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index a680f42dec7..db37d617f9e 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -93,6 +93,7 @@ EXTENSIONS = [ Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), + Extension('VK_EXT_vertex_attribute_divisor', 1, True), Extension('VK_AMD_draw_indirect_count', 1, True), Extension('VK_AMD_gcn_shader', 1, True), Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'), diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 2f0864da462..a6b48e297da 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -1794,14 +1794,26 @@ handle_vs_input_decl(struct radv_shader_context *ctx, for (unsigned i = 0; i < attrib_count; ++i, ++idx) { if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) { - buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, - ctx->abi.start_instance, ""); - if (ctx->options->key.vs.as_ls) { - ctx->shader_info->vs.vgpr_comp_cnt = - MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); + uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[index + i]; + + if (divisor) { + buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, + ctx->abi.start_instance, ""); + + if (divisor != 1) { + buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index, + LLVMConstInt(ctx->ac.i32, divisor, 0), ""); + } + + if (ctx->options->key.vs.as_ls) { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); + } else { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); + } } else { - ctx->shader_info->vs.vgpr_comp_cnt = - MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); + buffer_index = ctx->ac.i32_0; } } else buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 6735b36846a..fce3201c198 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1743,22 +1743,38 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline, { const VkPipelineVertexInputStateCreateInfo *input_state = pCreateInfo->pVertexInputState; + const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state = + vk_find_struct_const(input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); + struct radv_pipeline_key key; memset(&key, 0, sizeof(key)); key.has_multiview_view_index = has_view_index; uint32_t binding_input_rate = 0; + uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; for (unsigned i = 0; i < input_state->vertexBindingDescriptionCount; ++i) { - if (input_state->pVertexBindingDescriptions[i].inputRate) - binding_input_rate |= 1u << input_state->pVertexBindingDescriptions[i].binding; + if (input_state->pVertexBindingDescriptions[i].inputRate) { + unsigned binding = input_state->pVertexBindingDescriptions[i].binding; + binding_input_rate |= 1u << binding; + instance_rate_divisors[binding] = 1; + } + } + if (divisor_state) { + for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) { + instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] = + divisor_state->pVertexBindingDivisors[i].divisor; + } } for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) { unsigned binding; binding = input_state->pVertexAttributeDescriptions[i].binding; - if (binding_input_rate & (1u << binding)) - key.instance_rate_inputs |= 1u << input_state->pVertexAttributeDescriptions[i].location; + if (binding_input_rate & (1u << binding)) { + unsigned location = input_state->pVertexAttributeDescriptions[i].location; + key.instance_rate_inputs |= 1u << location; + key.instance_rate_divisors[location] = instance_rate_divisors[binding]; + } } if (pCreateInfo->pTessellationState) @@ -1787,6 +1803,8 @@ radv_fill_shader_keys(struct radv_shader_variant_key *keys, nir_shader **nir) { keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs; + for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) + keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i]; if (nir[MESA_SHADER_TESS_CTRL]) { keys[MESA_SHADER_VERTEX].vs.as_ls = true; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 1bcc3a906ec..fb94c50cb00 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -347,6 +347,7 @@ struct radv_pipeline_cache { struct radv_pipeline_key { uint32_t instance_rate_inputs; + uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; unsigned tess_input_vertices; uint32_t col_format; uint32_t is_int8; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index cbb7394eeae..6588b787724 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -57,6 +57,7 @@ struct radv_shader_module { struct radv_vs_variant_key { uint32_t instance_rate_inputs; + uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; uint32_t as_es:1; uint32_t as_ls:1; uint32_t export_prim_id:1;