From 5ff651c0a7cabcd1b7c348f3a3509aa6c6e406b7 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 1 Jul 2019 02:19:13 +0200 Subject: [PATCH] radv: Move more stuff to variant create time. Due to them depending on the linker result. Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_nir_to_llvm.c | 58 -------------------------- src/amd/vulkan/radv_shader.c | 68 +++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 61 deletions(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 9c11f605535..934d98bcc8f 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3886,70 +3886,12 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, fprintf(stderr, "compile failed\n"); } - if (options->dump_shader) - fprintf(stderr, "disasm:\n%s\n", binary.disasm_string); - ac_shader_binary_read_config(&binary, &config, 0, options->supports_spill); LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); LLVMDisposeModule(llvm_module); LLVMContextDispose(ctx); - if (stage == MESA_SHADER_FRAGMENT) { - shader_info->num_input_vgprs = 0; - if (G_0286CC_PERSP_SAMPLE_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_CENTER_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_CENTROID_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_PULL_MODEL_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 3; - if (G_0286CC_LINEAR_SAMPLE_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINEAR_CENTER_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINEAR_CENTROID_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINE_STIPPLE_TEX_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_X_FLOAT_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_Y_FLOAT_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_Z_FLOAT_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_W_FLOAT_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_FRONT_FACE_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_ANCILLARY_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_SAMPLE_COVERAGE_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_FIXED_PT_ENA(config.spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - } - config.num_vgprs = MAX2(config.num_vgprs, shader_info->num_input_vgprs); - - /* +3 for scratch wave offset and VCC */ - config.num_sgprs = MAX2(config.num_sgprs, - shader_info->num_input_sgprs + 3); - - /* Enable 64-bit and 16-bit denormals, because there is no performance - * cost. - * - * If denormals are enabled, all floating-point output modifiers are - * ignored. - * - * Don't enable denormals for 32-bit floats, because: - * - Floating-point output modifiers would be ignored by the hw. - * - Some opcodes don't support denormals, such as v_mad_f32. We would - * have to stop using those. - * - GFX6 & GFX7 would be very slow. - */ - config.float_mode |= V_00B028_FP_64_DENORMS; - size_t disasm_size = binary.disasm_string ? strlen(binary.disasm_string) : 0; size_t llvm_ir_size = binary.llvm_ir_string ? strlen(binary.llvm_ir_string) : 0; size_t alloc_size = sizeof(struct radv_shader_binary_legacy) + binary.code_size + diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index c30c22fbf7d..3e2966b7856 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -478,8 +478,65 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, { bool scratch_enabled = config_in->scratch_bytes_per_wave > 0; unsigned vgpr_comp_cnt = 0; + unsigned num_input_vgprs = info->num_input_vgprs; + + if (stage == MESA_SHADER_FRAGMENT) { + num_input_vgprs = 0; + if (G_0286CC_PERSP_SAMPLE_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTER_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTROID_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 2; + if (G_0286CC_PERSP_PULL_MODEL_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 3; + if (G_0286CC_LINEAR_SAMPLE_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTER_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTROID_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 2; + if (G_0286CC_LINE_STIPPLE_TEX_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + if (G_0286CC_POS_X_FLOAT_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + if (G_0286CC_POS_Y_FLOAT_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + if (G_0286CC_POS_Z_FLOAT_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + if (G_0286CC_POS_W_FLOAT_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + if (G_0286CC_FRONT_FACE_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + if (G_0286CC_ANCILLARY_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + if (G_0286CC_SAMPLE_COVERAGE_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + if (G_0286CC_POS_FIXED_PT_ENA(config_in->spi_ps_input_addr)) + num_input_vgprs += 1; + } + + unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs); + /* +3 for scratch wave offset and VCC */ + unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3); *config_out = *config_in; + config_out->num_vgprs = num_vgprs; + config_out->num_sgprs = num_sgprs; + + /* Enable 64-bit and 16-bit denormals, because there is no performance + * cost. + * + * If denormals are enabled, all floating-point output modifiers are + * ignored. + * + * Don't enable denormals for 32-bit floats, because: + * - Floating-point output modifiers would be ignored by the hw. + * - Some opcodes don't support denormals, such as v_mad_f32. We would + * have to stop using those. + * - GFX6 & GFX7 would be very slow. + */ + config_out->float_mode |= V_00B028_FP_64_DENORMS; config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) | S_00B12C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5) | @@ -490,10 +547,10 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) | S_00B12C_SO_EN(!!info->info.so.num_outputs); - config_out->rsrc1 = S_00B848_VGPRS((config_in->num_vgprs - 1) / 4) | - S_00B848_SGPRS((config_in->num_sgprs - 1) / 8) | + config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / 4) | + S_00B848_SGPRS((num_sgprs - 1) / 8) | S_00B848_DX10_CLAMP(1) | - S_00B848_FLOAT_MODE(config_in->float_mode); + S_00B848_FLOAT_MODE(config_out->float_mode); switch (stage) { case MESA_SHADER_TESS_EVAL: @@ -807,6 +864,11 @@ shader_variant_compile(struct radv_device *device, return NULL; } + if (options->dump_shader) { + fprintf(stderr, "disasm:\n%s\n", variant->disasm_string); + } + + if (device->keep_shader_info) { if (!gs_copy_shader && !module->nir) { variant->nir = *shaders; -- 2.30.2