From 726a31df705bba61b91152a84bd0abaea8418768 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 1 Jul 2019 01:29:24 +0200 Subject: [PATCH] radv: Add the concept of radv shader binaries. This simplifies a bunch of stuff by (1) Keeping all the things in a single allocation, making things easier for the cache. (2) creating a shader_variant creation helper. This is immediately put to use by creating rtld shader binaries. This is the main reason for the binaries, as we need to do the linking at upload time, i.e. post caching. We do not enable rtld yet. Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_nir_to_llvm.c | 93 +++++--- src/amd/vulkan/radv_pipeline.c | 43 ++-- src/amd/vulkan/radv_pipeline_cache.c | 62 ++---- src/amd/vulkan/radv_private.h | 14 +- src/amd/vulkan/radv_shader.c | 314 +++++++++++++++++---------- src/amd/vulkan/radv_shader.h | 53 ++++- 6 files changed, 346 insertions(+), 233 deletions(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index a7e949a1c04..9c11f605535 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3863,32 +3863,33 @@ static unsigned ac_llvm_compile(LLVMModuleRef M, static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module, - struct ac_shader_binary *binary, - struct ac_shader_config *config, + struct radv_shader_binary **rbinary, struct radv_shader_variant_info *shader_info, gl_shader_stage stage, const struct radv_nir_compiler_options *options) { + struct ac_shader_binary binary; + struct ac_shader_config config; if (options->dump_shader) ac_dump_module(llvm_module); - memset(binary, 0, sizeof(*binary)); + memset(&binary, 0, sizeof(binary)); if (options->record_llvm_ir) { char *llvm_ir = LLVMPrintModuleToString(llvm_module); - binary->llvm_ir_string = strdup(llvm_ir); + binary.llvm_ir_string = strdup(llvm_ir); LLVMDisposeMessage(llvm_ir); } - int v = ac_llvm_compile(llvm_module, binary, ac_llvm); + int v = 
ac_llvm_compile(llvm_module, &binary, ac_llvm); if (v) { fprintf(stderr, "compile failed\n"); } if (options->dump_shader) - fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); + fprintf(stderr, "disasm:\n%s\n", binary.disasm_string); - ac_shader_binary_read_config(binary, config, 0, options->supports_spill); + ac_shader_binary_read_config(&binary, &config, 0, options->supports_spill); LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); LLVMDisposeModule(llvm_module); @@ -3896,43 +3897,43 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, if (stage == MESA_SHADER_FRAGMENT) { shader_info->num_input_vgprs = 0; - if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) + if (G_0286CC_PERSP_SAMPLE_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr)) + if (G_0286CC_PERSP_CENTER_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr)) + if (G_0286CC_PERSP_CENTROID_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr)) + if (G_0286CC_PERSP_PULL_MODEL_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 3; - if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr)) + if (G_0286CC_LINEAR_SAMPLE_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 2; - if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr)) + if (G_0286CC_LINEAR_CENTER_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 2; - if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr)) + if (G_0286CC_LINEAR_CENTROID_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 2; - if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr)) + if (G_0286CC_LINE_STIPPLE_TEX_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr)) + if 
(G_0286CC_POS_X_FLOAT_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr)) + if (G_0286CC_POS_Y_FLOAT_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr)) + if (G_0286CC_POS_Z_FLOAT_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr)) + if (G_0286CC_POS_W_FLOAT_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; - if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) + if (G_0286CC_FRONT_FACE_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; - if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) + if (G_0286CC_ANCILLARY_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; - if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr)) + if (G_0286CC_SAMPLE_COVERAGE_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr)) + if (G_0286CC_POS_FIXED_PT_ENA(config.spi_ps_input_addr)) shader_info->num_input_vgprs += 1; } - config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs); + config.num_vgprs = MAX2(config.num_vgprs, shader_info->num_input_vgprs); /* +3 for scratch wave offset and VCC */ - config->num_sgprs = MAX2(config->num_sgprs, + config.num_sgprs = MAX2(config.num_sgprs, shader_info->num_input_sgprs + 3); /* Enable 64-bit and 16-bit denormals, because there is no performance @@ -3947,7 +3948,35 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, * have to stop using those. * - GFX6 & GFX7 would be very slow. */ - config->float_mode |= V_00B028_FP_64_DENORMS; + config.float_mode |= V_00B028_FP_64_DENORMS; + + size_t disasm_size = binary.disasm_string ? strlen(binary.disasm_string) : 0; + size_t llvm_ir_size = binary.llvm_ir_string ? 
strlen(binary.llvm_ir_string) : 0; + size_t alloc_size = sizeof(struct radv_shader_binary_legacy) + binary.code_size + + disasm_size + llvm_ir_size + 2; + struct radv_shader_binary_legacy *lbin = calloc(1, alloc_size); + memcpy(lbin->data, binary.code, binary.code_size); + if (binary.llvm_ir_string) + memcpy(lbin->data + binary.code_size, binary.llvm_ir_string, llvm_ir_size + 1); + if (binary.disasm_string) + memcpy(lbin->data + binary.code_size + llvm_ir_size + 1, binary.disasm_string, disasm_size + 1); + + lbin->base.type = RADV_BINARY_TYPE_LEGACY; + lbin->base.stage = stage; + lbin->base.total_size = alloc_size; + lbin->config = config; + lbin->code_size = binary.code_size; + lbin->llvm_ir_size = llvm_ir_size; + lbin->disasm_size = disasm_size; + *rbinary = &lbin->base; + + free(binary.code); + free(binary.config); + free(binary.rodata); + free(binary.global_symbol_offsets); + free(binary.relocs); + free(binary.disasm_string); + free(binary.llvm_ir_string); } static void @@ -3990,8 +4019,7 @@ ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_sha void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, - struct ac_shader_binary *binary, - struct ac_shader_config *config, + struct radv_shader_binary **rbinary, struct radv_shader_variant_info *shader_info, struct nir_shader *const *nir, int nir_count, @@ -4003,7 +4031,7 @@ radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info, options); - ac_compile_llvm_module(ac_llvm, llvm_module, binary, config, shader_info, + ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, shader_info, nir[nir_count - 1]->info.stage, options); for (int i = 0; i < nir_count; ++i) @@ -4115,8 +4143,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader, - struct ac_shader_binary *binary, - struct ac_shader_config *config, + 
struct radv_shader_binary **rbinary, struct radv_shader_variant_info *shader_info, const struct radv_nir_compiler_options *options) { @@ -4156,6 +4183,8 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options); - ac_compile_llvm_module(ac_llvm, ctx.ac.module, binary, config, shader_info, + ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary, shader_info, MESA_SHADER_VERTEX, options); + (*rbinary)->is_gs_copy_shader = true; + } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 18235c4fe27..09b04235e8b 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2131,8 +2131,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, struct radv_shader_module fs_m = {0}; struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, }; nir_shader *nir[MESA_SHADER_STAGES] = {0}; - void *codes[MESA_SHADER_STAGES] = {0}; - unsigned code_sizes[MESA_SHADER_STAGES] = {0}; + struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL}; struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{0}}}}; unsigned char hash[20], gs_copy_hash[20]; @@ -2229,9 +2228,9 @@ void radv_create_shaders(struct radv_pipeline *pipeline, radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]); pipeline->shaders[MESA_SHADER_FRAGMENT] = - radv_shader_variant_create(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1, + radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1, pipeline->layout, keys + MESA_SHADER_FRAGMENT, - &codes[MESA_SHADER_FRAGMENT], &code_sizes[MESA_SHADER_FRAGMENT]); + &binaries[MESA_SHADER_FRAGMENT]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false); } @@ -2259,10 +2258,9 @@ void radv_create_shaders(struct radv_pipeline *pipeline, radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]); - pipeline->shaders[MESA_SHADER_TESS_CTRL] = 
radv_shader_variant_create(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2, + pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2, pipeline->layout, - &key, &codes[MESA_SHADER_TESS_CTRL], - &code_sizes[MESA_SHADER_TESS_CTRL]); + &key, &binaries[MESA_SHADER_TESS_CTRL]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false); } @@ -2278,10 +2276,9 @@ void radv_create_shaders(struct radv_pipeline *pipeline, radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]); - pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_create(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2, + pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2, pipeline->layout, - &keys[pre_stage] , &codes[MESA_SHADER_GEOMETRY], - &code_sizes[MESA_SHADER_GEOMETRY]); + &keys[pre_stage] , &binaries[MESA_SHADER_GEOMETRY]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false); } @@ -2300,48 +2297,42 @@ void radv_create_shaders(struct radv_pipeline *pipeline, radv_start_feedback(stage_feedbacks[i]); - pipeline->shaders[i] = radv_shader_variant_create(device, modules[i], &nir[i], 1, + pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1, pipeline->layout, - keys + i, &codes[i], - &code_sizes[i]); + keys + i, &binaries[i]); radv_stop_feedback(stage_feedbacks[i], false); } } if(modules[MESA_SHADER_GEOMETRY]) { - void *gs_copy_code = NULL; - unsigned gs_copy_code_size = 0; + struct radv_shader_binary *gs_copy_binary = NULL; if (!pipeline->gs_copy_shader) { pipeline->gs_copy_shader = radv_create_gs_copy_shader( - device, nir[MESA_SHADER_GEOMETRY], &gs_copy_code, - &gs_copy_code_size, + device, nir[MESA_SHADER_GEOMETRY], &gs_copy_binary, keys[MESA_SHADER_GEOMETRY].has_multiview_view_index); } if (pipeline->gs_copy_shader) { - void *code[MESA_SHADER_STAGES] = {0}; - unsigned 
code_size[MESA_SHADER_STAGES] = {0}; + struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL}; struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0}; - code[MESA_SHADER_GEOMETRY] = gs_copy_code; - code_size[MESA_SHADER_GEOMETRY] = gs_copy_code_size; + binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary; variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader; radv_pipeline_cache_insert_shaders(device, cache, gs_copy_hash, variants, - (const void**)code, - code_size); + binaries); } - free(gs_copy_code); + free(gs_copy_binary); } radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, - (const void**)codes, code_sizes); + binaries); for (int i = 0; i < MESA_SHADER_STAGES; ++i) { - free(codes[i]); + free(binaries[i]); if (nir[i]) { if (!pipeline->device->keep_shader_info) ralloc_free(nir[i]); diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c index a506711898a..2b3fda6eb8e 100644 --- a/src/amd/vulkan/radv_pipeline_cache.c +++ b/src/amd/vulkan/radv_pipeline_cache.c @@ -31,17 +31,12 @@ #include "ac_nir_to_llvm.h" -struct cache_entry_variant_info { - struct radv_shader_variant_info variant_info; - struct ac_shader_config config; -}; - struct cache_entry { union { unsigned char sha1[20]; uint32_t sha1_dw[5]; }; - uint32_t code_sizes[MESA_SHADER_STAGES]; + uint32_t binary_sizes[MESA_SHADER_STAGES]; struct radv_shader_variant *variants[MESA_SHADER_STAGES]; char code[0]; }; @@ -92,8 +87,8 @@ entry_size(struct cache_entry *entry) { size_t ret = sizeof(*entry); for (int i = 0; i < MESA_SHADER_STAGES; ++i) - if (entry->code_sizes[i]) - ret += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i]; + if (entry->binary_sizes[i]) + ret += entry->binary_sizes[i]; return ret; } @@ -308,31 +303,14 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device, char *p = entry->code; for(int i = 0; i < MESA_SHADER_STAGES; ++i) { - if (!entry->variants[i] && entry->code_sizes[i]) { - 
struct radv_shader_variant *variant; - struct cache_entry_variant_info info; - - variant = calloc(1, sizeof(struct radv_shader_variant)); - if (!variant) { - pthread_mutex_unlock(&cache->mutex); - return false; - } - - memcpy(&info, p, sizeof(struct cache_entry_variant_info)); - p += sizeof(struct cache_entry_variant_info); - - variant->config = info.config; - variant->info = info.variant_info; - variant->code_size = entry->code_sizes[i]; - variant->ref_count = 1; - - void *ptr = radv_alloc_shader_memory(device, variant); - memcpy(ptr, p, entry->code_sizes[i]); - p += entry->code_sizes[i]; - - entry->variants[i] = variant; - } else if (entry->code_sizes[i]) { - p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i]; + if (!entry->variants[i] && entry->binary_sizes[i]) { + struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]); + memcpy(binary, p, entry->binary_sizes[i]); + p += entry->binary_sizes[i]; + + entry->variants[i] = radv_shader_variant_create(device, binary); + } else if (entry->binary_sizes[i]) { + p += entry->binary_sizes[i]; } } @@ -351,8 +329,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1, struct radv_shader_variant **variants, - const void *const *codes, - const unsigned *code_sizes) + struct radv_shader_binary *const *binaries) { if (!cache) cache = device->mem_cache; @@ -385,7 +362,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, size_t size = sizeof(*entry); for (int i = 0; i < MESA_SHADER_STAGES; ++i) if (variants[i]) - size += sizeof(struct cache_entry_variant_info) + code_sizes[i]; + size += binaries[i]->total_size; entry = vk_alloc(&cache->alloc, size, 8, @@ -399,22 +376,15 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, memcpy(entry->sha1, sha1, 20); char* p = entry->code; - struct cache_entry_variant_info info; - memset(&info, 0, sizeof(info)); for (int i = 0; i < MESA_SHADER_STAGES; ++i) { if 
(!variants[i]) continue; - entry->code_sizes[i] = code_sizes[i]; - - info.config = variants[i]->config; - info.variant_info = variants[i]->info; - memcpy(p, &info, sizeof(struct cache_entry_variant_info)); - p += sizeof(struct cache_entry_variant_info); + entry->binary_sizes[i] = binaries[i]->total_size; - memcpy(p, codes[i], code_sizes[i]); - p += code_sizes[i]; + memcpy(p, binaries[i], binaries[i]->total_size); + p += binaries[i]->total_size; } /* Always add cache items to disk. This will allow collection of diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 0c842a4d1b7..7660087f15f 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -397,6 +397,9 @@ struct radv_pipeline_key { uint32_t optimisations_disabled : 1; }; +struct radv_shader_binary; +struct radv_shader_variant; + void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device); @@ -406,8 +409,6 @@ bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size); -struct radv_shader_variant; - bool radv_create_shader_variants_from_pipeline_cache(struct radv_device *device, struct radv_pipeline_cache *cache, @@ -420,8 +421,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1, struct radv_shader_variant **variants, - const void *const *codes, - const unsigned *code_sizes); + struct radv_shader_binary *const *binaries); enum radv_blit_ds_layout { RADV_BLIT_DS_LAYOUT_TILE_ENABLE, @@ -2107,14 +2107,12 @@ struct radv_nir_compiler_options; void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader, - struct ac_shader_binary *binary, - struct ac_shader_config *config, + struct radv_shader_binary **rbinary, struct radv_shader_variant_info *shader_info, const struct radv_nir_compiler_options *option); void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, - struct 
ac_shader_binary *binary, - struct ac_shader_config *config, + struct radv_shader_binary **rbinary, struct radv_shader_variant_info *shader_info, struct nir_shader *const *nir, int nir_count, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 073ed39b5b8..c30c22fbf7d 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -43,6 +43,7 @@ #include "ac_binary.h" #include "ac_llvm_util.h" #include "ac_nir_to_llvm.h" +#include "ac_rtld.h" #include "vk_format.h" #include "util/debug.h" #include "ac_exp_param.h" @@ -464,79 +465,78 @@ radv_destroy_shader_slabs(struct radv_device *device) #define DEBUGGER_NUM_MARKERS 5 static unsigned -radv_get_shader_binary_size(struct ac_shader_binary *binary) +radv_get_shader_binary_size(size_t code_size) { - return binary->code_size + DEBUGGER_NUM_MARKERS * 4; + return code_size + DEBUGGER_NUM_MARKERS * 4; } -static void -radv_fill_shader_variant(struct radv_device *device, - struct radv_shader_variant *variant, - struct radv_nir_compiler_options *options, - struct ac_shader_binary *binary, - gl_shader_stage stage) +static void radv_postprocess_config(const struct radv_physical_device *pdevice, + const struct ac_shader_config *config_in, + const struct radv_shader_variant_info *info, + gl_shader_stage stage, + struct ac_shader_config *config_out) { - bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0; - struct radv_shader_info *info = &variant->info.info; + bool scratch_enabled = config_in->scratch_bytes_per_wave > 0; unsigned vgpr_comp_cnt = 0; - variant->code_size = radv_get_shader_binary_size(binary); - variant->config.rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) | - S_00B12C_USER_SGPR_MSB_GFX9(variant->info.num_user_sgprs >> 5) | - S_00B12C_SCRATCH_EN(scratch_enabled) | - S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | - S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) | - S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | - 
S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) | - S_00B12C_SO_EN(!!info->so.num_outputs); - - variant->config.rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) | - S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) | - S_00B848_DX10_CLAMP(1) | - S_00B848_FLOAT_MODE(variant->config.float_mode); + *config_out = *config_in; + + config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) | + S_00B12C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5) | + S_00B12C_SCRATCH_EN(scratch_enabled) | + S_00B12C_SO_BASE0_EN(!!info->info.so.strides[0]) | + S_00B12C_SO_BASE1_EN(!!info->info.so.strides[1]) | + S_00B12C_SO_BASE2_EN(!!info->info.so.strides[2]) | + S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) | + S_00B12C_SO_EN(!!info->info.so.num_outputs); + + config_out->rsrc1 = S_00B848_VGPRS((config_in->num_vgprs - 1) / 4) | + S_00B848_SGPRS((config_in->num_sgprs - 1) / 8) | + S_00B848_DX10_CLAMP(1) | + S_00B848_FLOAT_MODE(config_in->float_mode); switch (stage) { case MESA_SHADER_TESS_EVAL: - if (options->key.tes.as_es) { - assert(device->physical_device->rad_info.chip_class <= GFX8); - vgpr_comp_cnt = info->uses_prim_id ? 3 : 2; + if (info->tes.as_es) { + assert(pdevice->rad_info.chip_class <= GFX8); + vgpr_comp_cnt = info->info.uses_prim_id ? 3 : 2; } else { - bool enable_prim_id = variant->info.tes.export_prim_id || info->uses_prim_id; + bool enable_prim_id = info->tes.export_prim_id || info->info.uses_prim_id; vgpr_comp_cnt = enable_prim_id ? 3 : 2; } - variant->config.rsrc2 |= S_00B12C_OC_LDS_EN(1); + config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1); break; case MESA_SHADER_TESS_CTRL: - if (device->physical_device->rad_info.chip_class >= GFX9) { + if (pdevice->rad_info.chip_class >= GFX9) { /* We need at least 2 components for LS. * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. */ - vgpr_comp_cnt = info->vs.needs_instance_id ? 
2 : 1; + vgpr_comp_cnt = info->info.vs.needs_instance_id ? 2 : 1; } else { - variant->config.rsrc2 |= S_00B12C_OC_LDS_EN(1); + config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1); } break; case MESA_SHADER_VERTEX: - if (variant->info.vs.as_ls) { - assert(device->physical_device->rad_info.chip_class <= GFX8); + if (info->vs.as_ls) { + assert(pdevice->rad_info.chip_class <= GFX8); /* We need at least 2 components for LS. * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. */ - vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1; - } else if (variant->info.vs.as_es) { - assert(device->physical_device->rad_info.chip_class <= GFX8); + vgpr_comp_cnt = info->info.vs.needs_instance_id ? 2 : 1; + } else if (info->vs.as_es) { + assert(pdevice->rad_info.chip_class <= GFX8); /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ - vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0; + vgpr_comp_cnt = info->info.vs.needs_instance_id ? 1 : 0; } else { /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID) * If PrimID is disabled. InstanceID / StepRate1 is loaded instead. * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. */ - if (variant->info.vs.export_prim_id) { + if (info->vs.export_prim_id) { vgpr_comp_cnt = 2; - } else if (info->vs.needs_instance_id) { + } else if (info->info.vs.needs_instance_id) { vgpr_comp_cnt = 1; } else { vgpr_comp_cnt = 0; @@ -547,30 +547,30 @@ radv_fill_shader_variant(struct radv_device *device, case MESA_SHADER_GEOMETRY: break; case MESA_SHADER_COMPUTE: - variant->config.rsrc2 |= - S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | - S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) | - S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) | - S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 : - info->cs.uses_thread_id[1] ? 
1 : 0) | - S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) | - S_00B84C_LDS_SIZE(variant->config.lds_size); + config_out->rsrc2 |= + S_00B84C_TGID_X_EN(info->info.cs.uses_block_id[0]) | + S_00B84C_TGID_Y_EN(info->info.cs.uses_block_id[1]) | + S_00B84C_TGID_Z_EN(info->info.cs.uses_block_id[2]) | + S_00B84C_TIDIG_COMP_CNT(info->info.cs.uses_thread_id[2] ? 2 : + info->info.cs.uses_thread_id[1] ? 1 : 0) | + S_00B84C_TG_SIZE_EN(info->info.cs.uses_local_invocation_idx) | + S_00B84C_LDS_SIZE(config_in->lds_size); break; default: unreachable("unsupported shader type"); break; } - if (device->physical_device->rad_info.chip_class >= GFX9 && + if (pdevice->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_GEOMETRY) { - unsigned es_type = variant->info.gs.es_type; + unsigned es_type = info->gs.es_type; unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt; if (es_type == MESA_SHADER_VERTEX) { /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ - es_vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0; + es_vgpr_comp_cnt = info->info.vs.needs_instance_id ? 1 : 0; } else if (es_type == MESA_SHADER_TESS_EVAL) { - es_vgpr_comp_cnt = info->uses_prim_id ? 3 : 2; + es_vgpr_comp_cnt = info->info.uses_prim_id ? 3 : 2; } else { unreachable("invalid shader ES type"); } @@ -578,34 +578,25 @@ radv_fill_shader_variant(struct radv_device *device, /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and * VGPR[0:4] are always loaded. */ - if (info->uses_invocation_id) { + if (info->info.uses_invocation_id) { gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */ - } else if (info->uses_prim_id) { + } else if (info->info.uses_prim_id) { gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. 
*/ - } else if (variant->info.gs.vertices_in >= 3) { + } else if (info->gs.vertices_in >= 3) { gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */ } else { gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */ } - variant->config.rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt); - variant->config.rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | + config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt); + config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL); - } else if (device->physical_device->rad_info.chip_class >= GFX9 && + } else if (pdevice->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_TESS_CTRL) { - variant->config.rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt); + config_out->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt); } else { - variant->config.rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt); + config_out->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt); } - - void *ptr = radv_alloc_shader_memory(device, variant); - memcpy(ptr, binary->code, binary->code_size); - - /* Add end-of-code markers for the UMR disassembler. 
*/ - uint32_t *ptr32 = (uint32_t *)ptr + binary->code_size / 4; - for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++) - ptr32[i] = DEBUGGER_END_OF_CODE_MARKER; - } static void radv_init_llvm_target() @@ -647,26 +638,132 @@ static void radv_init_llvm_once(void) call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target); } +struct radv_shader_variant * +radv_shader_variant_create(struct radv_device *device, + const struct radv_shader_binary *binary) +{ + struct ac_shader_config config = {0}; + struct ac_rtld_binary rtld_binary = {0}; + struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant)); + if (!variant) + return NULL; + + variant->ref_count = 1; + + if (binary->type == RADV_BINARY_TYPE_RTLD) { + struct ac_rtld_symbol lds_symbols[1]; + unsigned num_lds_symbols = 0; + const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data; + size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size; + + if (device->physical_device->rad_info.chip_class >= GFX9 && + binary->stage == MESA_SHADER_GEOMETRY && !binary->is_gs_copy_shader) { + /* We add this symbol even on LLVM <= 8 to ensure that + * shader->config.lds_size is set correctly below. + */ + struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; + sym->name = "esgs_ring"; + sym->size = 32 * 1024; + sym->align = 64 * 1024; + } + struct ac_rtld_open_info open_info = { + .info = &device->physical_device->rad_info, + .shader_type = binary->stage, + .num_parts = 1, + .elf_ptrs = &elf_data, + .elf_sizes = &elf_size, + .num_shared_lds_symbols = num_lds_symbols, + .shared_lds_symbols = lds_symbols, + }; + + if (!ac_rtld_open(&rtld_binary, open_info)) { + free(variant); + return NULL; + } + + if (!ac_rtld_read_config(&rtld_binary, &config)) { + ac_rtld_close(&rtld_binary); + free(variant); + return NULL; + } + + if (rtld_binary.lds_size > 0) { + unsigned alloc_granularity = device->physical_device->rad_info.chip_class >= GFX7 ? 
512 : 256; + config.lds_size = align(rtld_binary.lds_size, alloc_granularity) / alloc_granularity; + } + + variant->code_size = rtld_binary.rx_size; + } else { + assert(binary->type == RADV_BINARY_TYPE_LEGACY); + config = ((struct radv_shader_binary_legacy *)binary)->config; + variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size); + } + + variant->info = binary->variant_info; + radv_postprocess_config(device->physical_device, &config, &binary->variant_info, + binary->stage, &variant->config); + + void *dest_ptr = radv_alloc_shader_memory(device, variant); + + if (binary->type == RADV_BINARY_TYPE_RTLD) { + struct radv_shader_binary_rtld* bin = (struct radv_shader_binary_rtld *)binary; + struct ac_rtld_upload_info info = { + .binary = &rtld_binary, + .rx_va = radv_buffer_get_va(variant->bo) + variant->bo_offset, + .rx_ptr = dest_ptr, + }; + + if (!ac_rtld_upload(&info)) { + radv_shader_variant_destroy(device, variant); + ac_rtld_close(&rtld_binary); + return NULL; + } + + const char *disasm_data; + size_t disasm_size; + if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) { + radv_shader_variant_destroy(device, variant); + ac_rtld_close(&rtld_binary); + return NULL; + } + + variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL; + variant->disasm_string = malloc(disasm_size + 1); + memcpy(variant->disasm_string, disasm_data, disasm_size); + variant->disasm_string[disasm_size] = 0; + + ac_rtld_close(&rtld_binary); + } else { + struct radv_shader_binary_legacy* bin = (struct radv_shader_binary_legacy *)binary; + memcpy(dest_ptr, bin->data, bin->code_size); + + /* Add end-of-code markers for the UMR disassembler. */ + uint32_t *ptr32 = (uint32_t *)dest_ptr + bin->code_size / 4; + for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++) + ptr32[i] = DEBUGGER_END_OF_CODE_MARKER; + + variant->llvm_ir_string = bin->llvm_ir_size ? 
strdup((const char*)(bin->data + bin->code_size)) : NULL; + variant->disasm_string = bin->disasm_size ? strdup((const char*)(bin->data + bin->code_size + bin->llvm_ir_size + 1)) : NULL; /* +1: the disasm text is stored after the IR string's NUL terminator. */ + } + return variant; +} + static struct radv_shader_variant * -shader_variant_create(struct radv_device *device, - struct radv_shader_module *module, - struct nir_shader * const *shaders, - int shader_count, - gl_shader_stage stage, - struct radv_nir_compiler_options *options, - bool gs_copy_shader, - void **code_out, - unsigned *code_size_out) +shader_variant_compile(struct radv_device *device, + struct radv_shader_module *module, + struct nir_shader * const *shaders, + int shader_count, + gl_shader_stage stage, + struct radv_nir_compiler_options *options, + bool gs_copy_shader, + struct radv_shader_binary **binary_out) { enum radeon_family chip_family = device->physical_device->rad_info.family; enum ac_target_machine_options tm_options = 0; - struct radv_shader_variant *variant; - struct ac_shader_binary binary; struct ac_llvm_compiler ac_llvm; + struct radv_shader_binary *binary = NULL; + struct radv_shader_variant_info variant_info = {0}; bool thread_compiler; - variant = calloc(1, sizeof(struct radv_shader_variant)); - if (!variant) - return NULL; options->family = chip_family; options->chip_class = device->physical_device->rad_info.chip_class; @@ -695,53 +792,45 @@ shader_variant_create(struct radv_device *device, if (gs_copy_shader) { assert(shader_count == 1); radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary, - &variant->config, &variant->info, - options); + &variant_info, options); } else { - radv_compile_nir_shader(&ac_llvm, &binary, &variant->config, - &variant->info, shaders, shader_count, - options); + radv_compile_nir_shader(&ac_llvm, &binary, &variant_info, + shaders, shader_count, options); } + binary->variant_info = variant_info; radv_destroy_llvm_compiler(&ac_llvm, thread_compiler); - radv_fill_shader_variant(device, variant, options, &binary, stage); - - if
(code_out) { - *code_out = binary.code; - *code_size_out = binary.code_size; - } else - free(binary.code); - free(binary.config); - free(binary.rodata); - free(binary.global_symbol_offsets); - free(binary.relocs); - variant->ref_count = 1; + struct radv_shader_variant *variant = radv_shader_variant_create(device, binary); + if (!variant) { + free(binary); + return NULL; + } if (device->keep_shader_info) { - variant->disasm_string = binary.disasm_string; - variant->llvm_ir_string = binary.llvm_ir_string; if (!gs_copy_shader && !module->nir) { variant->nir = *shaders; variant->spirv = (uint32_t *)module->data; variant->spirv_size = module->size; } - } else { - free(binary.disasm_string); } + if (binary_out) + *binary_out = binary; + else + free(binary); + return variant; } struct radv_shader_variant * -radv_shader_variant_create(struct radv_device *device, +radv_shader_variant_compile(struct radv_device *device, struct radv_shader_module *module, struct nir_shader *const *shaders, int shader_count, struct radv_pipeline_layout *layout, const struct radv_shader_variant_key *key, - void **code_out, - unsigned *code_size_out) + struct radv_shader_binary **binary_out) { struct radv_nir_compiler_options options = {0}; @@ -752,23 +841,22 @@ radv_shader_variant_create(struct radv_device *device, options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH); options.supports_spill = true; - return shader_variant_create(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, - &options, false, code_out, code_size_out); + return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, + &options, false, binary_out); } struct radv_shader_variant * radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *shader, - void **code_out, - unsigned *code_size_out, + struct radv_shader_binary **binary_out, bool multiview) { struct radv_nir_compiler_options options = {0}; 
options.key.has_multiview_view_index = multiview; - return shader_variant_create(device, NULL, &shader, 1, MESA_SHADER_VERTEX, - &options, true, code_out, code_size_out); + return shader_variant_compile(device, NULL, &shader, 1, MESA_SHADER_VERTEX, + &options, true, binary_out); } void diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 66cd005e5fc..c785a79ad17 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -308,6 +308,41 @@ struct radv_shader_variant_info { }; }; +enum radv_shader_binary_type { + RADV_BINARY_TYPE_LEGACY, + RADV_BINARY_TYPE_RTLD +}; + +struct radv_shader_binary { + enum radv_shader_binary_type type; + gl_shader_stage stage; + bool is_gs_copy_shader; + + struct radv_shader_variant_info variant_info; + + /* Self-referential size so we avoid consistency issues. */ + uint32_t total_size; +}; + +struct radv_shader_binary_legacy { + struct radv_shader_binary base; + struct ac_shader_config config; + unsigned code_size; + unsigned llvm_ir_size; + unsigned disasm_size; + + /* data has size of code_size + llvm_ir_size + disasm_size + 2, where + * the +2 is for the NUL terminators of the LLVM IR and disassembly strings.
*/ + uint8_t data[0]; +}; + +struct radv_shader_binary_rtld { + struct radv_shader_binary base; + unsigned elf_size; + unsigned llvm_ir_size; + uint8_t data[0]; +}; + struct radv_shader_variant { uint32_t ref_count; @@ -360,17 +395,19 @@ radv_destroy_shader_slabs(struct radv_device *device); struct radv_shader_variant * radv_shader_variant_create(struct radv_device *device, - struct radv_shader_module *module, - struct nir_shader *const *shaders, - int shader_count, - struct radv_pipeline_layout *layout, - const struct radv_shader_variant_key *key, - void **code_out, - unsigned *code_size_out); + const struct radv_shader_binary *binary); +struct radv_shader_variant * +radv_shader_variant_compile(struct radv_device *device, + struct radv_shader_module *module, + struct nir_shader *const *shaders, + int shader_count, + struct radv_pipeline_layout *layout, + const struct radv_shader_variant_key *key, + struct radv_shader_binary **binary_out); struct radv_shader_variant * radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir, - void **code_out, unsigned *code_size_out, + struct radv_shader_binary **binary_out, bool multiview); void -- 2.30.2