From 77b05cc42df29472a7852b90575a19e8991815cd Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 9 May 2018 16:38:33 +0200 Subject: [PATCH] radeonsi: use ac_shader_config MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák --- src/amd/common/ac_binary.c | 2 + src/gallium/drivers/radeonsi/si_compute.c | 14 +-- src/gallium/drivers/radeonsi/si_shader.c | 112 +++------------------- src/gallium/drivers/radeonsi/si_shader.h | 25 +---- 4 files changed, 27 insertions(+), 126 deletions(-) diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c index ee8d3836177..c047666b656 100644 --- a/src/amd/common/ac_binary.c +++ b/src/amd/common/ac_binary.c @@ -225,12 +225,14 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); conf->float_mode = G_00B028_FLOAT_MODE(value); + conf->rsrc1 = value; break; case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); break; case R_00B84C_COMPUTE_PGM_RSRC2: conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value)); + conf->rsrc2 = value; break; case R_0286CC_SPI_PS_INPUT_ENA: conf->spi_ps_input_ena = value; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index bb75132a1a2..a08ed7d65f5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -66,7 +66,7 @@ static const amd_kernel_code_t *si_compute_get_code_object( } static void code_object_to_config(const amd_kernel_code_t *code_object, - struct si_shader_config *out_config) { + struct ac_shader_config *out_config) { uint32_t rsrc1 = code_object->compute_pgm_resource_registers; uint32_t rsrc2 = code_object->compute_pgm_resource_registers >> 32; @@ -249,8 +249,8 @@ static void *si_create_compute_state( return NULL; } } else { - si_shader_binary_read_config(&program->shader.binary, - &program->shader.config, 0); + ac_shader_binary_read_config(&program->shader.binary, + &program->shader.config, 0, false); } si_shader_dump(sctx->screen, &program->shader, &sctx->debug, PIPE_SHADER_COMPUTE, stderr, true); @@ -366,7 +366,7 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader, - struct si_shader_config *config) + struct ac_shader_config *config) { uint64_t scratch_bo_size, scratch_needed; scratch_bo_size = 0; @@ -409,8 +409,8 @@ static bool si_switch_compute_shader(struct si_context *sctx, unsigned offset) { struct radeon_cmdbuf *cs = sctx->gfx_cs; - struct si_shader_config inline_config = {0}; - struct si_shader_config *config; + struct ac_shader_config inline_config = {0}; + struct ac_shader_config *config; uint64_t shader_va; if (sctx->cs_shader_state.emitted_program == program && @@ -426,7 +426,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, if (code_object) { code_object_to_config(code_object, config); } else { - si_shader_binary_read_config(&shader->binary, config, offset); + ac_shader_binary_read_config(&shader->binary, config, offset, false); } lds_blocks = config->lds_size; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8c4f4e75653..d532d664307 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5045,90 +5045,6 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, ac_build_kill_if_false(&ctx->ac, bit); } -void si_shader_binary_read_config(struct ac_shader_binary *binary, - struct si_shader_config *conf, - unsigned symbol_offset) -{ - unsigned i; - const unsigned char *config = - ac_shader_binary_config_start(binary, symbol_offset); - bool really_needs_scratch = false; - - /* LLVM adds SGPR spills to the scratch size. - * Find out if we really need the scratch buffer. - */ - for (i = 0; i < binary->reloc_count; i++) { - const struct ac_shader_reloc *reloc = &binary->relocs[i]; - - if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || - !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { - really_needs_scratch = true; - break; - } - } - - /* XXX: We may be able to emit some of these values directly rather than - * extracting fields to be emitted later. - */ - - for (i = 0; i < binary->config_size_per_symbol; i+= 8) { - unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i)); - unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4)); - switch (reg) { - case R_00B028_SPI_SHADER_PGM_RSRC1_PS: - case R_00B128_SPI_SHADER_PGM_RSRC1_VS: - case R_00B228_SPI_SHADER_PGM_RSRC1_GS: - case R_00B428_SPI_SHADER_PGM_RSRC1_HS: - case R_00B848_COMPUTE_PGM_RSRC1: - conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); - conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); - conf->float_mode = G_00B028_FLOAT_MODE(value); - conf->rsrc1 = value; - break; - case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: - conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); - break; - case R_00B84C_COMPUTE_PGM_RSRC2: - conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value)); - conf->rsrc2 = value; - break; - case R_0286CC_SPI_PS_INPUT_ENA: - conf->spi_ps_input_ena = value; - break; - case R_0286D0_SPI_PS_INPUT_ADDR: - conf->spi_ps_input_addr = value; - break; - case R_0286E8_SPI_TMPRING_SIZE: - case R_00B860_COMPUTE_TMPRING_SIZE: - /* WAVESIZE is in units of 256 dwords. */ - if (really_needs_scratch) - conf->scratch_bytes_per_wave = - G_00B860_WAVESIZE(value) * 256 * 4; - break; - case 0x4: /* SPILLED_SGPRS */ - conf->spilled_sgprs = value; - break; - case 0x8: /* SPILLED_VGPRS */ - conf->spilled_vgprs = value; - break; - default: - { - static bool printed; - - if (!printed) { - fprintf(stderr, "Warning: LLVM emitted unknown " - "config register: 0x%x\n", reg); - printed = true; - } - } - break; - } - } - - if (!conf->spi_ps_input_addr) - conf->spi_ps_input_addr = conf->spi_ps_input_ena; -} - void si_shader_apply_scratch_relocs(struct si_shader *shader, uint64_t scratch_va) { @@ -5296,7 +5212,7 @@ static void si_shader_dump_disassembly(const struct ac_shader_binary *binary, static void si_calculate_max_simd_waves(struct si_shader *shader) { struct si_screen *sscreen = shader->selector->screen; - struct si_shader_config *conf = &shader->config; + struct ac_shader_config *conf = &shader->config; unsigned num_inputs = shader->selector->info.num_inputs; unsigned lds_increment = sscreen->info.chip_class >= GFX7 ? 512 : 256; unsigned lds_per_wave = 0; @@ -5345,13 +5261,13 @@ static void si_calculate_max_simd_waves(struct si_shader *shader) if (lds_per_wave) max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); - conf->max_simd_waves = max_simd_waves; + shader->info.max_simd_waves = max_simd_waves; } void si_shader_dump_stats_for_shader_db(const struct si_shader *shader, struct pipe_debug_callback *debug) { - const struct si_shader_config *conf = &shader->config; + const struct ac_shader_config *conf = &shader->config; pipe_debug_message(debug, SHADER_INFO, "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d " @@ -5360,8 +5276,8 @@ void si_shader_dump_stats_for_shader_db(const struct si_shader *shader, conf->num_sgprs, conf->num_vgprs, si_get_shader_binary_size(shader), conf->lds_size, conf->scratch_bytes_per_wave, - conf->max_simd_waves, conf->spilled_sgprs, - conf->spilled_vgprs, conf->private_mem_vgprs); + shader->info.max_simd_waves, conf->spilled_sgprs, + conf->spilled_vgprs, shader->info.private_mem_vgprs); } static void si_shader_dump_stats(struct si_screen *sscreen, @@ -5370,7 +5286,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen, FILE *file, bool check_debug_option) { - const struct si_shader_config *conf = &shader->config; + const struct ac_shader_config *conf = &shader->config; if (!check_debug_option || si_can_dump_shader(sscreen, processor)) { @@ -5394,10 +5310,10 @@ static void si_shader_dump_stats(struct si_screen *sscreen, "********************\n\n\n", conf->num_sgprs, conf->num_vgprs, conf->spilled_sgprs, conf->spilled_vgprs, - conf->private_mem_vgprs, + shader->info.private_mem_vgprs, si_get_shader_binary_size(shader), conf->lds_size, conf->scratch_bytes_per_wave, - conf->max_simd_waves); + shader->info.max_simd_waves); } } @@ -5484,7 +5400,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader, static int si_compile_llvm(struct si_screen *sscreen, struct ac_shader_binary *binary, - struct si_shader_config *conf, + struct ac_shader_config *conf, struct ac_llvm_compiler *compiler, LLVMModuleRef mod, struct pipe_debug_callback *debug, @@ -5518,7 +5434,7 @@ static int si_compile_llvm(struct si_screen *sscreen, return r; } - si_shader_binary_read_config(binary, conf, 0); + ac_shader_binary_read_config(binary, conf, 0, false); /* Enable 64-bit and 16-bit denormals, because there is no performance * cost. @@ -6893,7 +6809,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, if ((debug && debug->debug_message) || si_can_dump_shader(sscreen, ctx.type)) { - ctx.shader->config.private_mem_vgprs = + ctx.shader->info.private_mem_vgprs = ac_count_scratch_private_memory(ctx.main_fn); } @@ -8066,9 +7982,9 @@ int si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compile shader->config.spilled_vgprs = MAX2(shader->config.spilled_vgprs, shader->previous_stage->config.spilled_vgprs); - shader->config.private_mem_vgprs = - MAX2(shader->config.private_mem_vgprs, - shader->previous_stage->config.private_mem_vgprs); + shader->info.private_mem_vgprs = + MAX2(shader->info.private_mem_vgprs, + shader->previous_stage->info.private_mem_vgprs); shader->config.scratch_bytes_per_wave = MAX2(shader->config.scratch_bytes_per_wave, shader->previous_stage->config.scratch_bytes_per_wave); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index bc9299bda66..b63045f662d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -574,22 +574,6 @@ struct si_shader_key { /* Restore the pack alignment to default. */ #pragma pack(pop) -struct si_shader_config { - unsigned num_sgprs; - unsigned num_vgprs; - unsigned spilled_sgprs; - unsigned spilled_vgprs; - unsigned private_mem_vgprs; - unsigned lds_size; - unsigned max_simd_waves; - unsigned spi_ps_input_ena; - unsigned spi_ps_input_addr; - unsigned float_mode; - unsigned scratch_bytes_per_wave; - unsigned rsrc1; - unsigned rsrc2; -}; - /* GCN-specific shader info. */ struct si_shader_info { ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; @@ -600,6 +584,8 @@ struct si_shader_info { bool uses_instanceid; ubyte nr_pos_exports; ubyte nr_param_exports; + unsigned private_mem_vgprs; + unsigned max_simd_waves; }; struct si_shader { @@ -627,7 +613,7 @@ struct si_shader { /* The following data is all that's needed for binary shaders. */ struct ac_shader_binary binary; - struct si_shader_config config; + struct ac_shader_config config; struct si_shader_info info; /* Shader key + LLVM IR + disassembly + statistics. @@ -684,7 +670,7 @@ struct si_shader_part { struct si_shader_part *next; union si_shader_part_key key; struct ac_shader_binary binary; - struct si_shader_config config; + struct ac_shader_config config; }; /* si_shader.c */ @@ -714,9 +700,6 @@ void si_multiwave_lds_size_workaround(struct si_screen *sscreen, unsigned *lds_size); void si_shader_apply_scratch_relocs(struct si_shader *shader, uint64_t scratch_va); -void si_shader_binary_read_config(struct ac_shader_binary *binary, - struct si_shader_config *conf, - unsigned symbol_offset); const char *si_get_shader_name(const struct si_shader *shader, unsigned processor); /* si_shader_nir.c */ -- 2.30.2