From: Samuel Pitoiset Date: Wed, 8 Jul 2020 15:48:49 +0000 (+0200) Subject: radv: clean up radv_compute_generate_pm4() X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=83f63ab2c2cc4568e5a927f3ee5bb7529b96c3ec;p=mesa.git radv: clean up radv_compute_generate_pm4() For consistency regarding how the graphics pipeline is built. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e3b37ea2f94..66f76598223 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -5016,59 +5016,71 @@ VkResult radv_CreateGraphicsPipelines( return result; } +static void +radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs, + struct radv_pipeline *pipeline) +{ + struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE]; + uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; + struct radv_device *device = pipeline->device; + + radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2); + radeon_emit(cs, va >> 8); + radeon_emit(cs, S_00B834_DATA(va >> 40)); + + radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2); + radeon_emit(cs, shader->config.rsrc1); + radeon_emit(cs, shader->config.rsrc2); + if (device->physical_device->rad_info.chip_class >= GFX10) { + radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3); + } +} static void -radv_compute_generate_pm4(struct radv_pipeline *pipeline) +radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs, + struct radv_pipeline *pipeline) { - struct radv_shader_variant *compute_shader; + struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE]; struct radv_device *device = pipeline->device; unsigned threads_per_threadgroup; unsigned threadgroups_per_cu = 1; unsigned waves_per_threadgroup; unsigned max_waves_per_sh = 0; - uint64_t va; - - pipeline->cs.max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16; - pipeline->cs.buf = malloc(pipeline->cs.max_dw * 4); - - compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; - va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset; - - radeon_set_sh_reg_seq(&pipeline->cs, R_00B830_COMPUTE_PGM_LO, 2); - radeon_emit(&pipeline->cs, va >> 8); - radeon_emit(&pipeline->cs, S_00B834_DATA(va >> 40)); - - radeon_set_sh_reg_seq(&pipeline->cs, R_00B848_COMPUTE_PGM_RSRC1, 2); - radeon_emit(&pipeline->cs, compute_shader->config.rsrc1); - radeon_emit(&pipeline->cs, compute_shader->config.rsrc2); - if (device->physical_device->rad_info.chip_class >= GFX10) { - radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3); - } /* Calculate best compute resource limits. */ - threads_per_threadgroup = compute_shader->info.cs.block_size[0] * - compute_shader->info.cs.block_size[1] * - compute_shader->info.cs.block_size[2]; + threads_per_threadgroup = shader->info.cs.block_size[0] * + shader->info.cs.block_size[1] * + shader->info.cs.block_size[2]; waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, - compute_shader->info.wave_size); + shader->info.wave_size); if (device->physical_device->rad_info.chip_class >= GFX10 && waves_per_threadgroup == 1) threadgroups_per_cu = 2; - radeon_set_sh_reg(&pipeline->cs, R_00B854_COMPUTE_RESOURCE_LIMITS, + radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, ac_get_compute_resource_limits(&device->physical_device->rad_info, waves_per_threadgroup, max_waves_per_sh, threadgroups_per_cu)); - radeon_set_sh_reg_seq(&pipeline->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(&pipeline->cs, - S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0])); - radeon_emit(&pipeline->cs, - S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1])); - radeon_emit(&pipeline->cs, - S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2])); + radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2])); +} + +static void +radv_compute_generate_pm4(struct radv_pipeline *pipeline) +{ + struct radv_device *device = pipeline->device; + struct radeon_cmdbuf *cs = &pipeline->cs; + + cs->max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16; + cs->buf = malloc(cs->max_dw * 4); + + radv_pipeline_generate_hw_cs(cs, pipeline); + radv_pipeline_generate_compute_state(cs, pipeline); assert(pipeline->cs.cdw <= pipeline->cs.max_dw); }