From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Wed, 8 Jul 2020 15:48:49 +0000 (+0200)
Subject: radv: clean up radv_compute_generate_pm4()
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=83f63ab2c2cc4568e5a927f3ee5bb7529b96c3ec;p=mesa.git

radv: clean up radv_compute_generate_pm4()

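Split the PM4 generation into radv_pipeline_generate_hw_cs() and
radv_pipeline_generate_compute_state(), for consistency with how the
graphics pipeline is built.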

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5837>
---

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index e3b37ea2f94..66f76598223 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -5016,59 +5016,71 @@ VkResult radv_CreateGraphicsPipelines(
 	return result;
 }
 
+static void
+radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs,
+			     struct radv_pipeline *pipeline)
+{
+	struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+	uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+	struct radv_device *device = pipeline->device;
+
+	radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
+	radeon_emit(cs, va >> 8);
+	radeon_emit(cs, S_00B834_DATA(va >> 40));
+
+	radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
+	radeon_emit(cs, shader->config.rsrc1);
+	radeon_emit(cs, shader->config.rsrc2);
+	if (device->physical_device->rad_info.chip_class >= GFX10) {
+		radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
+	}
+}
 
 static void
-radv_compute_generate_pm4(struct radv_pipeline *pipeline)
+radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs,
+				     struct radv_pipeline *pipeline)
 {
-	struct radv_shader_variant *compute_shader;
+	struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
 	struct radv_device *device = pipeline->device;
 	unsigned threads_per_threadgroup;
 	unsigned threadgroups_per_cu = 1;
 	unsigned waves_per_threadgroup;
 	unsigned max_waves_per_sh = 0;
-	uint64_t va;
-
-	pipeline->cs.max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16;
-	pipeline->cs.buf = malloc(pipeline->cs.max_dw * 4);
-
-	compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
-	va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
-
-	radeon_set_sh_reg_seq(&pipeline->cs, R_00B830_COMPUTE_PGM_LO, 2);
-	radeon_emit(&pipeline->cs, va >> 8);
-	radeon_emit(&pipeline->cs, S_00B834_DATA(va >> 40));
-
-	radeon_set_sh_reg_seq(&pipeline->cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
-	radeon_emit(&pipeline->cs, compute_shader->config.rsrc1);
-	radeon_emit(&pipeline->cs, compute_shader->config.rsrc2);
-	if (device->physical_device->rad_info.chip_class >= GFX10) {
-		radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3);
-	}
 
 	/* Calculate best compute resource limits. */
-	threads_per_threadgroup = compute_shader->info.cs.block_size[0] *
-				  compute_shader->info.cs.block_size[1] *
-				  compute_shader->info.cs.block_size[2];
+	threads_per_threadgroup = shader->info.cs.block_size[0] *
+				  shader->info.cs.block_size[1] *
+				  shader->info.cs.block_size[2];
 	waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup,
-					     compute_shader->info.wave_size);
+					     shader->info.wave_size);
 
 	if (device->physical_device->rad_info.chip_class >= GFX10 &&
 	    waves_per_threadgroup == 1)
 		threadgroups_per_cu = 2;
 
-	radeon_set_sh_reg(&pipeline->cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+	radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
 			  ac_get_compute_resource_limits(&device->physical_device->rad_info,
 							 waves_per_threadgroup,
 							 max_waves_per_sh,
 							 threadgroups_per_cu));
 
-	radeon_set_sh_reg_seq(&pipeline->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
-	radeon_emit(&pipeline->cs,
-		    S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0]));
-	radeon_emit(&pipeline->cs,
-		    S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1]));
-	radeon_emit(&pipeline->cs,
-		    S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]));
+	radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+	radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
+	radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
+	radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
+}
+
+static void
+radv_compute_generate_pm4(struct radv_pipeline *pipeline)
+{
+	struct radv_device *device = pipeline->device;
+	struct radeon_cmdbuf *cs = &pipeline->cs;
+
+	cs->max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16;
+	cs->buf = malloc(cs->max_dw * 4);
+
+	radv_pipeline_generate_hw_cs(cs, pipeline);
+	radv_pipeline_generate_compute_state(cs, pipeline);
 
 	assert(pipeline->cs.cdw <= pipeline->cs.max_dw);
 }