- struct radeon_winsys_cs *cs = sctx->gfx_cs;
- struct si_shader_config inline_config = {0};
- struct si_shader_config *config;
- uint64_t shader_va;
-
- if (sctx->cs_shader_state.emitted_program == program &&
- sctx->cs_shader_state.offset == offset)
- return true;
-
- if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
- config = &shader->config;
- } else {
- unsigned lds_blocks;
-
- config = &inline_config;
- if (code_object) {
- code_object_to_config(code_object, config);
- } else {
- si_shader_binary_read_config(&shader->binary, config, offset);
- }
-
- lds_blocks = config->lds_size;
- /* XXX: We are over allocating LDS. For SI, the shader reports
- * LDS in blocks of 256 bytes, so if there are 4 bytes lds
- * allocated in the shader and 4 bytes allocated by the state
- * tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
- */
- if (sctx->chip_class <= SI) {
- lds_blocks += align(program->local_size, 256) >> 8;
- } else {
- lds_blocks += align(program->local_size, 512) >> 9;
- }
-
- /* TODO: use si_multiwave_lds_size_workaround */
- assert(lds_blocks <= 0xFF);
-
- config->rsrc2 &= C_00B84C_LDS_SIZE;
- config->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
- }
-
- if (!si_setup_compute_scratch_buffer(sctx, shader, config))
- return false;
-
- if (shader->scratch_bo) {
- COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u bytes; "
- "Total Scratch: %u bytes\n", sctx->scratch_waves,
- config->scratch_bytes_per_wave,
- config->scratch_bytes_per_wave *
- sctx->scratch_waves);
-
- radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
- shader->scratch_bo, RADEON_USAGE_READWRITE,
- RADEON_PRIO_SCRATCH_BUFFER);
- }
-
- /* Prefetch the compute shader to TC L2.
- *
- * We should also prefetch graphics shaders if a compute dispatch was
- * the last command, and the compute shader if a draw call was the last
- * command. However, that would add more complexity and we're likely
- * to get a shader state change in that case anyway.
- */
- if (sctx->chip_class >= CIK) {
- cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b,
- 0, program->shader.bo->b.b.width0);
- }
-
- shader_va = shader->bo->gpu_address + offset;
- if (program->use_code_object_v2) {
- /* Shader code is placed after the amd_kernel_code_t
- * struct. */
- shader_va += sizeof(amd_kernel_code_t);
- }
-
- radeon_add_to_buffer_list(sctx, sctx->gfx_cs, shader->bo,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
-
- radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
- radeon_emit(cs, shader_va >> 8);
- radeon_emit(cs, S_00B834_DATA(shader_va >> 40));
-
- radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
- radeon_emit(cs, config->rsrc1);
- radeon_emit(cs, config->rsrc2);
-
- COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
- "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2);
-
- radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
- S_00B860_WAVES(sctx->scratch_waves)
- | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
-
- sctx->cs_shader_state.emitted_program = program;
- sctx->cs_shader_state.offset = offset;
- sctx->cs_shader_state.uses_scratch =
- config->scratch_bytes_per_wave != 0;
-
- return true;
+ struct radeon_cmdbuf *cs = sctx->gfx_cs;
+ struct ac_shader_config inline_config = {0};
+ struct ac_shader_config *config;
+ uint64_t shader_va;
+
+ if (sctx->cs_shader_state.emitted_program == program && sctx->cs_shader_state.offset == offset)
+ return true;
+
+ if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
+ config = &shader->config;
+ } else {
+ unsigned lds_blocks;
+
+ config = &inline_config;
+ code_object_to_config(code_object, config);
+
+ lds_blocks = config->lds_size;
+ /* XXX: We are over allocating LDS. For GFX6, the shader reports
+ * LDS in blocks of 256 bytes, so if there are 4 bytes lds
+ * allocated in the shader and 4 bytes allocated by the state
+ * tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
+ */
+ if (sctx->chip_class <= GFX6) {
+ lds_blocks += align(program->local_size, 256) >> 8;
+ } else {
+ lds_blocks += align(program->local_size, 512) >> 9;
+ }
+
+ /* TODO: use si_multiwave_lds_size_workaround */
+ assert(lds_blocks <= 0xFF);
+
+ config->rsrc2 &= C_00B84C_LDS_SIZE;
+ config->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
+ }
+
+ if (!si_setup_compute_scratch_buffer(sctx, shader, config))
+ return false;
+
+ if (shader->scratch_bo) {
+ COMPUTE_DBG(sctx->screen,
+ "Waves: %u; Scratch per wave: %u bytes; "
+ "Total Scratch: %u bytes\n",
+ sctx->scratch_waves, config->scratch_bytes_per_wave,
+ config->scratch_bytes_per_wave * sctx->scratch_waves);
+
+ radeon_add_to_buffer_list(sctx, sctx->gfx_cs, shader->scratch_bo, RADEON_USAGE_READWRITE,
+ RADEON_PRIO_SCRATCH_BUFFER);
+ }
+
+ /* Prefetch the compute shader to TC L2.
+ *
+ * We should also prefetch graphics shaders if a compute dispatch was
+ * the last command, and the compute shader if a draw call was the last
+ * command. However, that would add more complexity and we're likely
+ * to get a shader state change in that case anyway.
+ */
+ if (sctx->chip_class >= GFX7) {
+ cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b, 0, program->shader.bo->b.b.width0);
+ }
+
+ shader_va = shader->bo->gpu_address + offset;
+ if (program->ir_type == PIPE_SHADER_IR_NATIVE) {
+ /* Shader code is placed after the amd_kernel_code_t
+ * struct. */
+ shader_va += sizeof(amd_kernel_code_t);
+ }
+
+ radeon_add_to_buffer_list(sctx, sctx->gfx_cs, shader->bo, RADEON_USAGE_READ,
+ RADEON_PRIO_SHADER_BINARY);
+
+ radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
+ radeon_emit(cs, shader_va >> 8);
+ radeon_emit(cs, S_00B834_DATA(shader_va >> 40));
+
+ radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
+ radeon_emit(cs, config->rsrc1);
+ radeon_emit(cs, config->rsrc2);
+
+ COMPUTE_DBG(sctx->screen,
+ "COMPUTE_PGM_RSRC1: 0x%08x "
+ "COMPUTE_PGM_RSRC2: 0x%08x\n",
+ config->rsrc1, config->rsrc2);
+
+ sctx->max_seen_compute_scratch_bytes_per_wave =
+ MAX2(sctx->max_seen_compute_scratch_bytes_per_wave, config->scratch_bytes_per_wave);
+
+ radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
+ S_00B860_WAVES(sctx->scratch_waves) |
+ S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
+
+ sctx->cs_shader_state.emitted_program = program;
+ sctx->cs_shader_state.offset = offset;
+ sctx->cs_shader_state.uses_scratch = config->scratch_bytes_per_wave != 0;
+
+ return true;