radeonsi: Consider input SGPR count for compute shader SGPR count.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 19 Apr 2016 12:08:13 +0000 (14:08 +0200)
committer: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 19 Apr 2016 16:31:23 +0000 (18:31 +0200)
si_shader_create corrects the SGPR count with si_fix_num_sgprs, so compile
compute shaders through it instead of calling si_compile_tgsi_shader directly.
We then recompute the rsrc1 register to use the new SGPR count.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_shader.c

index 557e89236fa62010ede4f7763630c0244e574751..905c169dd147ef09f8aa2b35909cd4f8d3c0316e 100644 (file)
@@ -81,14 +81,20 @@ static void *si_create_compute_state(
 
                program->shader.selector = &sel;
 
-               if (si_compile_tgsi_shader(sscreen, sctx->tm, &program->shader,
-                                          true, &sctx->b.debug)) {
+               if (si_shader_create(sscreen, sctx->tm, &program->shader,
+                                    &sctx->b.debug)) {
                        FREE(sel.tokens);
                        return NULL;
                }
 
                scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
 
+               shader->config.rsrc1 =
+                          S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
+                          S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
+                          S_00B848_DX10_CLAMP(1) |
+                          S_00B848_FLOAT_MODE(shader->config.float_mode);
+
                shader->config.rsrc2 = S_00B84C_USER_SGPR(SI_CS_NUM_USER_SGPR) |
                           S_00B84C_SCRATCH_EN(scratch_enabled) |
                           S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
@@ -105,10 +111,10 @@ static void *si_create_compute_state(
                radeon_elf_read(code, header->num_bytes, &program->shader.binary);
                si_shader_binary_read_config(&program->shader.binary,
                             &program->shader.config, 0);
+               si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug,
+                              PIPE_SHADER_COMPUTE, stderr);
+               si_shader_binary_upload(sctx->screen, &program->shader);
        }
-       si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug,
-                      TGSI_PROCESSOR_COMPUTE, stderr);
-       si_shader_binary_upload(sctx->screen, &program->shader);
 
        return program;
 }
index 605b964d3795ebdb9ba1a6fbfaec3d4b1bebdc8c..3bf68eb025d92aed239ab482647eea077bed32b3 100644 (file)
@@ -7022,7 +7022,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
             (shader->key.vs.as_es != mainp->key.vs.as_es ||
              shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
            (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
-            shader->key.tes.as_es != mainp->key.tes.as_es)) {
+            shader->key.tes.as_es != mainp->key.tes.as_es) ||
+           shader->selector->type == PIPE_SHADER_COMPUTE) {
                /* Monolithic shader (compiled as a whole, has many variants,
                 * may take a long time to compile).
                 */