radeonsi: don't enable scratch just for SGPR spills
authorMarek Olšák <marek.olsak@amd.com>
Tue, 7 Jun 2016 22:14:58 +0000 (00:14 +0200)
committerMarek Olšák <marek.olsak@amd.com>
Wed, 8 Jun 2016 17:23:41 +0000 (19:23 +0200)
Diff from shader-db:
  Scratch: 3221504 -> 17408 (-99.46 %) bytes per wave

v2: add "break;"

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c

index 6e8eefb2d8638c6aebcfa177c5fe1aa5e18ea3eb..4eb151f5872205473dee82bd02857600d825e639 100644 (file)
@@ -5825,6 +5825,20 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
        unsigned i;
        const unsigned char *config =
                radeon_shader_binary_config_start(binary, symbol_offset);
+       bool really_needs_scratch = false;
+
+       /* LLVM adds SGPR spills to the scratch size.
+        * Find out if we really need the scratch buffer.
+        */
+       for (i = 0; i < binary->reloc_count; i++) {
+               const struct radeon_shader_reloc *reloc = &binary->relocs[i];
+
+               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
+                   !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+                       really_needs_scratch = true;
+                       break;
+               }
+       }
 
        /* XXX: We may be able to emit some of these values directly rather than
         * extracting fields to be emitted later.
@@ -5859,8 +5873,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
                case R_0286E8_SPI_TMPRING_SIZE:
                case R_00B860_COMPUTE_TMPRING_SIZE:
                        /* WAVESIZE is in units of 256 dwords. */
-                       conf->scratch_bytes_per_wave =
-                               G_00B860_WAVESIZE(value) * 256 * 4 * 1;
+                       if (really_needs_scratch)
+                               conf->scratch_bytes_per_wave =
+                                       G_00B860_WAVESIZE(value) * 256 * 4;
                        break;
                default:
                        {