i965: Fix CS scratch size calculations on Ivybridge and Baytrail.
author Kenneth Graunke <kenneth@whitecape.org>
Fri, 10 Jun 2016 00:30:40 +0000 (17:30 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Sun, 12 Jun 2016 07:40:14 +0000 (00:40 -0700)
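On these pre-Haswell platforms, the compute shader per-thread scratch space
sizes are linear (1kB granularity), not powers of two, and much more limited
(1kB to 12kB).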

Cc: "12.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/gen7_cs_state.c

index 8c0ec4ed27fbed40cef2379e755c9f3ce85de227..f1a1c87be5aef8368e54669cf048d1b5057ea205 100644 (file)
@@ -5995,6 +5995,12 @@ fs_visitor::allocate_registers(bool allow_spilling)
           * and platform.
           */
          prog_data->total_scratch = MAX2(prog_data->total_scratch, 2048);
+      } else if (devinfo->gen <= 7 && stage == MESA_SHADER_COMPUTE) {
+         /* According to the MEDIAVFE_STATE's "Per Thread Scratch Space"
+          * field documentation, platforms prior to Haswell measure scratch
+          * size linearly with a range of [1kB, 12kB] and 1kB granularity.
+          */
+         prog_data->total_scratch = ALIGN(last_scratch, 1024);
       }
    }
 }
index 42cd61fefefe1cd2f33066cb438abb3969349dbb..9d83837812a3c6bed0ad863708fbf6d98bca2e7a 100644 (file)
@@ -79,10 +79,12 @@ brw_upload_cs_state(struct brw_context *brw)
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    ffs(prog_data->total_scratch) - 12);
       } else {
-         /* This is wrong but we'll fix it later */
+         /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
+          * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
+          */
          OUT_RELOC(stage_state->scratch_bo,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                   ffs(prog_data->total_scratch) - 11);
+                   prog_data->total_scratch / 1024 - 1);
       }
    } else {
       OUT_BATCH(0);