i965: Fix Haswell CS per-thread scratch space encoding.
author Kenneth Graunke <kenneth@whitecape.org>
Thu, 9 Jun 2016 23:56:31 +0000 (16:56 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Sun, 12 Jun 2016 07:40:14 +0000 (00:40 -0700)
Most scratch stages use power of two sizes, in kilobytes, where
0 means 1kB.  But compute shaders on Haswell have a minimum of 2kB,
and use a representation where 0 = 2kB.

This meant that we were effectively telling the hardware to allocate
each thread twice as much space as we meant to, while simultaneously
not allocating that much space in the buffer, leading to overflows.

Note that the existing code is completely wrong for Ivybridge,
but that will take additional work to sort out, so I've left it
as is for now.  A subsequent commit will take care of that.

Together with the previous patches, this fixes rendering corruption
on Synmark's Gl43CSDof on Haswell.

Cc: "12.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/gen7_cs_state.c

index 4b29ee5288352dd1c87cf4fc4df25d0654b95e10..8c0ec4ed27fbed40cef2379e755c9f3ce85de227 100644 (file)
@@ -5985,8 +5985,18 @@ fs_visitor::allocate_registers(bool allow_spilling)
 
    schedule_instructions(SCHEDULE_POST);
 
-   if (last_scratch > 0)
+   if (last_scratch > 0) {
       prog_data->total_scratch = brw_get_scratch_size(last_scratch);
+
+      if (devinfo->is_haswell && stage == MESA_SHADER_COMPUTE) {
+         /* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space"
+          * field documentation, Haswell supports a minimum of 2kB of
+          * scratch space for compute shaders, unlike every other stage
+          * and platform.
+          */
+         prog_data->total_scratch = MAX2(prog_data->total_scratch, 2048);
+      }
+   }
 }
 
 bool
index a71a5957191e7c45495232cba6764d2690f433b1..42cd61fefefe1cd2f33066cb438abb3969349dbb 100644 (file)
@@ -64,14 +64,26 @@ brw_upload_cs_state(struct brw_context *brw)
    OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
 
    if (prog_data->total_scratch) {
-      if (brw->gen >= 8)
+      if (brw->gen >= 8) {
+         /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
+          * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
+          */
          OUT_RELOC64(stage_state->scratch_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      ffs(prog_data->total_scratch) - 11);
-      else
+      } else if (brw->is_haswell) {
+         /* Haswell's Per Thread Scratch Space is in the range [0, 10]
+          * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
+          */
+         OUT_RELOC(stage_state->scratch_bo,
+                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                   ffs(prog_data->total_scratch) - 12);
+      } else {
+         /* This is wrong but we'll fix it later */
          OUT_RELOC(stage_state->scratch_bo,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    ffs(prog_data->total_scratch) - 11);
+      }
    } else {
       OUT_BATCH(0);
       if (brw->gen >= 8)