i965/fs: Organize prog_data by ksp number rather than SIMD width

author Jason Ekstrand <jason.ekstrand@intel.com>

Thu, 28 Apr 2016 22:37:39 +0000 (15:37 -0700)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Sat, 14 May 2016 20:34:25 +0000 (13:34 -0700)
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 28 Apr 2016 22:37:39 +0000 (15:37 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Sat, 14 May 2016 20:34:25 +0000 (13:34 -0700)
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c

index f55069ee74718aee9224b9422de083f4a7a74c4c..a8e31b13cf11931c3f66065e100e98358e145df3 100644 (file)
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -585,17 +585,17 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
     const struct brw_stage_prog_data *stage_prog_data;
     struct anv_pipeline_bind_map map;
     struct brw_wm_prog_key key;
-   uint32_t kernel = NO_KERNEL;
     unsigned char sha1[20];
  
     populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
  
     if (module->size > 0) {
        anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      pipeline->ps_ksp0 =
+         anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
     }
  
-   if (kernel == NO_KERNEL) {
+   if (pipeline->ps_ksp0 == NO_KERNEL) {
        struct brw_wm_prog_data prog_data = { 0, };
        struct anv_pipeline_binding surface_to_descriptor[256];
        struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -682,43 +682,16 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
        }
  
        stage_prog_data = &prog_data.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
+      pipeline->ps_ksp0 =
+         anv_pipeline_cache_upload_kernel(cache,
+                                          module->size > 0 ? sha1 : NULL,
+                                          shader_code, code_size,
                                                  &stage_prog_data, sizeof(prog_data),
                                                  &map);
  
        ralloc_free(mem_ctx);
     }
  
-   const struct brw_wm_prog_data *wm_prog_data =
-      (const struct brw_wm_prog_data *) stage_prog_data;
-
-   if (wm_prog_data->no_8)
-      pipeline->ps_simd8 = NO_KERNEL;
-   else
-      pipeline->ps_simd8 = kernel;
-
-   if (wm_prog_data->no_8 || wm_prog_data->prog_offset_16) {
-      pipeline->ps_simd16 = kernel + wm_prog_data->prog_offset_16;
-   } else {
-      pipeline->ps_simd16 = NO_KERNEL;
-   }
-
-   pipeline->ps_ksp2 = 0;
-   pipeline->ps_grf_start2 = 0;
-   if (pipeline->ps_simd8 != NO_KERNEL) {
-      pipeline->ps_ksp0 = pipeline->ps_simd8;
-      pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg;
-      if (pipeline->ps_simd16 != NO_KERNEL) {
-         pipeline->ps_ksp2 = pipeline->ps_simd16;
-         pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16;
-      }
-   } else if (pipeline->ps_simd16 != NO_KERNEL) {
-      pipeline->ps_ksp0 = pipeline->ps_simd16;
-      pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16;
-   }
-
     anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
                                     stage_prog_data, &map);
  
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h

index d8a21942d83eaa91170dc887c4f7fffd50c7f980..c55f1db51802cff8e7463c3dff48c37787f4866f 100644 (file)
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1418,12 +1418,7 @@ struct anv_pipeline {
     struct anv_state                             blend_state;
     uint32_t                                     vs_simd8;
     uint32_t                                     vs_vec4;
-   uint32_t                                     ps_simd8;
-   uint32_t                                     ps_simd16;
     uint32_t                                     ps_ksp0;
-   uint32_t                                     ps_ksp2;
-   uint32_t                                     ps_grf_start0;
-   uint32_t                                     ps_grf_start2;
     uint32_t                                     gs_kernel;
     uint32_t                                     cs_simd;
  
diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c

index d4797c599770eed8ca3093a569c81080e86ed7e8..285b191352ce7277d07571d74867c0c0833f98b1 100644 (file)
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -375,19 +375,21 @@ genX(graphics_pipeline_create)(
                                              POSOFFSET_SAMPLE : POSOFFSET_NONE;
  
           ps._32PixelDispatchEnable        = false;
-         ps._16PixelDispatchEnable        = pipeline->ps_simd16 != NO_KERNEL;
-         ps._8PixelDispatchEnable         = pipeline->ps_simd8 != NO_KERNEL;
+         ps._16PixelDispatchEnable        = wm_prog_data->dispatch_16;
+         ps._8PixelDispatchEnable         = wm_prog_data->dispatch_8;
  
-         ps.DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0,
+         ps.DispatchGRFStartRegisterforConstantSetupData0 =
+            wm_prog_data->base.dispatch_grf_start_reg,
           ps.DispatchGRFStartRegisterforConstantSetupData1 = 0,
-         ps.DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2,
+         ps.DispatchGRFStartRegisterforConstantSetupData2 =
+            wm_prog_data->dispatch_grf_start_reg_2,
  
           /* Haswell requires the sample mask to be set in this packet as well as
            * in 3DSTATE_SAMPLE_MASK; the values should match. */
           /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
  
           ps.KernelStartPointer1           = 0;
-         ps.KernelStartPointer2           = pipeline->ps_ksp2;
+         ps.KernelStartPointer2           = pipeline->ps_ksp0 + wm_prog_data->prog_offset_2;
        }
  
        /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c

index 857f97981117189dadeef6831c0ff16bab1f86a7..d96669494a2ca12a6b6dce2a749c2db488eba685 100644 (file)
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -502,9 +502,9 @@ genX(graphics_pipeline_create)(
        anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
           ps.KernelStartPointer0     = pipeline->ps_ksp0;
           ps.KernelStartPointer1     = 0;
-         ps.KernelStartPointer2     = pipeline->ps_ksp2;
-         ps._8PixelDispatchEnable   = pipeline->ps_simd8 != NO_KERNEL;
-         ps._16PixelDispatchEnable  = pipeline->ps_simd16 != NO_KERNEL;
+         ps.KernelStartPointer2     = pipeline->ps_ksp0 + wm_prog_data->prog_offset_2;
+         ps._8PixelDispatchEnable   = wm_prog_data->dispatch_8;
+         ps._16PixelDispatchEnable  = wm_prog_data->dispatch_16;
           ps._32PixelDispatchEnable  = false;
           ps.SingleProgramFlow       = false;
           ps.VectorMaskEnable        = true;
@@ -518,9 +518,11 @@ genX(graphics_pipeline_create)(
           ps.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT];
           ps.PerThreadScratchSpace   = scratch_space(&wm_prog_data->base);
  
-         ps.DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0;
+         ps.DispatchGRFStartRegisterForConstantSetupData0 =
+            wm_prog_data->base.dispatch_grf_start_reg;
           ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
-         ps.DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2;
+         ps.DispatchGRFStartRegisterForConstantSetupData2 =
+            wm_prog_data->dispatch_grf_start_reg_2;
        }
  
        bool per_sample_ps = pCreateInfo->pMultisampleState &&
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h

index 3fcd7e87c4e377e799681637ada25c1593acbcfb..a2148ae2656d7c34f7d6fe285184d5c0091ff06c 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -367,9 +367,11 @@ struct brw_wm_prog_data {
  
     GLuint num_varying_inputs;
  
-   GLuint dispatch_grf_start_reg_16;
-   GLuint reg_blocks;
-   GLuint reg_blocks_16;
+   uint8_t reg_blocks_0;
+   uint8_t reg_blocks_2;
+
+   uint8_t dispatch_grf_start_reg_2;
+   uint32_t prog_offset_2;
  
     struct {
        /** @{
@@ -383,7 +385,8 @@ struct brw_wm_prog_data {
     bool computed_stencil;
  
     bool early_fragment_tests;
-   bool no_8;
+   bool dispatch_8;
+   bool dispatch_16;
     bool dual_src_blend;
     bool persample_dispatch;
     bool uses_pos_offset;
@@ -393,7 +396,6 @@ struct brw_wm_prog_data {
     bool uses_src_w;
     bool uses_sample_mask;
     bool pulls_bary;
-   uint32_t prog_offset_16;
  
     /**
      * Mask of which interpolation modes are required by the fragment shader.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index f66ba473411902174f684d2e9b09bc9fcc5aca71..1e84b101a8ea4ddbf81d9fe9e17c4ab3b1ac2f9f 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5800,11 +5800,6 @@ fs_visitor::run_fs(bool do_rep_send)
           return false;
     }
  
-   if (dispatch_width == 8)
-      wm_prog_data->reg_blocks = brw_register_blocks(grf_used);
-   else
-      wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used);
-
     return !failed;
  }
  
@@ -6004,6 +5999,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                                             shader);
  
     cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL;
+   uint8_t simd8_grf_start, simd16_grf_start;
+   unsigned simd8_grf_used, simd16_grf_used;
  
     fs_visitor v8(compiler, log_data, mem_ctx, key,
                   &prog_data->base, prog, shader, 8,
@@ -6015,7 +6012,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
        return NULL;
     } else if (likely(!(INTEL_DEBUG & DEBUG_NO8))) {
        simd8_cfg = v8.cfg;
-      prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs;
+      simd8_grf_start = v8.payload.num_regs;
+      simd8_grf_used = v8.grf_used;
     }
  
     if (!v8.simd16_unsupported &&
@@ -6031,7 +6029,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                                     v16.fail_msg);
        } else {
           simd16_cfg = v16.cfg;
-         prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
+         simd16_grf_start = v16.payload.num_regs;
+         simd16_grf_used = v16.grf_used;
        }
     }
  
@@ -6047,6 +6046,24 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
     if (compiler->devinfo->gen < 5 && simd16_cfg)
        simd8_cfg = NULL;
  
+   if (prog_data->persample_dispatch) {
+      /* Starting with SandyBridge (where we first get MSAA), the different
+       * pixel dispatch combinations are grouped into classifications A
+       * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1).  On all hardware
+       * generations, the only configurations supporting persample dispatch
+       * are are this in which only one dispatch width is enabled.
+       *
+       * If computed depth is enabled, SNB only allows SIMD8 while IVB+
+       * allow SIMD8 or SIMD16 so we choose SIMD16 if available.
+       */
+      if (compiler->devinfo->gen == 6 &&
+          prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) {
+         simd16_cfg = NULL;
+      } else if (simd16_cfg) {
+         simd8_cfg = NULL;
+      }
+   }
+
     /* We have to compute the flat inputs after the visitor is finished running
      * because it relies on prog_data->urb_setup which is computed in
      * fs_visitor::calculate_urb_setup().
@@ -6065,15 +6082,24 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
     }
  
     if (simd8_cfg) {
+      prog_data->dispatch_8 = true;
        g.generate_code(simd8_cfg, 8);
-      prog_data->no_8 = false;
-   } else {
-      prog_data->no_8 = true;
+      prog_data->base.dispatch_grf_start_reg = simd8_grf_start;
+      prog_data->reg_blocks_0 = brw_register_blocks(simd8_grf_used);
+
+      if (simd16_cfg) {
+         prog_data->dispatch_16 = true;
+         prog_data->prog_offset_2 = g.generate_code(simd16_cfg, 16);
+         prog_data->dispatch_grf_start_reg_2 = simd16_grf_start;
+         prog_data->reg_blocks_2 = brw_register_blocks(simd16_grf_used);
+      }
+   } else if (simd16_cfg) {
+      prog_data->dispatch_16 = true;
+      g.generate_code(simd16_cfg, 16);
+      prog_data->base.dispatch_grf_start_reg = simd16_grf_start;
+      prog_data->reg_blocks_0 = brw_register_blocks(simd16_grf_used);
     }
  
-   if (simd16_cfg)
-      prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
-
     return g.get_assembly(final_assembly_size);
  }
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 58faf2f46944922b53954a4d62278e77e36adbdd..012492c0e0d8afa40eed8b89b491cd00e5b84506 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -169,7 +169,7 @@ fs_visitor::emit_dummy_fs()
     stage_prog_data->nr_pull_params = 0;
     stage_prog_data->curb_read_length = 0;
     stage_prog_data->dispatch_grf_start_reg = 2;
-   wm_prog_data->dispatch_grf_start_reg_16 = 2;
+   wm_prog_data->dispatch_grf_start_reg_2 = 2;
     grf_used = 1; /* Gen4-5 don't allow zero GRF blocks */
  
     calculate_cfg();
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c

index 91b35cd681a9b3c79d003270a56dd8341ebfb654..bf1bdc9948f67282c52f29b6e502e0912d7386c2 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -86,48 +86,37 @@ brw_upload_wm_unit(struct brw_context *brw)
                         sizeof(*wm), 32, &brw->wm.base.state_offset);
     memset(wm, 0, sizeof(*wm));
  
-   if (prog_data->prog_offset_16) {
+   if (prog_data->dispatch_8 && prog_data->dispatch_16) {
        /* These two fields should be the same pre-gen6, which is why we
         * only have one hardware field to program for both dispatch
         * widths.
         */
        assert(prog_data->base.dispatch_grf_start_reg ==
-            prog_data->dispatch_grf_start_reg_16);
+            prog_data->dispatch_grf_start_reg_2);
     }
  
     /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */
-   if (prog_data->no_8) {
-      wm->wm5.enable_16_pix = 1;
-      wm->thread0.grf_reg_count = prog_data->reg_blocks_16;
-      wm->thread0.kernel_start_pointer =
-         brw_program_reloc(brw,
-                           brw->wm.base.state_offset +
-                           offsetof(struct brw_wm_unit_state, thread0),
-                           brw->wm.base.prog_offset +
-                           prog_data->prog_offset_16 +
-                           (prog_data->reg_blocks_16 << 1)) >> 6;
-
-   } else {
-      wm->thread0.grf_reg_count = prog_data->reg_blocks;
-      wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_16;
-
-      wm->wm5.enable_8_pix = 1;
-      if (prog_data->prog_offset_16)
-         wm->wm5.enable_16_pix = 1;
+   wm->wm5.enable_8_pix = prog_data->dispatch_8;
+   wm->wm5.enable_16_pix = prog_data->dispatch_16;
  
+   if (prog_data->dispatch_8 || prog_data->dispatch_16) {
+      wm->thread0.grf_reg_count = prog_data->reg_blocks_0;
        wm->thread0.kernel_start_pointer =
           brw_program_reloc(brw,
                             brw->wm.base.state_offset +
                             offsetof(struct brw_wm_unit_state, thread0),
                             brw->wm.base.prog_offset +
                             (wm->thread0.grf_reg_count << 1)) >> 6;
+   }
  
+   if (prog_data->prog_offset_2) {
+      wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_2;
        wm->wm9.kernel_start_pointer_2 =
           brw_program_reloc(brw,
                             brw->wm.base.state_offset +
                             offsetof(struct brw_wm_unit_state, wm9),
                             brw->wm.base.prog_offset +
-                           prog_data->prog_offset_16 +
+                           prog_data->prog_offset_2 +
                             (wm->wm9.grf_reg_count_2 << 1)) >> 6;
     }
  
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c

index 4a5aa129d41f6bfec6163440ec34d7d98a581fc4..3e872af4894584b143d87a45c5d995ff39bf4958 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -129,29 +129,19 @@ gen6_upload_wm_state(struct brw_context *brw,
  
     dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
  
-   if (prog_data->prog_offset_16 || prog_data->no_8) {
+   if (prog_data->dispatch_8)
+      dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
+
+   if (prog_data->dispatch_16)
        dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
  
-      if (!prog_data->no_8 && !prog_data->persample_dispatch) {
-         dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
-         dw4 |= (prog_data->base.dispatch_grf_start_reg <<
-                 GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
-         dw4 |= (prog_data->dispatch_grf_start_reg_16 <<
-                 GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
-         ksp0 = stage_state->prog_offset;
-         ksp2 = stage_state->prog_offset + prog_data->prog_offset_16;
-      } else {
-         dw4 |= (prog_data->dispatch_grf_start_reg_16 <<
-                GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
-         ksp0 = stage_state->prog_offset + prog_data->prog_offset_16;
-      }
-   }
-   else {
-      dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
-      dw4 |= (prog_data->base.dispatch_grf_start_reg <<
-              GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
-      ksp0 = stage_state->prog_offset;
-   }
+   dw4 |= prog_data->base.dispatch_grf_start_reg <<
+          GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
+   dw4 |= prog_data->dispatch_grf_start_reg_2 <<
+          GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
+
+   ksp0 = stage_state->prog_offset;
+   ksp2 = stage_state->prog_offset + prog_data->prog_offset_2;
  
     if (dual_source_blend_enable)
        dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
@@ -200,37 +190,6 @@ gen6_upload_wm_state(struct brw_context *brw,
           dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
        else {
           dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
-
-         /* From the Sandy Bridge PRM, Vol 2 part 1, 7.7.1 ("Pixel Grouping
-          * (Dispatch Size) Control"), p.334:
-          *
-          *     Note: in the table below, the Valid column indicates which
-          *     products that combination is supported on. Combinations of
-          *     dispatch enables not listed in the table are not available on
-          *     any product.
-          *
-          *     A: Valid on all products
-          *
-          *     B: Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
-          *     computed depth.
-          *
-          *     D: Valid on all products, except when in non-1x PERSAMPLE mode
-          *     (applies to [DevSNB+] only). Not valid on [DevSNB] if 4x
-          *     PERPIXEL mode with pixel shader computed depth.
-          *
-          *     E: Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
-          *     computed depth.
-          *
-          *     F: Valid on all products, except not valid on [DevSNB] if 4x
-          *     PERPIXEL mode with pixel shader computed depth.
-          *
-          * In the table that follows, the only entry with "A" in the Valid
-          * column is the entry where only 8 pixel dispatch is enabled.
-          * Therefore, when we are in PERPIXEL mode with pixel shader computed
-          * depth, we need to disable SIMD16 dispatch.
-          */
-         if (dw5 & GEN6_WM_COMPUTED_DEPTH)
-            dw5 &= ~GEN6_WM_16_DISPATCH_ENABLE;
        }
     } else {
        dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c

index 8d2e2c32bb4c49ef7b246088a4ef2b4d06c3280c..a618c3ed87b7e555822a5ab167c3c5fe67d976ab 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -216,34 +216,19 @@ gen7_upload_ps_state(struct brw_context *brw,
  
     dw4 |= fast_clear_op;
  
-   if (prog_data->prog_offset_16 || prog_data->no_8) {
+   if (prog_data->dispatch_16)
        dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
  
-      /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
-       * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
-       * is successfully compiled. In majority of the cases that bring us
-       * better performance than 'SIMD8 only' dispatch.
-       */
-      if (!prog_data->no_8 && !prog_data->persample_dispatch) {
-         dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
-         dw5 |= (prog_data->base.dispatch_grf_start_reg <<
-                 GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
-         dw5 |= (prog_data->dispatch_grf_start_reg_16 <<
-                 GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
-         ksp0 = stage_state->prog_offset;
-         ksp2 = stage_state->prog_offset + prog_data->prog_offset_16;
-      } else {
-         dw5 |= (prog_data->dispatch_grf_start_reg_16 <<
-                 GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
-         ksp0 = stage_state->prog_offset + prog_data->prog_offset_16;
-      }
-   }
-   else {
+   if (prog_data->dispatch_8)
        dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
-      dw5 |= (prog_data->base.dispatch_grf_start_reg <<
-              GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
-      ksp0 = stage_state->prog_offset;
-   }
+
+   dw5 |= prog_data->base.dispatch_grf_start_reg <<
+          GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
+   dw5 |= prog_data->dispatch_grf_start_reg_2 <<
+          GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
+
+   ksp0 = stage_state->prog_offset;
+   ksp2 = stage_state->prog_offset + prog_data->prog_offset_2;
  
     BEGIN_BATCH(8);
     OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c

index b677a8e1793dbcfcd1e4eaa0d33f077d8c12d326..c475a52afe0a2ec652025d02651621b384b9c278 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -234,34 +234,19 @@ gen8_upload_ps_state(struct brw_context *brw,
  
     dw6 |= fast_clear_op;
  
-   if (prog_data->prog_offset_16 || prog_data->no_8) {
+   if (prog_data->dispatch_8)
+      dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
+
+   if (prog_data->dispatch_16)
        dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
  
-      /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
-       * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
-       * is successfully compiled. In majority of the cases that bring us
-       * better performance than 'SIMD8 only' dispatch.
-       */
-      if (!prog_data->no_8 && !prog_data->persample_dispatch) {
-         dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
-         dw7 |= (prog_data->base.dispatch_grf_start_reg <<
-                 GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
-         dw7 |= (prog_data->dispatch_grf_start_reg_16 <<
-                 GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
-         ksp0 = stage_state->prog_offset;
-         ksp2 = stage_state->prog_offset + prog_data->prog_offset_16;
-      } else {
-         dw7 |= (prog_data->dispatch_grf_start_reg_16 <<
-                 GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
-
-         ksp0 = stage_state->prog_offset + prog_data->prog_offset_16;
-      }
-   } else {
-      dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
-      dw7 |= (prog_data->base.dispatch_grf_start_reg <<
-              GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
-      ksp0 = stage_state->prog_offset;
-   }
+   dw7 |= prog_data->base.dispatch_grf_start_reg <<
+          GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
+   dw7 |= prog_data->dispatch_grf_start_reg_2 <<
+          GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
+
+   ksp0 = stage_state->prog_offset;
+   ksp2 = stage_state->prog_offset + prog_data->prog_offset_2;
  
     BEGIN_BATCH(12);
     OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Thu, 28 Apr 2016 22:37:39 +0000 (15:37 -0700)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Sat, 14 May 2016 20:34:25 +0000 (13:34 -0700)
src/intel/vulkan/anv_pipeline.c		patch \| blob \| history
src/intel/vulkan/anv_private.h		patch \| blob \| history
src/intel/vulkan/gen7_pipeline.c		patch \| blob \| history
src/intel/vulkan/gen8_pipeline.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_compiler.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen6_wm_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen7_wm_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen8_ps_state.c		patch \| blob \| history