i965: compute DDX in a subspan based only on top row

author Chia-I Wu <olv@lunarg.com>
Thu, 12 Sep 2013 05:00:52 +0000 (13:00 +0800)
committer Chia-I Wu <olv@lunarg.com>
Wed, 2 Oct 2013 07:26:40 +0000 (15:26 +0800)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c

index 5f58a291d8d158fa579616a28fa6543ca7cd6114..18b8e573b598518d0abf383363cab952b6911171 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -478,6 +478,8 @@ brwCreateContext(int api,
     brw_draw_init( brw );
  
     brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
+   brw->disable_derivative_optimization =
+      driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
  
     ctx->Const.ContextFlags = 0;
     if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h

index 0f88bad247565f45864a797ff6f609b36f65398a..0ec12185ee4978af1fa1c68afd246f5f4bebb0b9 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1005,6 +1005,7 @@ struct brw_context
     bool always_flush_cache;
     bool disable_throttling;
     bool precompile;
+   bool disable_derivative_optimization;
  
     driOptionCache optionCache;
     /** @} */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index bcb15ee0f92323420c63eb8118f6e67c6b5ad2fc..0b441d451f15b8bdd1cf582ac56f32c40abb16d8 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3213,6 +3213,12 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
  
     key.nr_color_regions = 1;
  
+   /* GL_FRAGMENT_SHADER_DERIVATIVE_HINT is almost always GL_DONT_CARE.  The
+    * quality of the derivatives is likely to be determined by the driconf
+    * option.
+    */
+   key.high_quality_derivatives = brw->disable_derivative_optimization;
+
     key.program_string_id = bfp->id;
  
     uint32_t old_prog_offset = brw->wm.base.prog_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp

index 7ce42c4b9dce5970d3af727d61b27b13f9962092..9eb5e177928ef81d8c4f240e9a329b5ca6973a05 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -540,7 +540,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
   *
   * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
   *
- * and we're trying to produce:
+ * Ideally, we want to produce:
   *
   *           DDX                     DDY
   * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
@@ -556,24 +556,41 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
   *
   * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
   * for each pair, and vertstride = 2 jumps us 2 elements after processing a
- * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
- * between each other.  We could probably do it like ddx and swizzle the right
- * order later, but bail for now and just produce
+ * pair.  But the ideal approximation may impose a huge performance cost on
+ * sample_d.  On at least Haswell, the sample_d instruction performs some
+ * optimizations if the same LOD is used for all pixels in the subspan.
+ *
+ * For DDY, it's harder, as we want to produce the pairs swizzled between each
+ * other.  We could probably do it like ddx and swizzle the right order later,
+ * but bail for now and just produce
   * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
   */
  void
  fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
  {
+   unsigned vstride, width;
+
+   if (c->key.high_quality_derivatives) {
+      /* produce accurate derivatives */
+      vstride = BRW_VERTICAL_STRIDE_2;
+      width = BRW_WIDTH_2;
+   }
+   else {
+      /* replicate the derivative at the top-left pixel to other pixels */
+      vstride = BRW_VERTICAL_STRIDE_4;
+      width = BRW_WIDTH_4;
+   }
+
     struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                  BRW_REGISTER_TYPE_F,
-                                BRW_VERTICAL_STRIDE_2,
-                                BRW_WIDTH_2,
+                                vstride,
+                                width,
                                  BRW_HORIZONTAL_STRIDE_0,
                                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
     struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                  BRW_REGISTER_TYPE_F,
-                                BRW_VERTICAL_STRIDE_2,
-                                BRW_WIDTH_2,
+                                vstride,
+                                width,
                                  BRW_HORIZONTAL_STRIDE_0,
                                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
     brw_ADD(p, dst, src0, negate(src1));
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c

index 3d7ca2a4d038b096461843838fdaef16526ca9a5..9745edac6cae0f353e110b6777c63acb9c8cb19b 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -416,6 +416,15 @@ static void brw_wm_populate_key( struct brw_context *brw,
  
     key->line_aa = line_aa;
  
+   /* _NEW_HINT */
+   if (brw->disable_derivative_optimization) {
+      key->high_quality_derivatives =
+         ctx->Hint.FragmentShaderDerivative != GL_FASTEST;
+   } else {
+      key->high_quality_derivatives =
+         ctx->Hint.FragmentShaderDerivative == GL_NICEST;
+   }
+
     if (brw->gen < 6)
        key->stats_wm = brw->stats_wm;
  
@@ -503,6 +512,7 @@ const struct brw_tracked_state brw_wm_prog = {
                 _NEW_STENCIL |
                 _NEW_POLYGON |
                 _NEW_LINE |
+               _NEW_HINT |
                 _NEW_LIGHT |
                 _NEW_FRAG_CLAMP |
                 _NEW_BUFFERS |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h

index f7a2c5f234da5aaa78314cac1b8c4ae4eed3f4a1..aa786def463963d2d449409c01a8a89b47c94876 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -66,6 +66,7 @@ struct brw_wm_prog_key {
     GLuint render_to_fbo:1;
     GLuint clamp_fragment_color:1;
     GLuint line_aa:2;
+   GLuint high_quality_derivatives:1;
  
     GLushort drawable_height;
     GLbitfield64 input_slots_valid;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c

index de80a00ebb35cc054195c744e9b1b1158430c972..cddc8e8133079cbbdeb70d94770b9fb1fe64bc40 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -58,6 +58,10 @@ PUBLIC const char __driConfigOptions[] =
          DRI_CONF_DESC(en, "Enable Hierarchical Z on gen6+")
        DRI_CONF_OPT_END
  
+      DRI_CONF_OPT_BEGIN_B(disable_derivative_optimization, "false")
+        DRI_CONF_DESC(en, "Derivatives with finer granularity by default")
+      DRI_CONF_OPT_END
+
     DRI_CONF_SECTION_END
     DRI_CONF_SECTION_QUALITY
        DRI_CONF_FORCE_S3TC_ENABLE("false")
author	Chia-I Wu <olv@lunarg.com>
	Thu, 12 Sep 2013 05:00:52 +0000 (13:00 +0800)
committer	Chia-I Wu <olv@lunarg.com>
	Wed, 2 Oct 2013 07:26:40 +0000 (15:26 +0800)
src/mesa/drivers/dri/i965/brw_context.c		patch | blob | history
src/mesa/drivers/dri/i965/brw_context.h		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_generator.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_wm.c		patch | blob | history
src/mesa/drivers/dri/i965/brw_wm.h		patch | blob | history
src/mesa/drivers/dri/i965/intel_screen.c		patch | blob | history