i965: Micro-optimize swizzle_to_scs() and make it inlinable.
authorKenneth Graunke <kenneth@whitecape.org>
Thu, 31 Jul 2014 08:26:30 +0000 (01:26 -0700)
committerKenneth Graunke <kenneth@whitecape.org>
Mon, 5 Jan 2015 05:31:40 +0000 (21:31 -0800)
brw_swizzle_to_scs has been showing up in my CPU profiling, which is
rather silly - it's a tiny amount of code.  It really should be inlined,
and can easily be implemented with fewer instructions.

The enum translation is as follows:

SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
        0          1          2          3             4            5
        4          5          6          7             0            1
  SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE

which is simply (swizzle + 4) & 7.

Haswell needs extra textureGather workarounds to remap GREEN to BLUE,
but Broadwell and later do not.

This patch replicates swizzle_to_scs in gen7_wm_surface_state.c and
gen8_surface_state.c, since the Gen8+ code can be simplified to a mere
two instructions.  Both copies can be marked static for easy inlining.

v2: Put the commit message in the code as comments (requested by
    Jason Ekstrand).  Also fix a typo.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
src/mesa/drivers/dri/i965/gen8_surface_state.c

index 399347c15fe6cf910767d8e63dc147a79bf9c8d8..f195407e88279d8b41a732a4e4752798f9a837e0 100644 (file)
@@ -225,7 +225,6 @@ int brw_get_texture_swizzle(const struct gl_context *ctx,
                             const struct gl_texture_object *t);
 
 /* gen7_wm_surface_state.c */
-unsigned brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue);
 uint32_t gen7_surface_tiling_mode(uint32_t tiling);
 uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l);
 void gen7_set_surface_mcs_info(struct brw_context *brw,
index c257cb79c30258e5e0e20f364e6bd9c13cd6489e..e2c347a51eef9e41160c7f9d880fcd8b9fcebaaf 100644 (file)
 
 /**
  * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
- * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED)
+ * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
+ *
+ * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
+ *         0          1          2          3             4            5
+ *         4          5          6          7             0            1
+ *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
+ *
+ * which is simply adding 4 then modding by 8 (or anding with 7).
+ *
+ * We then may need to apply workarounds for textureGather hardware bugs.
  */
-unsigned
-brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
+static unsigned
+swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
 {
-   switch (swizzle) {
-   case SWIZZLE_X:
-      return HSW_SCS_RED;
-   case SWIZZLE_Y:
-      return need_green_to_blue ? HSW_SCS_BLUE : HSW_SCS_GREEN;
-   case SWIZZLE_Z:
-      return HSW_SCS_BLUE;
-   case SWIZZLE_W:
-      return HSW_SCS_ALPHA;
-   case SWIZZLE_ZERO:
-      return HSW_SCS_ZERO;
-   case SWIZZLE_ONE:
-      return HSW_SCS_ONE;
-   }
+   unsigned scs = (swizzle + 4) & 7;
 
-   unreachable("Should not get here: invalid swizzle mode");
+   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
 }
 
 uint32_t
@@ -353,10 +349,10 @@ gen7_update_texture_surface(struct gl_context *ctx,
       const bool need_scs_green_to_blue = for_gather && tex_format == BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
 
       surf[7] |=
-         SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) |
-         SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), need_scs_green_to_blue), GEN7_SURFACE_SCS_G) |
-         SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), need_scs_green_to_blue), GEN7_SURFACE_SCS_B) |
-         SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), need_scs_green_to_blue), GEN7_SURFACE_SCS_A);
+         SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) |
+         SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1), need_scs_green_to_blue), GEN7_SURFACE_SCS_G) |
+         SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2), need_scs_green_to_blue), GEN7_SURFACE_SCS_B) |
+         SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3), need_scs_green_to_blue), GEN7_SURFACE_SCS_A);
    }
 
    if (mt->mcs_mt) {
index 56c46b0469abe73e3b68a2adf372433ebe0313f2..d1b095cf53559eed1beed6b808fc38d1e0d99b55 100644 (file)
 #include "brw_defines.h"
 #include "brw_wm.h"
 
+/**
+ * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
+ * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
+ *
+ * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
+ *         0          1          2          3             4            5
+ *         4          5          6          7             0            1
+ *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
+ *
+ * which is simply adding 4 then modding by 8 (or anding with 7).
+ */
+static unsigned
+swizzle_to_scs(unsigned swizzle)
+{
+   return (swizzle + 4) & 7;
+}
+
 static uint32_t
 surface_tiling_mode(uint32_t tiling)
 {
@@ -231,10 +248,10 @@ gen8_update_texture_surface(struct gl_context *ctx,
    const int swizzle =
       unlikely(alpha_depth) ? SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj);
    surf[7] |=
-      SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), false), GEN7_SURFACE_SCS_R) |
-      SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), false), GEN7_SURFACE_SCS_G) |
-      SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), false), GEN7_SURFACE_SCS_B) |
-      SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), false), GEN7_SURFACE_SCS_A);
+      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
+      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
+      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
+      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);
 
    *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */