i965: Make the fragment shader pull constants index by dwords, not vec4s.
author Eric Anholt <eric@anholt.net>
Thu, 14 Mar 2013 21:41:37 +0000 (14:41 -0700)
committer Eric Anholt <eric@anholt.net>
Mon, 1 Apr 2013 23:17:25 +0000 (16:17 -0700)
We want to load vec4s, since loading a vec4 instead of a dword adds
basically no latency.  But for variable indexed access, the
previous requirement of aligned vec4s for a sampler LD was hard to
implement.

Note that this change only affects those messages that use the surface
format, like sampler LDs; it does not affect the untyped data cache loads
we've used in other cases.

No significant performance difference on my GLSL demo with uniforms forced
to take the varying pull constants path (n=4).

NOTE: This is a candidate for the 9.1 branch.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_vs_surface_state.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
src/mesa/drivers/dri/intel/intel_context.h

index 7c9ac66404024edae5a80a3f178eb8e522392e80..da3ac1554cd3023a1071d09fd7d5f740c6f74f0a 100644 (file)
@@ -2478,10 +2478,13 @@ fs_visitor::lower_uniform_pull_constant_loads()
          continue;
 
       if (intel->gen >= 7) {
+         /* The offset arg before was a vec4-aligned byte offset.  We need to
+          * turn it into a dword offset.
+          */
          fs_reg const_offset_reg = inst->src[1];
          assert(const_offset_reg.file == IMM &&
                 const_offset_reg.type == BRW_REGISTER_TYPE_UD);
-         const_offset_reg.imm.u /= 16;
+         const_offset_reg.imm.u /= 4;
          fs_reg payload = fs_reg(this, glsl_type::uint_type);
 
          /* This is actually going to be a MOV, but since only the first dword
index 1f5e18a5e3338e50b00021d9b06b19ef64427241..0914cdde7d0e968ba2937c81bdd408dfa3fd111d 100644 (file)
@@ -187,11 +187,6 @@ void *brw_state_batch(struct brw_context *brw,
 void gen4_init_vtable_surface_functions(struct brw_context *brw);
 uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
 uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
-void brw_create_constant_surface(struct brw_context *brw,
-                                drm_intel_bo *bo,
-                                uint32_t offset,
-                                int width,
-                                uint32_t *out_offset);
 
 uint32_t brw_format_for_mesa_format(gl_format mesa_format);
 
index 6c0b690818f139f3a13e02ebc86d5b873f2961a5..675a84ccf2428af99b25f4864678c46cb309371f 100644 (file)
@@ -91,7 +91,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
 
    const int surf = SURF_INDEX_VERT_CONST_BUFFER;
    intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, size,
-                                      &brw->vs.surf_offset[surf]);
+                                      &brw->vs.surf_offset[surf], false);
 
    brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
 }
index e458da7751b66825999ed78a7541d1eecd404ea4..a74b2c7cc1e9c22bb45bec2613d22c9e8e85f979 100644 (file)
@@ -913,15 +913,16 @@ brw_update_texture_surface(struct gl_context *ctx,
  * Create the constant buffer surface.  Vertex/fragment shader constants will be
  * read from this buffer with Data Port Read instructions/messages.
  */
-void
+static void
 brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
-                           uint32_t *out_offset)
+                           uint32_t *out_offset,
+                            bool dword_pitch)
 {
    struct intel_context *intel = &brw->intel;
-   uint32_t stride = 16;
+   uint32_t stride = dword_pitch ? 4 : 16;
    uint32_t elements = ALIGN(size, stride) / stride;
    const GLint w = elements - 1;
    uint32_t *surf;
@@ -1090,7 +1091,8 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
    drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
 
    intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
-                                      &brw->wm.surf_offset[surf_index]);
+                                      &brw->wm.surf_offset[surf_index],
+                                       true);
 
    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 }
@@ -1443,7 +1445,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
        */
       intel->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                          bo->size - binding->Offset,
-                                         &surf_offsets[i]);
+                                         &surf_offsets[i],
+                                          shader->Type == GL_FRAGMENT_SHADER);
    }
 
    if (shader->NumUniformBlocks)
index 484afcd2502afad12be1f80b13edae2d409c678d..2c12be3c6353026e1f3064f23e48fab6cff9f375 100644 (file)
@@ -384,10 +384,11 @@ gen7_create_constant_surface(struct brw_context *brw,
                             drm_intel_bo *bo,
                             uint32_t offset,
                             uint32_t size,
-                            uint32_t *out_offset)
+                            uint32_t *out_offset,
+                             bool dword_pitch)
 {
    struct intel_context *intel = &brw->intel;
-   uint32_t stride = 16;
+   uint32_t stride = dword_pitch ? 4 : 16;
    uint32_t elements = ALIGN(size, stride) / stride;
    const GLint w = elements - 1;
 
index 958db1cade1a328895552698df484da41617f350..b130c02e6bee8c908618445057f2471a4f0614eb 100644 (file)
@@ -203,13 +203,14 @@ struct intel_context
                                      drm_intel_bo *bo,
                                      uint32_t offset,
                                      uint32_t size,
-                                     uint32_t *out_offset);
+                                     uint32_t *out_offset,
+                                      bool dword_pitch);
       /** \} */
    } vtbl;
 
    GLbitfield Fallback;  /**< mask of INTEL_FALLBACK_x bits */
    GLuint NewGLState;
-
    dri_bufmgr *bufmgr;
    unsigned int maxBatchSize;