i965/gen6: Use the dynamic state base address to reduce relocations.
author Eric Anholt <eric@anholt.net>
Fri, 22 Apr 2011 23:00:14 +0000 (16:00 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 29 Apr 2011 22:26:23 +0000 (15:26 -0700)
Now that all the dynamic state is streamed through the top of the
batchbuffer, we can cut out many of our relocations to that state by
using the base address.

Improves 3DMMES taiji performance 3.3% +/- 0.4% (n=15).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
src/mesa/drivers/dri/i965/gen6_cc.c
src/mesa/drivers/dri/i965/gen6_sampler_state.c
src/mesa/drivers/dri/i965/gen6_scissor_state.c
src/mesa/drivers/dri/i965/gen6_viewport_state.c
src/mesa/drivers/dri/i965/gen6_vs_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c

index c0ed6f750cd17f9b4cd753a2309d552523b398b3..7325a2b6c8dab55d34c970d4dcb6f8b11cfadbec 100644 (file)
@@ -550,12 +550,28 @@ static void upload_state_base_address( struct brw_context *brw )
    if (intel->gen >= 6) {
        BEGIN_BATCH(10);
        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
-       OUT_BATCH(1); /* General state base address */
-       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
-                1); /* Surface state base address */
-       OUT_BATCH(1); /* Dynamic state base address */
-       OUT_BATCH(1); /* Indirect object base address */
-       OUT_BATCH(1); /* Instruction base address */
+       /* General state base address: stateless DP read/write requests */
+       OUT_BATCH(1);
+       /* Surface state base address:
+       * BINDING_TABLE_STATE
+       * SURFACE_STATE
+       */
+       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
+        /* Dynamic state base address:
+        * SAMPLER_STATE
+        * SAMPLER_BORDER_COLOR_STATE
+        * CLIP, SF, WM/CC viewport state
+        * COLOR_CALC_STATE
+        * DEPTH_STENCIL_STATE
+        * BLEND_STATE
+        * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
+        * Disable is clear, which we rely on)
+        */
+       OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
+                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
+
+       OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
+       OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */
        OUT_BATCH(1); /* General state upper bound */
        OUT_BATCH(1); /* Dynamic state upper bound */
        OUT_BATCH(1); /* Indirect object upper bound */
index 7d215f563c3f1eac9bbc4c97b50d06d1b5097e20..008aceb222b8e1dd49d08e10dccb6a4870825956 100644 (file)
@@ -120,6 +120,9 @@ static const struct brw_tracked_state *gen6_atoms[] =
    /* Command packets: */
    &brw_invarient_state,
 
+   /* must do before binding table pointers, cc state ptrs */
+   &brw_state_base_address,
+
    &brw_cc_vp,
    &gen6_viewport_state,       /* must do after *_vp stages */
 
@@ -150,8 +153,6 @@ static const struct brw_tracked_state *gen6_atoms[] =
 
    &gen6_scissor_state,
 
-   &brw_state_base_address,
-
    &gen6_binding_table_pointers,
 
    &brw_depthbuffer,
index c24494b645f789ec631b6f669da009c5202a41d7..7b93bf90241b0ac6ba8d3c12ffd93c5f140409f5 100644 (file)
@@ -271,16 +271,20 @@ static void brw_update_sampler_state(struct brw_context *brw,
 
    upload_default_color(brw, gl_sampler, unit);
 
-   /* reloc */
-   sampler->ss2.default_color_pointer = (intel->batch.bo->offset +
-                                        brw->wm.sdc_offset[unit]) >> 5;
-
-   drm_intel_bo_emit_reloc(intel->batch.bo,
-                          brw->wm.sampler_offset +
-                          unit * sizeof(struct brw_sampler_state) +
-                          offsetof(struct brw_sampler_state, ss2),
-                          intel->batch.bo, brw->wm.sdc_offset[unit],
-                          I915_GEM_DOMAIN_SAMPLER, 0);
+   if (intel->gen >= 6) {
+      sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
+   } else {
+      /* reloc */
+      sampler->ss2.default_color_pointer = (intel->batch.bo->offset +
+                                           brw->wm.sdc_offset[unit]) >> 5;
+
+      drm_intel_bo_emit_reloc(intel->batch.bo,
+                             brw->wm.sampler_offset +
+                             unit * sizeof(struct brw_sampler_state) +
+                             offsetof(struct brw_sampler_state, ss2),
+                             intel->batch.bo, brw->wm.sdc_offset[unit],
+                             I915_GEM_DOMAIN_SAMPLER, 0);
+   }
 }
 
 
index 938f9148d22414054604a201f9b5ac6b0259455a..80985db3d686f63eddc2d70afecf8ee62dc57c1e 100644 (file)
@@ -156,12 +156,9 @@ static void upload_cc_state_pointers(struct brw_context *brw)
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
-   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-            brw->cc.blend_state_offset | 1);
-   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-            brw->cc.depth_stencil_state_offset | 1);
-   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-            brw->cc.state_offset | 1);
+   OUT_BATCH(brw->cc.blend_state_offset | 1);
+   OUT_BATCH(brw->cc.depth_stencil_state_offset | 1);
+   OUT_BATCH(brw->cc.state_offset | 1);
    ADVANCE_BATCH();
 }
 
index 969780ac1dfd964075f8fb96ef77622a2cc4da7b..4cdec699df6d044edc995c0574e36ec22d5a9265 100644 (file)
@@ -43,12 +43,7 @@ upload_sampler_state_pointers(struct brw_context *brw)
             (4 - 2));
    OUT_BATCH(0); /* VS */
    OUT_BATCH(0); /* GS */
-   if (brw->wm.sampler_count)
-      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-               brw->wm.sampler_offset);
-   else
-      OUT_BATCH(0);
-
+   OUT_BATCH(brw->wm.sampler_offset);
    ADVANCE_BATCH();
 }
 
index d0b37a078d5638ca79d67256872c1b64ec4fd865..fad3ca0dd04d772197c8a3c39db27b070dd1c9ab 100644 (file)
@@ -79,8 +79,7 @@ gen6_prepare_scissor_state(struct brw_context *brw)
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
-   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-            scissor_state_offset);
+   OUT_BATCH(scissor_state_offset);
    ADVANCE_BATCH();
 }
 
index c46dc3e3c11c1035c034b320358634591f6cb26d..4116bdb96dece5140da4e4b21ea027125b621bb6 100644 (file)
@@ -113,12 +113,9 @@ static void upload_viewport_state_pointers(struct brw_context *brw)
             GEN6_CC_VIEWPORT_MODIFY |
             GEN6_SF_VIEWPORT_MODIFY |
             GEN6_CLIP_VIEWPORT_MODIFY);
-   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-            brw->clip.vp_offset);
-   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-            brw->sf.vp_offset);
-   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-            brw->cc.vp_offset);
+   OUT_BATCH(brw->clip.vp_offset);
+   OUT_BATCH(brw->sf.vp_offset);
+   OUT_BATCH(brw->cc.vp_offset);
    ADVANCE_BATCH();
 }
 
index 00c28ed8d227d68b3d05da061adfc0630eaed326..b46368e36e29539798dc23f3e3bdd85452391860 100644 (file)
@@ -134,10 +134,10 @@ upload_vs_state(struct brw_context *brw)
       OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
                GEN6_CONSTANT_BUFFER_0_ENABLE |
                (5 - 2));
-      /* This is also the set of state flags from gen6_prepare_vs_constants */
-      OUT_RELOC(intel->batch.bo,
-               I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
-               brw->vs.push_const_offset +
+      /* Pointer to the VS constant buffer.  Covered by the set of
+       * state flags from gen6_prepare_vs_constants
+       */
+      OUT_BATCH(brw->vs.push_const_offset +
                brw->vs.push_const_size - 1);
       OUT_BATCH(0);
       OUT_BATCH(0);
index fe5c7a1120b02e0616799b9ddd23e2c2492221b0..33b233414c6adf6770a98d7650ceebad4a06e3ef 100644 (file)
@@ -112,10 +112,10 @@ upload_wm_state(struct brw_context *brw)
       OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
                GEN6_CONSTANT_BUFFER_0_ENABLE |
                (5 - 2));
-      /* This is also the set of state flags from gen6_prepare_wm_constants */
-      OUT_RELOC(intel->batch.bo,
-               I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
-               brw->wm.push_const_offset +
+      /* Pointer to the WM constant buffer.  Covered by the set of
+       * state flags from gen6_prepare_wm_constants
+       */
+      OUT_BATCH(brw->wm.push_const_offset +
                ALIGN(brw->wm.prog_data->nr_params,
                      brw->wm.prog_data->dispatch_width) / 8 - 1);
       OUT_BATCH(0);