i965/gen6: Stream the WM push constants.
author: Eric Anholt <eric@anholt.net>
Fri, 22 Apr 2011 02:03:18 +0000 (19:03 -0700)
committer: Eric Anholt <eric@anholt.net>
Fri, 29 Apr 2011 22:25:30 +0000 (15:25 -0700)
Improves 3DMMES taiji demo performance by 5.1% +/- 1.9% (n=15), by
reducing CPU time spent thrashing around those tiny little constant BOs.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_vtbl.c
src/mesa/drivers/dri/i965/gen6_wm_state.c

index 6bf8a1c83c7c3eb8cb1218b06ea769f09cbf4264..6a244984e91c00d76973aa70f8f609f6b9a5868a 100644 (file)
@@ -698,11 +698,11 @@ struct brw_context
       drm_intel_bo *state_bo;
       drm_intel_bo *const_bo; /* pull constant buffer. */
       /**
-       *  This is the push constant BO on gen6.
+       * This is the offset in the batch to the push constants on gen6.
        *
        * Pre-gen6, push constants live in the CURBE.
        */
-      drm_intel_bo *push_const_bo;
+      uint32_t push_const_offset;
    } wm;
 
 
index a22e63c9caab011beb563e893bc3bdede84654e1..321dbc19ede158493052a4993d7b176919a957c7 100644 (file)
@@ -364,15 +364,17 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
 
    for (i = 0; i < nr_prims; i++) {
       uint32_t hw_prim;
+      int estimated_max_prim_size;
+
+      estimated_max_prim_size = 512; /* batchbuffer commands */
+      estimated_max_prim_size += 1024; /* gen6 WM push constants */
+      estimated_max_prim_size += 512; /* misc. pad */
 
       /* Flush the batch if it's approaching full, so that we don't wrap while
        * we've got validated state that needs to be in the same batch as the
-       * primitives.  This fraction is just a guess (minimal full state plus
-       * a primitive is around 512 bytes), and would be better if we had
-       * an upper bound of how much we might emit in a single
-       * brw_try_draw_prims().
+       * primitives.
        */
-      intel_batchbuffer_require_space(intel, 1024, false);
+      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
 
       hw_prim = brw_set_prim(brw, &prim[i]);
       if (brw->state.dirty.brw) {
index ce8712a260f0c3a56fb723fcca75b8de0543fe29..4961b0449dfb194db45995e1c739e9e33340592e 100644 (file)
@@ -93,7 +93,6 @@ static void brw_destroy_context( struct intel_context *intel )
    dri_bo_release(&brw->wm.prog_bo);
    dri_bo_release(&brw->wm.state_bo);
    dri_bo_release(&brw->wm.const_bo);
-   dri_bo_release(&brw->wm.push_const_bo);
    dri_bo_release(&brw->cc.prog_bo);
    dri_bo_release(&brw->cc.vp_bo);
    dri_bo_release(&brw->cc.blend_state_bo);
index c4b57fe1f951218252d7dac21cbe926248e17a4b..fe5c7a1120b02e0616799b9ddd23e2c2492221b0 100644 (file)
@@ -42,9 +42,6 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
    const struct brw_fragment_program *fp =
       brw_fragment_program_const(brw->fragment_program);
 
-   drm_intel_bo_unreference(brw->wm.push_const_bo);
-   brw->wm.push_const_bo = NULL;
-
    /* Updates the ParamaterValues[i] pointers for all parameters of the
     * basic type of PROGRAM_STATE_VAR.
     */
@@ -55,13 +52,11 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
       float *constants;
       unsigned int i;
 
-      brw->wm.push_const_bo = drm_intel_bo_alloc(intel->bufmgr,
-                                                "WM constant_bo",
-                                                brw->wm.prog_data->nr_params *
-                                                sizeof(float),
-                                                4096);
-      drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo);
-      constants = brw->wm.push_const_bo->virtual;
+      constants = brw_state_batch(brw,
+                                 brw->wm.prog_data->nr_params *
+                                 sizeof(float),
+                                 32, &brw->wm.push_const_offset);
+
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
         constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
                                      *brw->wm.prog_data->param[i]);
@@ -80,15 +75,14 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
            printf("\n");
         printf("\n");
       }
-
-      drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo);
    }
 }
 
 const struct brw_tracked_state gen6_wm_constants = {
    .dirty = {
       .mesa  = _NEW_PROGRAM_CONSTANTS,
-      .brw   = BRW_NEW_FRAGMENT_PROGRAM,
+      .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_FRAGMENT_PROGRAM),
       .cache = 0,
    },
    .prepare = gen6_prepare_wm_push_constants,
@@ -118,8 +112,10 @@ upload_wm_state(struct brw_context *brw)
       OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
                GEN6_CONSTANT_BUFFER_0_ENABLE |
                (5 - 2));
-      OUT_RELOC(brw->wm.push_const_bo,
+      /* This is also the set of state flags from gen6_prepare_wm_push_constants */
+      OUT_RELOC(intel->batch.bo,
                I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+               brw->wm.push_const_offset +
                ALIGN(brw->wm.prog_data->nr_params,
                      brw->wm.prog_data->dispatch_width) / 8 - 1);
       OUT_BATCH(0);