i965: Use state streaming on programs, and state base address on gen5+.
authorEric Anholt <eric@anholt.net>
Wed, 27 Apr 2011 20:33:10 +0000 (13:33 -0700)
committerEric Anholt <eric@anholt.net>
Sat, 18 Jun 2011 23:00:45 +0000 (16:00 -0700)
There will be a little bit of thrashing of the program cache BO as the
cache warms up, but once the application is in steady state, this
reduces relocations on gen5 and later.

On my T420 laptop, cairogl firefox-talos-gfx performance improves 2.6%
+/- 1.3% (n=6).  No statistically significant performance difference
on nexuiz (n=5).

26 files changed:
src/mesa/drivers/dri/i965/brw_clip.c
src/mesa/drivers/dri/i965/brw_clip_state.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_gs_state.c
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_sf.c
src/mesa/drivers/dri/i965/brw_sf_state.c
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_cache.c
src/mesa/drivers/dri/i965/brw_state_dump.c
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_vs_state.c
src/mesa/drivers/dri/i965/brw_vtbl.c
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/brw_wm_state.c
src/mesa/drivers/dri/i965/gen6_gs_state.c
src/mesa/drivers/dri/i965/gen6_urb.c
src/mesa/drivers/dri/i965/gen6_vs_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c
src/mesa/drivers/dri/i965/gen7_disable.c
src/mesa/drivers/dri/i965/gen7_urb.c
src/mesa/drivers/dri/i965/gen7_vs_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c

index c7d428ba48dadf0b4d185b071e2cd898adfb9a0b..d82206bae52176917bea1784e6f686ad142b3c46 100644 (file)
@@ -146,15 +146,12 @@ static void compile_clip_prog( struct brw_context *brw,
       printf("\n");
    }
 
-   /* Upload
-    */
-   drm_intel_bo_unreference(brw->clip.prog_bo);
-   brw->clip.prog_bo = brw_upload_cache(&brw->cache,
-                                       BRW_CLIP_PROG,
-                                       &c.key, sizeof(c.key),
-                                       program, program_size,
-                                       &c.prog_data, sizeof(c.prog_data),
-                                       &brw->clip.prog_data);
+   brw_upload_cache(&brw->cache,
+                   BRW_CLIP_PROG,
+                   &c.key, sizeof(c.key),
+                   program, program_size,
+                   &c.prog_data, sizeof(c.prog_data),
+                   &brw->clip.prog_offset, &brw->clip.prog_data);
    ralloc_free(mem_ctx);
 }
 
@@ -271,12 +268,11 @@ static void upload_clip_prog(struct brw_context *brw)
       }
    }
 
-   drm_intel_bo_unreference(brw->clip.prog_bo);
-   brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
-                                       &key, sizeof(key),
-                                       &brw->clip.prog_data);
-   if (brw->clip.prog_bo == NULL)
+   if (!brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+                        &key, sizeof(key),
+                        &brw->clip.prog_offset, &brw->clip.prog_data)) {
       compile_clip_prog( brw, &key );
+   }
 }
 
 
index 6015c8cbe9fde8e93830b256d3c1641d745d8bab..b9efbb74c87a38492d0f38cc482b56716e5c397e 100644 (file)
@@ -43,11 +43,15 @@ brw_prepare_clip_unit(struct brw_context *brw)
    clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
    memset(clip, 0, sizeof(*clip));
 
-   /* CACHE_NEW_CLIP_PROG */
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */
    clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
                                 16 - 1);
-   /* reloc */
-   clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+   clip->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+                       brw->clip.state_offset +
+                       offsetof(struct brw_clip_unit_state, thread0),
+                       brw->clip.prog_offset +
+                       (clip->thread0.grf_reg_count << 1)) >> 6;
 
    clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    clip->thread1.single_program_flow = 1;
@@ -110,14 +114,6 @@ brw_prepare_clip_unit(struct brw_context *brw)
    clip->viewport_ymin = -1;
    clip->viewport_ymax = 1;
 
-   /* Emit clip program relocation */
-   assert(brw->clip.prog_bo);
-   drm_intel_bo_emit_reloc(intel->batch.bo,
-                          (brw->clip.state_offset +
-                           offsetof(struct brw_clip_unit_state, thread0)),
-                          brw->clip.prog_bo, clip->thread0.grf_reg_count << 1,
-                          I915_GEM_DOMAIN_INSTRUCTION, 0);
-
    brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
 }
 
@@ -125,6 +121,7 @@ const struct brw_tracked_state brw_clip_unit = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_PROGRAM_CACHE |
                BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_CLIP_PROG
index 621b6f8990b2661ffac0e772a45cd3ab0f35be81..16b71f6b1c986144e992615661a3695ccf3e094a 100644 (file)
@@ -142,7 +142,8 @@ enum brw_state_id {
    BRW_STATE_NR_VS_SURFACES,
    BRW_STATE_INDEX_BUFFER,
    BRW_STATE_VS_CONSTBUF,
-   BRW_STATE_WM_CONSTBUF
+   BRW_STATE_WM_CONSTBUF,
+   BRW_STATE_PROGRAM_CACHE,
 };
 
 #define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
@@ -172,6 +173,7 @@ enum brw_state_id {
 #define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
 #define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
 #define BRW_NEW_WM_CONSTBUF            (1 << BRW_STATE_WM_CONSTBUF)
+#define BRW_NEW_PROGRAM_CACHE          (1 << BRW_STATE_PROGRAM_CACHE)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -365,7 +367,8 @@ struct brw_cache_item {
    GLuint key_size;            /* for variable-sized keys */
    const void *key;
 
-   drm_intel_bo *bo;
+   uint32_t offset;
+   uint32_t size;
 
    struct brw_cache_item *next;
 };   
@@ -376,14 +379,11 @@ struct brw_cache {
    struct brw_context *brw;
 
    struct brw_cache_item **items;
+   drm_intel_bo *bo;
    GLuint size, n_items;
 
-   char *name[BRW_MAX_CACHE];
-
-   /* Record of the last BOs chosen for each cache_id.  Used to set
-    * brw->state.dirty.cache when a new cache item is chosen.
-    */
-   drm_intel_bo *last_bo[BRW_MAX_CACHE];
+   uint32_t next_offset;
+   bool bo_used_by_gpu;
 };
 
 
@@ -634,8 +634,9 @@ struct brw_context
       struct brw_vs_prog_data *prog_data;
       int8_t *constant_map; /* variable array following prog_data */
 
-      drm_intel_bo *prog_bo;
       drm_intel_bo *const_bo;
+      /** Offset in the program cache to the VS program */
+      uint32_t prog_offset;
       uint32_t state_offset;
 
       /** Binding table of pointers to surf_bo entries */
@@ -651,14 +652,16 @@ struct brw_context
       struct brw_gs_prog_data *prog_data;
 
       GLboolean prog_active;
+      /** Offset in the program cache to the CLIP program pre-gen6 */
+      uint32_t prog_offset;
       uint32_t state_offset;
-      drm_intel_bo *prog_bo;
    } gs;
 
    struct {
       struct brw_clip_prog_data *prog_data;
 
-      drm_intel_bo *prog_bo;
+      /** Offset in the program cache to the CLIP program pre-gen6 */
+      uint32_t prog_offset;
 
       /* Offset in the batch to the CLIP state on pre-gen6. */
       uint32_t state_offset;
@@ -673,7 +676,8 @@ struct brw_context
    struct {
       struct brw_sf_prog_data *prog_data;
 
-      drm_intel_bo *prog_bo;
+      /** Offset in the program cache to the CLIP program pre-gen6 */
+      uint32_t prog_offset;
       uint32_t state_offset;
       uint32_t vp_offset;
    } sf;
@@ -700,12 +704,14 @@ struct brw_context
       GLuint sampler_count;
       uint32_t sampler_offset;
 
+      /** Offset in the program cache to the WM program */
+      uint32_t prog_offset;
+
       /** Binding table of pointers to surf_bo entries */
       uint32_t bind_bo_offset;
       uint32_t surf_offset[BRW_WM_MAX_SURF];
       uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
 
-      drm_intel_bo *prog_bo;
       drm_intel_bo *const_bo; /* pull constant buffer. */
       /**
        * This is offset in the batch to the push constants on gen6.
@@ -717,9 +723,6 @@ struct brw_context
 
 
    struct {
-      /* gen4 */
-      drm_intel_bo *prog_bo;
-
       uint32_t state_offset;
       uint32_t blend_state_offset;
       uint32_t depth_stencil_state_offset;
@@ -874,6 +877,26 @@ brw_register_blocks(int reg_count)
    return ALIGN(reg_count, 16) / 16 - 1;
 }
 
+static inline uint32_t
+brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
+                 uint32_t prog_offset)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (intel->gen >= 5) {
+      /* Using state base address. */
+      return prog_offset;
+   }
+
+   drm_intel_bo_emit_reloc(intel->batch.bo,
+                          state_offset,
+                          brw->cache.bo,
+                          prog_offset,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   return brw->cache.bo->offset + prog_offset;
+}
+
 GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
 
 #endif
index 7c73a8fbf024b34ae9c5eaab5f2d67211224fb36..8580c78fea1ac18ffcc86f0915bdb583fca49ebf 100644 (file)
@@ -1697,14 +1697,12 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
 
    key.program_string_id = bfp->id;
 
-   drm_intel_bo *old_prog_bo = brw->wm.prog_bo;
+   uint32_t old_prog_offset = brw->wm.prog_offset;
    struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
-   brw->wm.prog_bo = NULL;
 
    bool success = do_wm_prog(brw, prog, bfp, &key);
 
-   drm_intel_bo_unreference(brw->wm.prog_bo);
-   brw->wm.prog_bo = old_prog_bo;
+   brw->wm.prog_offset = old_prog_offset;
    brw->wm.prog_data = old_prog_data;
 
    return success;
index 001cd62f8cac8a20f922409a8a1b4ef41d040bf0..3171e97d7af912b00bb8f0a059417a922053933d 100644 (file)
@@ -121,14 +121,11 @@ static void compile_gs_prog( struct brw_context *brw,
       printf("\n");
     }
 
-   /* Upload
-    */
-   drm_intel_bo_unreference(brw->gs.prog_bo);
-   brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG,
-                                     &c.key, sizeof(c.key),
-                                     program, program_size,
-                                     &c.prog_data, sizeof(c.prog_data),
-                                     &brw->gs.prog_data);
+   brw_upload_cache(&brw->cache, BRW_GS_PROG,
+                   &c.key, sizeof(c.key),
+                   program, program_size,
+                   &c.prog_data, sizeof(c.prog_data),
+                   &brw->gs.prog_offset, &brw->gs.prog_data);
    ralloc_free(mem_ctx);
 }
 
@@ -189,15 +186,12 @@ static void prepare_gs_prog(struct brw_context *brw)
       brw->gs.prog_active = key.need_gs_prog;
    }
 
-   drm_intel_bo_unreference(brw->gs.prog_bo);
-   brw->gs.prog_bo = NULL;
-
    if (brw->gs.prog_active) {
-      brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
-                                        &key, sizeof(key),
-                                        &brw->gs.prog_data);
-      if (brw->gs.prog_bo == NULL)
+      if (!brw_search_cache(&brw->cache, BRW_GS_PROG,
+                           &key, sizeof(key),
+                           &brw->gs.prog_offset, &brw->gs.prog_data)) {
         compile_gs_prog( brw, &key );
+      }
    }
 }
 
index 542874b7706a18aebfa14cc486963a89edfd20d4..bbfefcd816ae79c3409a58ab275944e232db7c8f 100644 (file)
@@ -45,12 +45,17 @@ brw_prepare_gs_unit(struct brw_context *brw)
 
    memset(gs, 0, sizeof(*gs));
 
-   /* CACHE_NEW_GS_PROG */
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_GS_PROG */
    if (brw->gs.prog_active) {
       gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) /
                                   16 - 1);
-      /* reloc */
-      gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+
+      gs->thread0.kernel_start_pointer =
+        brw_program_reloc(brw,
+                          brw->gs.state_offset +
+                          offsetof(struct brw_gs_unit_state, thread0),
+                          brw->gs.prog_offset +
+                          (gs->thread0.grf_reg_count << 1)) >> 6;
 
       gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
       gs->thread1.single_program_flow = 1;
@@ -69,13 +74,6 @@ brw_prepare_gs_unit(struct brw_context *brw)
         gs->thread4.max_threads = 1;
       else
         gs->thread4.max_threads = 0;
-
-      /* Emit GS program relocation */
-      drm_intel_bo_emit_reloc(intel->batch.bo,
-                             (brw->gs.state_offset +
-                              offsetof(struct brw_gs_unit_state, thread0)),
-                             brw->gs.prog_bo, gs->thread0.grf_reg_count << 1,
-                             I915_GEM_DOMAIN_INSTRUCTION, 0);
    }
 
    if (intel->gen == 5)
@@ -91,6 +89,7 @@ const struct brw_tracked_state brw_gs_unit = {
    .dirty = {
       .mesa  = 0,
       .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_PROGRAM_CACHE |
                BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_GS_PROG
index 1f3b64fd53834e4562afca095fc0a4e99431ad97..b0f95dd66b5f2e10accf48edc5d2fc7233eaa44e 100644 (file)
@@ -706,7 +706,9 @@ static void upload_state_base_address( struct brw_context *brw )
                                   I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
 
        OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
-       OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */
+       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+                1); /* Instruction base address: shader kernels (incl. SIP) */
+
        OUT_BATCH(1); /* General state upper bound */
        OUT_BATCH(1); /* Dynamic state upper bound */
        OUT_BATCH(1); /* Indirect object upper bound */
@@ -719,7 +721,8 @@ static void upload_state_base_address( struct brw_context *brw )
        OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                 1); /* Surface state base address */
        OUT_BATCH(1); /* Indirect object base address */
-       OUT_BATCH(1); /* Instruction base address */
+       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+                1); /* Instruction base address */
        OUT_BATCH(1); /* General state upper bound */
        OUT_BATCH(1); /* Indirect object upper bound */
        OUT_BATCH(1); /* Instruction access upper bound */
@@ -740,7 +743,8 @@ static void upload_state_base_address( struct brw_context *brw )
 const struct brw_tracked_state brw_state_base_address = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_BATCH,
+      .brw = (BRW_NEW_BATCH |
+             BRW_NEW_PROGRAM_CACHE),
       .cache = 0,
    },
    .emit = upload_state_base_address
index c2227777cfb645b4517ac07e8fb06d4d00fea0cd..fca30a74aaf8ba2a6615960b5aeeed2e78e62fab 100644 (file)
@@ -120,14 +120,11 @@ static void compile_sf_prog( struct brw_context *brw,
       printf("\n");
    }
 
-   /* Upload
-    */
-   drm_intel_bo_unreference(brw->sf.prog_bo);
-   brw->sf.prog_bo = brw_upload_cache(&brw->cache, BRW_SF_PROG,
-                                     &c.key, sizeof(c.key),
-                                     program, program_size,
-                                     &c.prog_data, sizeof(c.prog_data),
-                                     &brw->sf.prog_data);
+   brw_upload_cache(&brw->cache, BRW_SF_PROG,
+                   &c.key, sizeof(c.key),
+                   program, program_size,
+                   &c.prog_data, sizeof(c.prog_data),
+                   &brw->sf.prog_offset, &brw->sf.prog_data);
    ralloc_free(mem_ctx);
 }
 
@@ -191,12 +188,11 @@ static void upload_sf_prog(struct brw_context *brw)
       key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
    }
 
-   drm_intel_bo_unreference(brw->sf.prog_bo);
-   brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
-                                     &key, sizeof(key),
-                                     &brw->sf.prog_data);
-   if (brw->sf.prog_bo == NULL)
+   if (!brw_search_cache(&brw->cache, BRW_SF_PROG,
+                        &key, sizeof(key),
+                        &brw->sf.prog_offset, &brw->sf.prog_data)) {
       compile_sf_prog( brw, &key );
+   }
 }
 
 
index 78b22c4df3d50cf2c7f22a06ec3e6f1faad3ffcb..eb3d103099b43a3998b7c9e94d741f252795c7c6 100644 (file)
@@ -133,9 +133,14 @@ static void upload_sf_unit( struct brw_context *brw )
 
    memset(sf, 0, sizeof(*sf));
 
-   /* CACHE_NEW_SF_PROG */
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_SF_PROG */
    sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
-   sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+   sf->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+                       brw->sf.state_offset +
+                       offsetof(struct brw_sf_unit_state, thread0),
+                       brw->sf.prog_offset +
+                       (sf->thread0.grf_reg_count << 1)) >> 6;
 
    sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
@@ -282,11 +287,6 @@ static void upload_sf_unit( struct brw_context *brw )
    /* STATE_PREFETCH command description describes this state as being
     * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
     */
-   /* Emit SF program relocation */
-   drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
-                               offsetof(struct brw_sf_unit_state, thread0)),
-                          brw->sf.prog_bo, sf->thread0.grf_reg_count << 1,
-                          I915_GEM_DOMAIN_INSTRUCTION, 0);
 
    /* Emit SF viewport relocation */
    drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
@@ -308,6 +308,7 @@ const struct brw_tracked_state brw_sf_unit = {
                _NEW_SCISSOR |
                _NEW_BUFFERS),
       .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_PROGRAM_CACHE |
                BRW_NEW_URB_FENCE),
       .cache = (CACHE_NEW_SF_VP |
                CACHE_NEW_SF_PROG)
index 544ef7d47e6b37a3b7a4496a9353f0a6b1d27606..b384651d8d04150c4fd9e7aa70a5bcad76a9bf93 100644 (file)
@@ -145,21 +145,21 @@ void brw_clear_validated_bos(struct brw_context *brw);
  * brw_state_cache.c
  */
 
-drm_intel_bo *brw_upload_cache(struct brw_cache *cache,
-                              enum brw_cache_id cache_id,
-                              const void *key,
-                              GLuint key_sz,
-                              const void *data,
-                              GLuint data_sz,
-                              const void *aux,
-                              GLuint aux_sz,
-                              void *aux_return);
-
-drm_intel_bo *brw_search_cache( struct brw_cache *cache,
-                         enum brw_cache_id cache_id,
-                         const void *key,
-                         GLuint key_size,
-                         void *aux_return);
+void brw_upload_cache(struct brw_cache *cache,
+                     enum brw_cache_id cache_id,
+                     const void *key,
+                     GLuint key_sz,
+                     const void *data,
+                     GLuint data_sz,
+                     const void *aux,
+                     GLuint aux_sz,
+                     uint32_t *out_offset, void *out_aux);
+
+bool brw_search_cache(struct brw_cache *cache,
+                     enum brw_cache_id cache_id,
+                     const void *key,
+                     GLuint key_size,
+                     uint32_t *inout_offset, void *out_aux);
 void brw_state_cache_check_size( struct brw_context *brw );
 
 void brw_init_caches( struct brw_context *brw );
index f13a41fa7cc09d7a00935a8cc6d9f06c130544c9..d13711b19b7825ea360d2e6a3a7a355e5103174c 100644 (file)
@@ -45,6 +45,7 @@
  */
 
 #include "main/imports.h"
+#include "intel_batchbuffer.h"
 #include "brw_state.h"
 
 #define FILE_DEBUG_FLAG DEBUG_STATE
@@ -67,23 +68,6 @@ hash_key(struct brw_cache_item *item)
    return hash;
 }
 
-
-/**
- * Marks a new buffer as being chosen for the given cache id.
- */
-static void
-update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
-                 drm_intel_bo *bo)
-{
-   if (bo == cache->last_bo[cache_id])
-      return; /* no change */
-
-   drm_intel_bo_unreference(cache->last_bo[cache_id]);
-   cache->last_bo[cache_id] = bo;
-   drm_intel_bo_reference(cache->last_bo[cache_id]);
-   cache->brw->state.dirty.cache |= 1 << cache_id;
-}
-
 static int
 brw_cache_item_equals(const struct brw_cache_item *a,
                      const struct brw_cache_item *b)
@@ -145,12 +129,13 @@ rehash(struct brw_cache *cache)
 /**
  * Returns the buffer object matching cache_id and key, or NULL.
  */
-drm_intel_bo *
+bool
 brw_search_cache(struct brw_cache *cache,
                  enum brw_cache_id cache_id,
                  const void *key, GLuint key_size,
-                 void *aux_return)
+                 uint32_t *inout_offset, void *out_aux)
 {
+   struct brw_context *brw = cache->brw;
    struct brw_cache_item *item;
    struct brw_cache_item lookup;
    GLuint hash;
@@ -164,19 +149,45 @@ brw_search_cache(struct brw_cache *cache,
    item = search_cache(cache, hash, &lookup);
 
    if (item == NULL)
-      return NULL;
+      return false;
 
-   if (aux_return)
-      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+   *(void **)out_aux = ((char *)item->key + item->key_size);
 
-   update_cache_last(cache, cache_id, item->bo);
+   if (item->offset != *inout_offset) {
+      brw->state.dirty.cache |= (1 << cache_id);
+      *inout_offset = item->offset;
+   }
 
-   drm_intel_bo_reference(item->bo);
-   return item->bo;
+   return true;
 }
 
+static void
+brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
+{
+   struct brw_context *brw = cache->brw;
+   struct intel_context *intel = &brw->intel;
+   drm_intel_bo *new_bo;
+
+   new_bo = drm_intel_bo_alloc(intel->bufmgr, "program cache", new_size, 64);
 
-drm_intel_bo *
+   /* Copy any existing data that needs to be saved. */
+   if (cache->next_offset != 0) {
+      drm_intel_bo_map(cache->bo, false);
+      drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual);
+      drm_intel_bo_unmap(cache->bo);
+   }
+
+   drm_intel_bo_unreference(cache->bo);
+   cache->bo = new_bo;
+   cache->bo_used_by_gpu = false;
+
+   /* Since we have a new BO in place, we need to signal the units
+    * that depend on it (state base address on gen5+, or unit state before).
+    */
+   brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE;
+}
+
+void
 brw_upload_cache(struct brw_cache *cache,
                 enum brw_cache_id cache_id,
                 const void *key,
@@ -185,12 +196,12 @@ brw_upload_cache(struct brw_cache *cache,
                 GLuint data_size,
                 const void *aux,
                 GLuint aux_size,
-                void *aux_return)
+                uint32_t *out_offset,
+                void *out_aux)
 {
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
    GLuint hash;
    void *tmp;
-   drm_intel_bo *bo;
 
    item->cache_id = cache_id;
    item->key = key;
@@ -198,10 +209,28 @@ brw_upload_cache(struct brw_cache *cache,
    hash = hash_key(item);
    item->hash = hash;
 
-   /* Create the buffer object to contain the data */
-   bo = drm_intel_bo_alloc(cache->brw->intel.bufmgr,
-                          cache->name[cache_id], data_size, 1 << 6);
+   /* Allocate space in the cache BO for our new program. */
+   if (cache->next_offset + data_size > cache->bo->size) {
+      uint32_t new_size = cache->bo->size * 2;
+
+      while (cache->next_offset + data_size > new_size)
+        new_size *= 2;
+
+      brw_cache_new_bo(cache, new_size);
+   }
+
+   /* If we would block on writing to an in-use program BO, just
+    * recreate it.
+    */
+   if (cache->bo_used_by_gpu) {
+      brw_cache_new_bo(cache, cache->bo->size);
+   }
+
+   item->offset = cache->next_offset;
+   item->size = data_size;
 
+   /* Programs are always 64-byte aligned, so set up the next one now */
+   cache->next_offset = ALIGN(item->offset + data_size, 64);
 
    /* Set up the memory containing the key and aux_data */
    tmp = malloc(key_size + aux_size);
@@ -211,9 +240,6 @@ brw_upload_cache(struct brw_cache *cache,
 
    item->key = tmp;
 
-   item->bo = bo;
-   drm_intel_bo_reference(bo);
-
    if (cache->n_items > cache->size * 1.5)
       rehash(cache);
 
@@ -222,34 +248,18 @@ brw_upload_cache(struct brw_cache *cache,
    cache->items[hash] = item;
    cache->n_items++;
 
-   if (aux_return) {
-      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
-   }
-
-   DBG("upload %s: %d bytes to cache id %d\n",
-       cache->name[cache_id],
-       data_size, cache_id);
-
    /* Copy data to the buffer */
-   drm_intel_bo_subdata(bo, 0, data_size, data);
-
-   update_cache_last(cache, cache_id, bo);
+   drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
 
-   return bo;
-}
-
-static void
-brw_init_cache_id(struct brw_cache *cache,
-                  const char *name,
-                  enum brw_cache_id id)
-{
-   cache->name[id] = strdup(name);
+   *out_offset = item->offset;
+   *(void **)out_aux = (void *)((char *)item->key + item->key_size);
+   cache->brw->state.dirty.cache |= 1 << cache_id;
 }
 
-
 void
 brw_init_caches(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_cache *cache = &brw->cache;
 
    cache->brw = brw;
@@ -259,36 +269,15 @@ brw_init_caches(struct brw_context *brw)
    cache->items = (struct brw_cache_item **)
       calloc(1, cache->size * sizeof(struct brw_cache_item));
 
-   brw_init_cache_id(cache, "CC_VP", BRW_CC_VP);
-   brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT);
-   brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG);
-   brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER);
-   brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT);
-   brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG);
-   brw_init_cache_id(cache, "SF_VP", BRW_SF_VP);
-
-   brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT);
-
-   brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT);
-
-   brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG);
-
-   brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT);
-
-   brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG);
-   brw_init_cache_id(cache, "CLIP_VP", BRW_CLIP_VP);
-
-   brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT);
-
-   brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG);
-   brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE);
-   brw_init_cache_id(cache, "COLOR_CALC_STATE", BRW_COLOR_CALC_STATE);
-   brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE);
+   cache->bo = drm_intel_bo_alloc(intel->bufmgr,
+                                 "program cache",
+                                 4096, 64);
 }
 
 static void
 brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_cache_item *c, *next;
    GLuint i;
 
@@ -297,7 +286,6 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
    for (i = 0; i < cache->size; i++) {
       for (c = cache->items[i]; c; c = next) {
         next = c->next;
-        drm_intel_bo_unreference(c->bo);
         free((void *)c->key);
         free(c);
       }
@@ -306,9 +294,18 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
 
    cache->n_items = 0;
 
+   /* Start putting programs into the start of the BO again, since
+    * we'll never find the old results.
+    */
+   cache->next_offset = 0;
+
+   /* We need to make sure that the programs get regenerated, since
+    * any offsets leftover in brw_context will no longer be valid.
+    */
    brw->state.dirty.mesa |= ~0;
    brw->state.dirty.brw |= ~0;
    brw->state.dirty.cache |= ~0;
+   intel_batchbuffer_flush(intel);
 }
 
 void
@@ -325,15 +322,10 @@ brw_state_cache_check_size(struct brw_context *brw)
 static void
 brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
 {
-   GLuint i;
 
    DBG("%s\n", __FUNCTION__);
 
    brw_clear_cache(brw, cache);
-   for (i = 0; i < BRW_MAX_CACHE; i++) {
-      drm_intel_bo_unreference(cache->last_bo[i]);
-      free(cache->name[i]);
-   }
    free(cache->items);
    cache->items = NULL;
    cache->size = 0;
index ff06cb3a91edc208a7a64e9e96f8802099568f5b..7a3a88f04f5713b7846bbdc4ea2cc4f8b43e2ded 100644 (file)
@@ -459,21 +459,19 @@ static void dump_blend_state(struct brw_context *brw)
 
 }
 
-static void brw_debug_prog(const char *name, drm_intel_bo *prog)
+static void brw_debug_prog(struct brw_context *brw,
+                          const char *name, uint32_t prog_offset)
 {
    unsigned int i;
    uint32_t *data;
 
-   if (prog == NULL)
-      return;
-
-   drm_intel_bo_map(prog, GL_FALSE);
+   drm_intel_bo_map(brw->cache.bo, false);
 
-   data = prog->virtual;
+   data = brw->cache.bo->virtual + prog_offset;
 
-   for (i = 0; i < prog->size / 4 / 4; i++) {
+   for (i = 0; i < brw->cache.bo->size / 4 / 4; i++) {
       fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
-             name, (unsigned int)prog->offset + i * 4 * 4,
+             name, (unsigned int)brw->cache.bo->offset + i * 4 * 4,
              data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
       /* Stop at the end of the program.  It'd be nice to keep track of the actual
        * intended program size instead of guessing like this.
@@ -485,7 +483,7 @@ static void brw_debug_prog(const char *name, drm_intel_bo *prog)
         break;
    }
 
-   drm_intel_bo_unmap(prog);
+   drm_intel_bo_unmap(brw->cache.bo);
 }
 
 
@@ -518,17 +516,19 @@ void brw_debug_batch(struct intel_context *intel)
    if (intel->gen < 6)
        state_struct_out("VS", intel->batch.bo, brw->vs.state_offset,
                        sizeof(struct brw_vs_unit_state));
-   brw_debug_prog("VS prog", brw->vs.prog_bo);
+   brw_debug_prog(brw, "VS prog", brw->vs.prog_offset);
 
    if (intel->gen < 6)
        state_struct_out("GS", intel->batch.bo, brw->gs.state_offset,
                        sizeof(struct brw_gs_unit_state));
-   brw_debug_prog("GS prog", brw->gs.prog_bo);
+   if (brw->gs.prog_active) {
+      brw_debug_prog(brw, "GS prog", brw->gs.prog_offset);
+   }
 
    if (intel->gen < 6) {
       state_struct_out("SF", intel->batch.bo, brw->sf.state_offset,
                       sizeof(struct brw_sf_unit_state));
-      brw_debug_prog("SF prog", brw->sf.prog_bo);
+      brw_debug_prog(brw, "SF prog", brw->sf.prog_offset);
    }
    if (intel->gen >= 7)
       dump_sf_clip_viewport_state(brw);
@@ -540,7 +540,7 @@ void brw_debug_batch(struct intel_context *intel)
    if (intel->gen < 6)
        state_struct_out("WM", intel->batch.bo, brw->wm.state_offset,
                        sizeof(struct brw_wm_unit_state));
-   brw_debug_prog("WM prog", brw->wm.prog_bo);
+   brw_debug_prog(brw, "WM prog", brw->wm.prog_offset);
 
    if (intel->gen >= 6) {
        dump_cc_viewport_state(brw);
index 6a4c112dcf51a97aaee9531c4054399cc18c7587..50ab490f339fcda744a9851e0bc8588794445cdf 100644 (file)
@@ -47,11 +47,11 @@ static const struct brw_tracked_state *gen4_atoms[] =
    &brw_check_fallback,
 
    &brw_wm_input_sizes,
-   &brw_vs_prog,
-   &brw_gs_prog, 
-   &brw_clip_prog, 
-   &brw_sf_prog,
-   &brw_wm_prog,
+   &brw_vs_prog, /* must do before GS prog, state base address. */
+   &brw_gs_prog, /* must do before state base address */
+   &brw_clip_prog, /* must do before state base address */
+   &brw_sf_prog, /* must do before state base address */
+   &brw_wm_prog, /* must do before state base address */
 
    /* Once all the programs are done, we know how large urb entry
     * sizes need to be and can decide if we need to change the urb
@@ -110,9 +110,9 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &brw_check_fallback,
 
    &brw_wm_input_sizes,
-   &brw_vs_prog,
-   &brw_gs_prog,
-   &brw_wm_prog,
+   &brw_vs_prog, /* must do before state base address */
+   &brw_gs_prog, /* must do before state base address */
+   &brw_wm_prog, /* must do before state base address */
 
    &gen6_clip_vp,
    &gen6_sf_vp,
@@ -365,6 +365,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_PRIMITIVE),
    DEFINE_BIT(BRW_NEW_CONTEXT),
    DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
    DEFINE_BIT(BRW_NEW_PSP),
    DEFINE_BIT(BRW_NEW_WM_SURFACES),
    DEFINE_BIT(BRW_NEW_INDICES),
index 80d5e78ed0bcbeefeb75ec8ba6db3c57dd163beb..a9ad5311fe3555a68cf5d42536cde70581bc667e 100644 (file)
@@ -105,12 +105,11 @@ static void do_vs_prog( struct brw_context *brw,
    /* constant_map */
    aux_size += c.vp->program.Base.Parameters->NumParameters;
 
-   drm_intel_bo_unreference(brw->vs.prog_bo);
-   brw->vs.prog_bo = brw_upload_cache(&brw->cache, BRW_VS_PROG,
-                                     &c.key, sizeof(c.key),
-                                     program, program_size,
-                                     &c.prog_data, aux_size,
-                                     &brw->vs.prog_data);
+   brw_upload_cache(&brw->cache, BRW_VS_PROG,
+                   &c.key, sizeof(c.key),
+                   program, program_size,
+                   &c.prog_data, aux_size,
+                   &brw->vs.prog_offset, &brw->vs.prog_data);
    ralloc_free(mem_ctx);
 }
 
@@ -153,14 +152,11 @@ static void brw_upload_vs_prog(struct brw_context *brw)
       }
    }
 
-   /* Make an early check for the key.
-    */
-   drm_intel_bo_unreference(brw->vs.prog_bo);
-   brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
-                                     &key, sizeof(key),
-                                     &brw->vs.prog_data);
-   if (brw->vs.prog_bo == NULL)
+   if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
+                        &key, sizeof(key),
+                        &brw->vs.prog_offset, &brw->vs.prog_data)) {
       do_vs_prog(brw, vp, &key);
+   }
    brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
                           sizeof(*brw->vs.prog_data));
 }
index 1eee5b7e5de8e593f88149a7df826c8859215a72..185020c8164283d5fcf4759175fc3e93dda42c1e 100644 (file)
@@ -58,8 +58,14 @@ brw_prepare_vs_unit(struct brw_context *brw)
    vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset);
    memset(vs, 0, sizeof(*vs));
 
-   /* CACHE_NEW_VS_PROG */
-   vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */
+   vs->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+                       brw->vs.state_offset +
+                       offsetof(struct brw_vs_unit_state, thread0),
+                       brw->vs.prog_offset +
+                       (vs->thread0.grf_reg_count << 1)) >> 6;
+
    vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
    vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    /* Choosing multiple program flow means that we may get 2-vertex threads,
@@ -152,13 +158,6 @@ brw_prepare_vs_unit(struct brw_context *brw)
     */
    vs->vs6.vs_enable = 1;
 
-   /* Emit VS program relocation */
-   drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset +
-                                            offsetof(struct brw_vs_unit_state,
-                                                     thread0)),
-                          brw->vs.prog_bo, vs->thread0.grf_reg_count << 1,
-                          I915_GEM_DOMAIN_INSTRUCTION, 0);
-
    brw->state.dirty.cache |= CACHE_NEW_VS_UNIT;
 }
 
@@ -166,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_PROGRAM_CACHE |
                BRW_NEW_CURBE_OFFSETS |
                 BRW_NEW_NR_VS_SURFACES |
                BRW_NEW_URB_FENCE),
index 236c4d297d1fed713dc576bd1382ae9848da2510..0f731482629a6783d63bc8e5513bcfc1c13d5cdd 100644 (file)
@@ -69,14 +69,8 @@ static void brw_destroy_context( struct intel_context *intel )
    ralloc_free(brw->wm.compile_data);
 
    dri_bo_release(&brw->curbe.curbe_bo);
-   dri_bo_release(&brw->vs.prog_bo);
    dri_bo_release(&brw->vs.const_bo);
-   dri_bo_release(&brw->gs.prog_bo);
-   dri_bo_release(&brw->clip.prog_bo);
-   dri_bo_release(&brw->sf.prog_bo);
-   dri_bo_release(&brw->wm.prog_bo);
    dri_bo_release(&brw->wm.const_bo);
-   dri_bo_release(&brw->cc.prog_bo);
 
    free(brw->curbe.last_buf);
    free(brw->curbe.next_buf);
@@ -125,6 +119,12 @@ static void brw_new_batch( struct intel_context *intel )
    brw->state.dirty.brw |= BRW_NEW_CONTEXT | BRW_NEW_BATCH;
 
    brw->vb.nr_current_buffers = 0;
+
+   /* Mark that the current program cache BO has been used by the GPU.
+    * It will be reallocated if we need to put new programs in for the
+    * next batch.
+    */
+   brw->cache.bo_used_by_gpu = true;
 }
 
 static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
index 1aebd12df496709ab36279918f19111e8c5fce6c..f1c9985290c1d8103a77767f6e97069c6c73bf60 100644 (file)
@@ -273,12 +273,11 @@ bool do_wm_prog(struct brw_context *brw,
     */
    program = brw_get_program(&c->func, &program_size);
 
-   drm_intel_bo_unreference(brw->wm.prog_bo);
-   brw->wm.prog_bo = brw_upload_cache(&brw->cache, BRW_WM_PROG,
-                                     &c->key, sizeof(c->key),
-                                     program, program_size,
-                                     &c->prog_data, sizeof(c->prog_data),
-                                     &brw->wm.prog_data);
+   brw_upload_cache(&brw->cache, BRW_WM_PROG,
+                   &c->key, sizeof(c->key),
+                   program, program_size,
+                   &c->prog_data, sizeof(c->prog_data),
+                   &brw->wm.prog_offset, &brw->wm.prog_data);
 
    return true;
 }
@@ -477,13 +476,9 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
 
    brw_wm_populate_key(brw, &key);
 
-   /* Make an early check for the key.
-    */
-   drm_intel_bo_unreference(brw->wm.prog_bo);
-   brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
-                                     &key, sizeof(key),
-                                     &brw->wm.prog_data);
-   if (brw->wm.prog_bo == NULL) {
+   if (!brw_search_cache(&brw->cache, BRW_WM_PROG,
+                        &key, sizeof(key),
+                        &brw->wm.prog_offset, &brw->wm.prog_data)) {
       bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp,
                                &key);
       assert(success);
index ef98f8126dc7401134febbf518c434cbeaa839b0..506e2bdff5bf835bfe3b88e8c37d58f7fd5e21ad 100644 (file)
@@ -90,13 +90,25 @@ brw_prepare_wm_unit(struct brw_context *brw)
             brw->wm.prog_data->first_curbe_grf_16);
    }
 
-   /* CACHE_NEW_WM_PROG */
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */
    wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
    wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
-   wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
-   /* reloc */
-   wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
-                                    brw->wm.prog_data->prog_offset_16) >> 6;
+
+   wm->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+                       brw->wm.state_offset +
+                       offsetof(struct brw_wm_unit_state, thread0),
+                       brw->wm.prog_offset +
+                       (wm->thread0.grf_reg_count << 1)) >> 6;
+
+   wm->wm9.kernel_start_pointer_2 =
+      brw_program_reloc(brw,
+                       brw->wm.state_offset +
+                       offsetof(struct brw_wm_unit_state, wm9),
+                       brw->wm.prog_offset +
+                       brw->wm.prog_data->prog_offset_16 +
+                       (wm->wm9.grf_reg_count_2 << 1)) >> 6;
+
    wm->thread1.depth_coef_urb_read_offset = 1;
    wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
@@ -214,23 +226,6 @@ brw_prepare_wm_unit(struct brw_context *brw)
    if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm)
       wm->wm4.stats_enable = 1;
 
-   /* Emit WM program relocation */
-   drm_intel_bo_emit_reloc(intel->batch.bo,
-                          brw->wm.state_offset +
-                          offsetof(struct brw_wm_unit_state, thread0),
-                          brw->wm.prog_bo, wm->thread0.grf_reg_count << 1,
-                          I915_GEM_DOMAIN_INSTRUCTION, 0);
-
-   if (brw->wm.prog_data->prog_offset_16) {
-      drm_intel_bo_emit_reloc(intel->batch.bo,
-                             brw->wm.state_offset +
-                             offsetof(struct brw_wm_unit_state, wm9),
-                             brw->wm.prog_bo,
-                             ((wm->wm9.grf_reg_count_2 << 1) +
-                              brw->wm.prog_data->prog_offset_16),
-                             I915_GEM_DOMAIN_INSTRUCTION, 0);
-   }
-
    /* Emit scratch space relocation */
    if (brw->wm.prog_data->total_scratch != 0) {
       drm_intel_bo_emit_reloc(intel->batch.bo,
@@ -265,6 +260,7 @@ const struct brw_tracked_state brw_wm_unit = {
               _NEW_BUFFERS),
 
       .brw = (BRW_NEW_BATCH |
+             BRW_NEW_PROGRAM_CACHE |
              BRW_NEW_FRAGMENT_PROGRAM |
              BRW_NEW_CURBE_OFFSETS |
              BRW_NEW_NR_WM_SURFACES),
index c1d0a7393944720e5a54031b58b385acc1f616cf..e73e7824afe6afebc05e1c281c7dbd2e6e081140 100644 (file)
@@ -45,7 +45,7 @@ upload_gs_state(struct brw_context *brw)
    ADVANCE_BATCH();
 
    // GS should never be used on Gen6.  Disable it.
-   assert(brw->gs.prog_bo == NULL);
+   assert(!brw->gs.prog_active);
    BEGIN_BATCH(7);
    OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
    OUT_BATCH(0); /* prog_bo */
index 62645a6a30f64c9824783a3ad1aea0cf0c66c852..b4105111c8c842dba4dfedbb763df03fc341f64a 100644 (file)
@@ -64,7 +64,7 @@ upload_urb(struct brw_context *brw)
    assert(brw->urb.nr_vs_entries % 4 == 0);
    assert(brw->urb.nr_gs_entries % 4 == 0);
    /* GS requirement */
-   assert(!brw->gs.prog_bo || brw->urb.vs_size < 5);
+   assert(!brw->gs.prog_active || brw->urb.vs_size < 5);
 
    BEGIN_BATCH(3);
    OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
index b46368e36e29539798dc23f3e3bdd85452391860..7838a91d8d06cb6903e3155a9c935abfcb5d9c8b 100644 (file)
@@ -147,7 +147,7 @@ upload_vs_state(struct brw_context *brw)
 
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
-   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(brw->vs.prog_offset);
    OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
             GEN6_VS_FLOATING_POINT_MODE_ALT |
             (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
index 43e651db3ef8fdb80c46036e39d6c959ae86fa26..024a1d879e7e9f7accd4e5f01f70b6098c888136 100644 (file)
@@ -183,7 +183,7 @@ upload_wm_state(struct brw_context *brw)
 
    BEGIN_BATCH(9);
    OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
-   OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(brw->wm.prog_offset);
    OUT_BATCH(dw2);
    if (brw->wm.prog_data->total_scratch) {
       OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
@@ -195,12 +195,8 @@ upload_wm_state(struct brw_context *brw)
    OUT_BATCH(dw5);
    OUT_BATCH(dw6);
    OUT_BATCH(0); /* kernel 1 pointer */
-   if (brw->wm.prog_data->prog_offset_16) {
-      OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-               brw->wm.prog_data->prog_offset_16);
-   } else {
-      OUT_BATCH(0); /* kernel 2 pointer */
-   }
+   /* kernel 2 pointer */
+   OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
    ADVANCE_BATCH();
 }
 
index 4e9461739d0eadf0fc044887317d8d6193980034..a44d31596b9e785b43e4f6e09a205b84f1a0bed5 100644 (file)
@@ -31,7 +31,7 @@ disable_stages(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
 
-   assert(brw->gs.prog_bo == NULL);
+   assert(!brw->gs.prog_active);
 
    /* Disable the Geometry Shader (GS) Unit */
    BEGIN_BATCH(7);
index 3a614693dfc892fb9da2419b5b54deca4f657d9c..b36d780ed4a848893ab9dacd1ab3ab97bc7e49ac 100644 (file)
@@ -78,7 +78,7 @@ upload_urb(struct brw_context *brw)
    assert(brw->urb.nr_vs_entries % 8 == 0);
    assert(brw->urb.nr_gs_entries % 8 == 0);
    /* GS requirement */
-   assert(!brw->gs.prog_bo);
+   assert(brw->gs.prog_active);
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
index ae7a1d6c35cc9a4492acde265a2d3f4952b8bbce..0fad3d2fb683972ea12856561e7973107b1e1cb7 100644 (file)
@@ -67,7 +67,7 @@ upload_vs_state(struct brw_context *brw)
 
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
-   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(brw->vs.prog_offset);
    OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
             GEN6_VS_FLOATING_POINT_MODE_ALT |
             (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
index 6a64eb8a2d352bbb4d71bdf49378eefbebb67f41..ac6ba2fed1230ede4607fc7ab17f6f7e9279da3b 100644 (file)
@@ -227,18 +227,13 @@ upload_ps_state(struct brw_context *brw)
 
    BEGIN_BATCH(8);
    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
-   OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(brw->wm.prog_offset);
    OUT_BATCH(dw2);
    OUT_BATCH(0); /* scratch space base offset */
    OUT_BATCH(dw4);
    OUT_BATCH(dw5);
    OUT_BATCH(0); /* kernel 1 pointer */
-   if (brw->wm.prog_data->prog_offset_16) {
-      OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-               brw->wm.prog_data->prog_offset_16);
-   } else {
-      OUT_BATCH(0); /* kernel 2 pointer */
-   }
+   OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
    ADVANCE_BATCH();
 }