printf("\n");
}
- /* Upload
- */
- drm_intel_bo_unreference(brw->clip.prog_bo);
- brw->clip.prog_bo = brw_upload_cache(&brw->cache,
- BRW_CLIP_PROG,
- &c.key, sizeof(c.key),
- program, program_size,
- &c.prog_data, sizeof(c.prog_data),
- &brw->clip.prog_data);
+ brw_upload_cache(&brw->cache,
+ BRW_CLIP_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, sizeof(c.prog_data),
+ &brw->clip.prog_offset, &brw->clip.prog_data);
ralloc_free(mem_ctx);
}
}
}
- drm_intel_bo_unreference(brw->clip.prog_bo);
- brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
- &key, sizeof(key),
- &brw->clip.prog_data);
- if (brw->clip.prog_bo == NULL)
+ if (!brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+ &key, sizeof(key),
+ &brw->clip.prog_offset, &brw->clip.prog_data)) {
compile_clip_prog( brw, &key );
+ }
}
clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
memset(clip, 0, sizeof(*clip));
- /* CACHE_NEW_CLIP_PROG */
+ /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */
clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
16 - 1);
- /* reloc */
- clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+ clip->thread0.kernel_start_pointer =
+ brw_program_reloc(brw,
+ brw->clip.state_offset +
+ offsetof(struct brw_clip_unit_state, thread0),
+ brw->clip.prog_offset +
+ (clip->thread0.grf_reg_count << 1)) >> 6;
clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
clip->thread1.single_program_flow = 1;
clip->viewport_ymin = -1;
clip->viewport_ymax = 1;
- /* Emit clip program relocation */
- assert(brw->clip.prog_bo);
- drm_intel_bo_emit_reloc(intel->batch.bo,
- (brw->clip.state_offset +
- offsetof(struct brw_clip_unit_state, thread0)),
- brw->clip.prog_bo, clip->thread0.grf_reg_count << 1,
- I915_GEM_DOMAIN_INSTRUCTION, 0);
-
brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
}
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_BATCH |
+ BRW_NEW_PROGRAM_CACHE |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
BRW_STATE_NR_VS_SURFACES,
BRW_STATE_INDEX_BUFFER,
BRW_STATE_VS_CONSTBUF,
- BRW_STATE_WM_CONSTBUF
+ BRW_STATE_WM_CONSTBUF,
+ BRW_STATE_PROGRAM_CACHE,
};
#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE)
#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF)
+#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
GLuint key_size; /* for variable-sized keys */
const void *key;
- drm_intel_bo *bo;
+ uint32_t offset;
+ uint32_t size;
struct brw_cache_item *next;
};
struct brw_context *brw;
struct brw_cache_item **items;
+ drm_intel_bo *bo;
GLuint size, n_items;
- char *name[BRW_MAX_CACHE];
-
- /* Record of the last BOs chosen for each cache_id. Used to set
- * brw->state.dirty.cache when a new cache item is chosen.
- */
- drm_intel_bo *last_bo[BRW_MAX_CACHE];
+ uint32_t next_offset;
+ bool bo_used_by_gpu;
};
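The cache now packs every compiled program into this single bo, handing out offsets with a bump allocator: next_offset marks the first free byte and stays 64-byte aligned, since kernel start pointers are stored shifted right by 6. A minimal sketch of the allocation step as implemented in brw_upload_cache() below (the numbers in the comments are hypothetical):

    /* Carve data_size bytes out of cache->bo and round the next free
     * offset up to the required 64-byte program alignment.
     */
    uint32_t offset = cache->next_offset;               /* e.g. 0x0c0 */
    cache->next_offset = ALIGN(offset + data_size, 64); /* 0x0c0 + 100 -> 0x140 */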
struct brw_vs_prog_data *prog_data;
int8_t *constant_map; /* variable array following prog_data */
- drm_intel_bo *prog_bo;
drm_intel_bo *const_bo;
+ /** Offset in the program cache to the VS program */
+ uint32_t prog_offset;
uint32_t state_offset;
/** Binding table of pointers to surf_bo entries */
struct brw_gs_prog_data *prog_data;
GLboolean prog_active;
+ /** Offset in the program cache to the GS program pre-gen6 */
+ uint32_t prog_offset;
uint32_t state_offset;
- drm_intel_bo *prog_bo;
} gs;
struct {
struct brw_clip_prog_data *prog_data;
- drm_intel_bo *prog_bo;
+ /** Offset in the program cache to the CLIP program pre-gen6 */
+ uint32_t prog_offset;
/* Offset in the batch to the CLIP state on pre-gen6. */
uint32_t state_offset;
struct {
struct brw_sf_prog_data *prog_data;
- drm_intel_bo *prog_bo;
+ /** Offset in the program cache to the SF program pre-gen6 */
+ uint32_t prog_offset;
uint32_t state_offset;
uint32_t vp_offset;
} sf;
GLuint sampler_count;
uint32_t sampler_offset;
+ /** Offset in the program cache to the WM program */
+ uint32_t prog_offset;
+
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_WM_MAX_SURF];
uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
- drm_intel_bo *prog_bo;
drm_intel_bo *const_bo; /* pull constant buffer. */
/**
* This is offset in the batch to the push constants on gen6.
struct {
- /* gen4 */
- drm_intel_bo *prog_bo;
-
uint32_t state_offset;
uint32_t blend_state_offset;
uint32_t depth_stencil_state_offset;
return ALIGN(reg_count, 16) / 16 - 1;
}
+static inline uint32_t
+brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
+ uint32_t prog_offset)
+{
+ struct intel_context *intel = &brw->intel;
+
+ if (intel->gen >= 5) {
+ /* Using state base address. */
+ return prog_offset;
+ }
+
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ state_offset,
+ brw->cache.bo,
+ prog_offset,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+ return brw->cache.bo->offset + prog_offset;
+}
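A note on the callers of brw_program_reloc(): they all add (grf_reg_count << 1) to the program offset because the dword being relocated packs the kernel start pointer into bits 31:6 and the GRF register count into bits 3:1, and on gen4 the kernel overwrites the whole dword with bo->offset + delta. The delta therefore has to carry the low bits too. A worked example with made-up numbers:

    /* Hypothetical gen4 relocation: cache BO placed at 0x10000,
     * program at prog_offset 0x180, grf_reg_count 3.
     */
    uint32_t delta = 0x180 + (3 << 1);  /* 0x186 */
    uint32_t patched = 0x10000 + delta; /* 0x10186, written by the kernel */
    /* patched >> 6 == 0x406, the kernel start pointer, with the
     * grf_reg_count bits still intact below it.
     */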
+
GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
#endif
key.program_string_id = bfp->id;
- drm_intel_bo *old_prog_bo = brw->wm.prog_bo;
+ uint32_t old_prog_offset = brw->wm.prog_offset;
struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
- brw->wm.prog_bo = NULL;
bool success = do_wm_prog(brw, prog, bfp, &key);
- drm_intel_bo_unreference(brw->wm.prog_bo);
- brw->wm.prog_bo = old_prog_bo;
+ brw->wm.prog_offset = old_prog_offset;
brw->wm.prog_data = old_prog_data;
return success;
printf("\n");
}
- /* Upload
- */
- drm_intel_bo_unreference(brw->gs.prog_bo);
- brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG,
- &c.key, sizeof(c.key),
- program, program_size,
- &c.prog_data, sizeof(c.prog_data),
- &brw->gs.prog_data);
+ brw_upload_cache(&brw->cache, BRW_GS_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, sizeof(c.prog_data),
+ &brw->gs.prog_offset, &brw->gs.prog_data);
ralloc_free(mem_ctx);
}
brw->gs.prog_active = key.need_gs_prog;
}
- drm_intel_bo_unreference(brw->gs.prog_bo);
- brw->gs.prog_bo = NULL;
-
if (brw->gs.prog_active) {
- brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
- &key, sizeof(key),
- &brw->gs.prog_data);
- if (brw->gs.prog_bo == NULL)
+ if (!brw_search_cache(&brw->cache, BRW_GS_PROG,
+ &key, sizeof(key),
+ &brw->gs.prog_offset, &brw->gs.prog_data)) {
compile_gs_prog( brw, &key );
+ }
}
}
memset(gs, 0, sizeof(*gs));
- /* CACHE_NEW_GS_PROG */
+ /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_GS_PROG */
if (brw->gs.prog_active) {
gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) /
16 - 1);
- /* reloc */
- gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+
+ gs->thread0.kernel_start_pointer =
+ brw_program_reloc(brw,
+ brw->gs.state_offset +
+ offsetof(struct brw_gs_unit_state, thread0),
+ brw->gs.prog_offset +
+ (gs->thread0.grf_reg_count << 1)) >> 6;
gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
gs->thread1.single_program_flow = 1;
gs->thread4.max_threads = 1;
else
gs->thread4.max_threads = 0;
-
- /* Emit GS program relocation */
- drm_intel_bo_emit_reloc(intel->batch.bo,
- (brw->gs.state_offset +
- offsetof(struct brw_gs_unit_state, thread0)),
- brw->gs.prog_bo, gs->thread0.grf_reg_count << 1,
- I915_GEM_DOMAIN_INSTRUCTION, 0);
}
if (intel->gen == 5)
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
+ BRW_NEW_PROGRAM_CACHE |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_GS_PROG
I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
- OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */
+ OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 1); /* Instruction base address: shader kernels (incl. SIP) */
+
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Dynamic state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
- OUT_BATCH(1); /* Instruction base address */
+ OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 1); /* Instruction base address */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
const struct brw_tracked_state brw_state_base_address = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_PROGRAM_CACHE),
.cache = 0,
},
.emit = upload_state_base_address
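With the cache BO programmed as the instruction base address on gen5+, program pointers elsewhere turn into plain offsets: the gen6/gen7 _3DSTATE_VS and _3DSTATE_PS hunks below emit prog_offset with OUT_BATCH() instead of OUT_RELOC(), and replacing or growing the cache BO only requires flagging BRW_NEW_PROGRAM_CACHE so that this packet gets re-emitted.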
printf("\n");
}
- /* Upload
- */
- drm_intel_bo_unreference(brw->sf.prog_bo);
- brw->sf.prog_bo = brw_upload_cache(&brw->cache, BRW_SF_PROG,
- &c.key, sizeof(c.key),
- program, program_size,
- &c.prog_data, sizeof(c.prog_data),
- &brw->sf.prog_data);
+ brw_upload_cache(&brw->cache, BRW_SF_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, sizeof(c.prog_data),
+ &brw->sf.prog_offset, &brw->sf.prog_data);
ralloc_free(mem_ctx);
}
key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
}
- drm_intel_bo_unreference(brw->sf.prog_bo);
- brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
- &key, sizeof(key),
- &brw->sf.prog_data);
- if (brw->sf.prog_bo == NULL)
+ if (!brw_search_cache(&brw->cache, BRW_SF_PROG,
+ &key, sizeof(key),
+ &brw->sf.prog_offset, &brw->sf.prog_data)) {
compile_sf_prog( brw, &key );
+ }
}
memset(sf, 0, sizeof(*sf));
- /* CACHE_NEW_SF_PROG */
+ /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_SF_PROG */
sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
- sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+ sf->thread0.kernel_start_pointer =
+ brw_program_reloc(brw,
+ brw->sf.state_offset +
+ offsetof(struct brw_sf_unit_state, thread0),
+ brw->sf.prog_offset +
+ (sf->thread0.grf_reg_count << 1)) >> 6;
sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
/* STATE_PREFETCH command description describes this state as being
* something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
*/
- /* Emit SF program relocation */
- drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
- offsetof(struct brw_sf_unit_state, thread0)),
- brw->sf.prog_bo, sf->thread0.grf_reg_count << 1,
- I915_GEM_DOMAIN_INSTRUCTION, 0);
/* Emit SF viewport relocation */
drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
_NEW_SCISSOR |
_NEW_BUFFERS),
.brw = (BRW_NEW_BATCH |
+ BRW_NEW_PROGRAM_CACHE |
BRW_NEW_URB_FENCE),
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
* brw_state_cache.c
*/
-drm_intel_bo *brw_upload_cache(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_sz,
- const void *data,
- GLuint data_sz,
- const void *aux,
- GLuint aux_sz,
- void *aux_return);
-
-drm_intel_bo *brw_search_cache( struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- void *aux_return);
+void brw_upload_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_sz,
+ const void *data,
+ GLuint data_sz,
+ const void *aux,
+ GLuint aux_sz,
+ uint32_t *out_offset, void *out_aux);
+
+bool brw_search_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ uint32_t *inout_offset, void *out_aux);
void brw_state_cache_check_size( struct brw_context *brw );
void brw_init_caches( struct brw_context *brw );
*/
#include "main/imports.h"
+#include "intel_batchbuffer.h"
#include "brw_state.h"
#define FILE_DEBUG_FLAG DEBUG_STATE
return hash;
}
-
-/**
- * Marks a new buffer as being chosen for the given cache id.
- */
-static void
-update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
- drm_intel_bo *bo)
-{
- if (bo == cache->last_bo[cache_id])
- return; /* no change */
-
- drm_intel_bo_unreference(cache->last_bo[cache_id]);
- cache->last_bo[cache_id] = bo;
- drm_intel_bo_reference(cache->last_bo[cache_id]);
- cache->brw->state.dirty.cache |= 1 << cache_id;
-}
-
static int
brw_cache_item_equals(const struct brw_cache_item *a,
const struct brw_cache_item *b)
/**
- * Returns the buffer object matching cache_id and key, or NULL.
+ * Looks up an item matching cache_id and key; on a hit, updates
+ * *inout_offset and *out_aux and returns true, otherwise returns false.
*/
-drm_intel_bo *
+bool
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key, GLuint key_size,
- void *aux_return)
+ uint32_t *inout_offset, void *out_aux)
{
+ struct brw_context *brw = cache->brw;
struct brw_cache_item *item;
struct brw_cache_item lookup;
GLuint hash;
item = search_cache(cache, hash, &lookup);
if (item == NULL)
- return NULL;
+ return false;
- if (aux_return)
- *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+ *(void **)out_aux = ((char *)item->key + item->key_size);
- update_cache_last(cache, cache_id, item->bo);
+ if (item->offset != *inout_offset) {
+ brw->state.dirty.cache |= (1 << cache_id);
+ *inout_offset = item->offset;
+ }
- drm_intel_bo_reference(item->bo);
- return item->bo;
+ return true;
}
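Two behavioral notes on the new signature: out_aux is now dereferenced unconditionally (the old `if (aux_return)` guard is gone), so every caller must pass a valid pointer; and taking the previous offset in through inout_offset is what replaces update_cache_last(): the per-cache-id dirty bit is only raised when the selected program actually moved.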
+static void
+brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
+{
+ struct brw_context *brw = cache->brw;
+ struct intel_context *intel = &brw->intel;
+ drm_intel_bo *new_bo;
+
+ new_bo = drm_intel_bo_alloc(intel->bufmgr, "program cache", new_size, 64);
+
+ /* Copy any existing data that needs to be saved. */
+ if (cache->next_offset != 0) {
+ drm_intel_bo_map(cache->bo, false);
+ drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual);
+ drm_intel_bo_unmap(cache->bo);
+ }
+
+ drm_intel_bo_unreference(cache->bo);
+ cache->bo = new_bo;
+ cache->bo_used_by_gpu = false;
+
+ /* Since we have a new BO in place, we need to signal the units
+ * that depend on it (state base address on gen5+, or unit state before).
+ */
+ brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE;
+}
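Note on the copy above: the old BO is read back through a CPU map while the new one is filled with drm_intel_bo_subdata(). If the old BO is still busy on the GPU, the map will wait for it to go idle; that appears acceptable here, since this path is only taken when the current BO cannot be written in place anyway.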
+
-drm_intel_bo *
+void
brw_upload_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint data_size,
const void *aux,
GLuint aux_size,
- void *aux_return)
+ uint32_t *out_offset,
+ void *out_aux)
{
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
GLuint hash;
void *tmp;
- drm_intel_bo *bo;
item->cache_id = cache_id;
item->key = key;
hash = hash_key(item);
item->hash = hash;
- /* Create the buffer object to contain the data */
- bo = drm_intel_bo_alloc(cache->brw->intel.bufmgr,
- cache->name[cache_id], data_size, 1 << 6);
+ /* Allocate space in the cache BO for our new program. */
+ if (cache->next_offset + data_size > cache->bo->size) {
+ uint32_t new_size = cache->bo->size * 2;
+
+ while (cache->next_offset + data_size > new_size)
+ new_size *= 2;
+
+ brw_cache_new_bo(cache, new_size);
+ }
+
+ /* If we would block on writing to an in-use program BO, just
+ * recreate it.
+ */
+ if (cache->bo_used_by_gpu) {
+ brw_cache_new_bo(cache, cache->bo->size);
+ }
+
+ item->offset = cache->next_offset;
+ item->size = data_size;
+ /* Programs are always 64-byte aligned, so set up the next one now */
+ cache->next_offset = ALIGN(item->offset + data_size, 64);
/* Set up the memory containing the key and aux_data */
tmp = malloc(key_size + aux_size);
item->key = tmp;
- item->bo = bo;
- drm_intel_bo_reference(bo);
-
if (cache->n_items > cache->size * 1.5)
rehash(cache);
cache->items[hash] = item;
cache->n_items++;
- if (aux_return) {
- *(void **)aux_return = (void *)((char *)item->key + item->key_size);
- }
-
- DBG("upload %s: %d bytes to cache id %d\n",
- cache->name[cache_id],
- data_size, cache_id);
-
/* Copy data to the buffer */
- drm_intel_bo_subdata(bo, 0, data_size, data);
-
- update_cache_last(cache, cache_id, bo);
+ drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
- return bo;
-}
-
-static void
-brw_init_cache_id(struct brw_cache *cache,
- const char *name,
- enum brw_cache_id id)
-{
- cache->name[id] = strdup(name);
+ *out_offset = item->offset;
+ *(void **)out_aux = (void *)((char *)item->key + item->key_size);
+ cache->brw->state.dirty.cache |= 1 << cache_id;
}
-
void
brw_init_caches(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct brw_cache *cache = &brw->cache;
cache->brw = brw;
cache->items = (struct brw_cache_item **)
calloc(1, cache->size * sizeof(struct brw_cache_item));
- brw_init_cache_id(cache, "CC_VP", BRW_CC_VP);
- brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT);
- brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG);
- brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER);
- brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT);
- brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG);
- brw_init_cache_id(cache, "SF_VP", BRW_SF_VP);
-
- brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT);
-
- brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT);
-
- brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG);
-
- brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT);
-
- brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG);
- brw_init_cache_id(cache, "CLIP_VP", BRW_CLIP_VP);
-
- brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT);
-
- brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG);
- brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE);
- brw_init_cache_id(cache, "COLOR_CALC_STATE", BRW_COLOR_CALC_STATE);
- brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE);
+ cache->bo = drm_intel_bo_alloc(intel->bufmgr,
+ "program cache",
+ 4096, 64);
}
static void
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
+ struct intel_context *intel = &brw->intel;
struct brw_cache_item *c, *next;
GLuint i;
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
next = c->next;
- drm_intel_bo_unreference(c->bo);
free((void *)c->key);
free(c);
}
cache->n_items = 0;
+ /* Start putting programs into the start of the BO again, since
+ * we'll never find the old results.
+ */
+ cache->next_offset = 0;
+
+ /* We need to make sure that the programs get regenerated, since
+ * any offsets leftover in brw_context will no longer be valid.
+ */
brw->state.dirty.mesa |= ~0;
brw->state.dirty.brw |= ~0;
brw->state.dirty.cache |= ~0;
+ intel_batchbuffer_flush(intel);
}
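The flush also trips the bo_used_by_gpu flag (set in the batch-flush hook further down), so the first upload after a cache clear lands in a fresh BO rather than overwriting, from offset 0, programs that an in-flight batch may still reference. This is a reading of the interaction, not something the patch states.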
void
static void
brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
- GLuint i;
DBG("%s\n", __FUNCTION__);
brw_clear_cache(brw, cache);
- for (i = 0; i < BRW_MAX_CACHE; i++) {
- drm_intel_bo_unreference(cache->last_bo[i]);
- free(cache->name[i]);
- }
free(cache->items);
cache->items = NULL;
cache->size = 0;
}
-static void brw_debug_prog(const char *name, drm_intel_bo *prog)
+static void brw_debug_prog(struct brw_context *brw,
+ const char *name, uint32_t prog_offset)
{
unsigned int i;
uint32_t *data;
- if (prog == NULL)
- return;
-
- drm_intel_bo_map(prog, GL_FALSE);
+ drm_intel_bo_map(brw->cache.bo, false);
- data = prog->virtual;
+ data = brw->cache.bo->virtual + prog_offset;
- for (i = 0; i < prog->size / 4 / 4; i++) {
+ for (i = 0; i < (brw->cache.bo->size - prog_offset) / 4 / 4; i++) {
fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
- name, (unsigned int)prog->offset + i * 4 * 4,
+ name, (unsigned int)brw->cache.bo->offset + prog_offset + i * 4 * 4,
data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
/* Stop at the end of the program. It'd be nice to keep track of the actual
* intended program size instead of guessing like this.
break;
}
- drm_intel_bo_unmap(prog);
+ drm_intel_bo_unmap(brw->cache.bo);
}
if (intel->gen < 6)
state_struct_out("VS", intel->batch.bo, brw->vs.state_offset,
sizeof(struct brw_vs_unit_state));
- brw_debug_prog("VS prog", brw->vs.prog_bo);
+ brw_debug_prog(brw, "VS prog", brw->vs.prog_offset);
if (intel->gen < 6)
state_struct_out("GS", intel->batch.bo, brw->gs.state_offset,
sizeof(struct brw_gs_unit_state));
- brw_debug_prog("GS prog", brw->gs.prog_bo);
+ if (brw->gs.prog_active) {
+ brw_debug_prog(brw, "GS prog", brw->gs.prog_offset);
+ }
if (intel->gen < 6) {
state_struct_out("SF", intel->batch.bo, brw->sf.state_offset,
sizeof(struct brw_sf_unit_state));
- brw_debug_prog("SF prog", brw->sf.prog_bo);
+ brw_debug_prog(brw, "SF prog", brw->sf.prog_offset);
}
if (intel->gen >= 7)
dump_sf_clip_viewport_state(brw);
if (intel->gen < 6)
state_struct_out("WM", intel->batch.bo, brw->wm.state_offset,
sizeof(struct brw_wm_unit_state));
- brw_debug_prog("WM prog", brw->wm.prog_bo);
+ brw_debug_prog(brw, "WM prog", brw->wm.prog_offset);
if (intel->gen >= 6) {
dump_cc_viewport_state(brw);
&brw_check_fallback,
&brw_wm_input_sizes,
- &brw_vs_prog,
- &brw_gs_prog,
- &brw_clip_prog,
- &brw_sf_prog,
- &brw_wm_prog,
+ &brw_vs_prog, /* must do before GS prog, state base address. */
+ &brw_gs_prog, /* must do before state base address */
+ &brw_clip_prog, /* must do before state base address */
+ &brw_sf_prog, /* must do before state base address */
+ &brw_wm_prog, /* must do before state base address */
/* Once all the programs are done, we know how large urb entry
* sizes need to be and can decide if we need to change the urb
&brw_check_fallback,
&brw_wm_input_sizes,
- &brw_vs_prog,
- &brw_gs_prog,
- &brw_wm_prog,
+ &brw_vs_prog, /* must do before state base address */
+ &brw_gs_prog, /* must do before state base address */
+ &brw_wm_prog, /* must do before state base address */
&gen6_clip_vp,
&gen6_sf_vp,
DEFINE_BIT(BRW_NEW_PRIMITIVE),
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
DEFINE_BIT(BRW_NEW_PSP),
DEFINE_BIT(BRW_NEW_WM_SURFACES),
DEFINE_BIT(BRW_NEW_INDICES),
/* constant_map */
aux_size += c.vp->program.Base.Parameters->NumParameters;
- drm_intel_bo_unreference(brw->vs.prog_bo);
- brw->vs.prog_bo = brw_upload_cache(&brw->cache, BRW_VS_PROG,
- &c.key, sizeof(c.key),
- program, program_size,
- &c.prog_data, aux_size,
- &brw->vs.prog_data);
+ brw_upload_cache(&brw->cache, BRW_VS_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, aux_size,
+ &brw->vs.prog_offset, &brw->vs.prog_data);
ralloc_free(mem_ctx);
}
}
}
- /* Make an early check for the key.
- */
- drm_intel_bo_unreference(brw->vs.prog_bo);
- brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
- &key, sizeof(key),
- &brw->vs.prog_data);
- if (brw->vs.prog_bo == NULL)
+ if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
+ &key, sizeof(key),
+ &brw->vs.prog_offset, &brw->vs.prog_data)) {
do_vs_prog(brw, vp, &key);
+ }
brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
sizeof(*brw->vs.prog_data));
}
vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset);
memset(vs, 0, sizeof(*vs));
- /* CACHE_NEW_VS_PROG */
- vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
+ /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */
vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+ vs->thread0.kernel_start_pointer =
+ brw_program_reloc(brw,
+ brw->vs.state_offset +
+ offsetof(struct brw_vs_unit_state, thread0),
+ brw->vs.prog_offset +
+ (vs->thread0.grf_reg_count << 1)) >> 6;
vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
/* Choosing multiple program flow means that we may get 2-vertex threads,
*/
vs->vs6.vs_enable = 1;
- /* Emit VS program relocation */
- drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset +
- offsetof(struct brw_vs_unit_state,
- thread0)),
- brw->vs.prog_bo, vs->thread0.grf_reg_count << 1,
- I915_GEM_DOMAIN_INSTRUCTION, 0);
-
brw->state.dirty.cache |= CACHE_NEW_VS_UNIT;
}
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_BATCH |
+ BRW_NEW_PROGRAM_CACHE |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE),
ralloc_free(brw->wm.compile_data);
dri_bo_release(&brw->curbe.curbe_bo);
- dri_bo_release(&brw->vs.prog_bo);
dri_bo_release(&brw->vs.const_bo);
- dri_bo_release(&brw->gs.prog_bo);
- dri_bo_release(&brw->clip.prog_bo);
- dri_bo_release(&brw->sf.prog_bo);
- dri_bo_release(&brw->wm.prog_bo);
dri_bo_release(&brw->wm.const_bo);
- dri_bo_release(&brw->cc.prog_bo);
free(brw->curbe.last_buf);
free(brw->curbe.next_buf);
brw->state.dirty.brw |= BRW_NEW_CONTEXT | BRW_NEW_BATCH;
brw->vb.nr_current_buffers = 0;
+
+ /* Mark that the current program cache BO has been used by the GPU.
+ * It will be reallocated if we need to put new programs in for the
+ * next batch.
+ */
+ brw->cache.bo_used_by_gpu = true;
}
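Taken together with brw_upload_cache(), the intended lifecycle is roughly: a batch flush marks the cache BO as GPU-busy; the next program upload then swaps in a fresh BO (copying the existing programs forward via brw_cache_new_bo()) instead of letting drm_intel_bo_subdata() stall on the in-flight batch.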
static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
*/
program = brw_get_program(&c->func, &program_size);
- drm_intel_bo_unreference(brw->wm.prog_bo);
- brw->wm.prog_bo = brw_upload_cache(&brw->cache, BRW_WM_PROG,
- &c->key, sizeof(c->key),
- program, program_size,
- &c->prog_data, sizeof(c->prog_data),
- &brw->wm.prog_data);
+ brw_upload_cache(&brw->cache, BRW_WM_PROG,
+ &c->key, sizeof(c->key),
+ program, program_size,
+ &c->prog_data, sizeof(c->prog_data),
+ &brw->wm.prog_offset, &brw->wm.prog_data);
return true;
}
brw_wm_populate_key(brw, &key);
- /* Make an early check for the key.
- */
- drm_intel_bo_unreference(brw->wm.prog_bo);
- brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
- &key, sizeof(key),
- &brw->wm.prog_data);
- if (brw->wm.prog_bo == NULL) {
+ if (!brw_search_cache(&brw->cache, BRW_WM_PROG,
+ &key, sizeof(key),
+ &brw->wm.prog_offset, &brw->wm.prog_data)) {
bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp,
&key);
assert(success);
brw->wm.prog_data->first_curbe_grf_16);
}
- /* CACHE_NEW_WM_PROG */
+ /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */
wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
- wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
- /* reloc */
- wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
- brw->wm.prog_data->prog_offset_16) >> 6;
+
+ wm->thread0.kernel_start_pointer =
+ brw_program_reloc(brw,
+ brw->wm.state_offset +
+ offsetof(struct brw_wm_unit_state, thread0),
+ brw->wm.prog_offset +
+ (wm->thread0.grf_reg_count << 1)) >> 6;
+
+ wm->wm9.kernel_start_pointer_2 =
+ brw_program_reloc(brw,
+ brw->wm.state_offset +
+ offsetof(struct brw_wm_unit_state, wm9),
+ brw->wm.prog_offset +
+ brw->wm.prog_data->prog_offset_16 +
+ (wm->wm9.grf_reg_count_2 << 1)) >> 6;
+
wm->thread1.depth_coef_urb_read_offset = 1;
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm)
wm->wm4.stats_enable = 1;
- /* Emit WM program relocation */
- drm_intel_bo_emit_reloc(intel->batch.bo,
- brw->wm.state_offset +
- offsetof(struct brw_wm_unit_state, thread0),
- brw->wm.prog_bo, wm->thread0.grf_reg_count << 1,
- I915_GEM_DOMAIN_INSTRUCTION, 0);
-
- if (brw->wm.prog_data->prog_offset_16) {
- drm_intel_bo_emit_reloc(intel->batch.bo,
- brw->wm.state_offset +
- offsetof(struct brw_wm_unit_state, wm9),
- brw->wm.prog_bo,
- ((wm->wm9.grf_reg_count_2 << 1) +
- brw->wm.prog_data->prog_offset_16),
- I915_GEM_DOMAIN_INSTRUCTION, 0);
- }
-
/* Emit scratch space relocation */
if (brw->wm.prog_data->total_scratch != 0) {
drm_intel_bo_emit_reloc(intel->batch.bo,
_NEW_BUFFERS),
.brw = (BRW_NEW_BATCH |
+ BRW_NEW_PROGRAM_CACHE |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_WM_SURFACES),
ADVANCE_BATCH();
// GS should never be used on Gen6. Disable it.
- assert(brw->gs.prog_bo == NULL);
+ assert(!brw->gs.prog_active);
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0); /* prog_bo */
assert(brw->urb.nr_vs_entries % 4 == 0);
assert(brw->urb.nr_gs_entries % 4 == 0);
/* GS requirement */
- assert(!brw->gs.prog_bo || brw->urb.vs_size < 5);
+ assert(!brw->gs.prog_active || brw->urb.vs_size < 5);
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
- OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH(brw->vs.prog_offset);
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
- OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH(brw->wm.prog_offset);
OUT_BATCH(dw2);
if (brw->wm.prog_data->total_scratch) {
OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
OUT_BATCH(dw5);
OUT_BATCH(dw6);
OUT_BATCH(0); /* kernel 1 pointer */
- if (brw->wm.prog_data->prog_offset_16) {
- OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
- brw->wm.prog_data->prog_offset_16);
- } else {
- OUT_BATCH(0); /* kernel 2 pointer */
- }
+ /* kernel 2 pointer */
+ OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
ADVANCE_BATCH();
}
{
struct intel_context *intel = &brw->intel;
- assert(brw->gs.prog_bo == NULL);
+ assert(!brw->gs.prog_active);
/* Disable the Geometry Shader (GS) Unit */
BEGIN_BATCH(7);
assert(brw->urb.nr_vs_entries % 8 == 0);
assert(brw->urb.nr_gs_entries % 8 == 0);
/* GS requirement */
- assert(!brw->gs.prog_bo);
+ assert(!brw->gs.prog_active);
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
- OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH(brw->vs.prog_offset);
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
BEGIN_BATCH(8);
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
- OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH(brw->wm.prog_offset);
OUT_BATCH(dw2);
OUT_BATCH(0); /* scratch space base offset */
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(0); /* kernel 1 pointer */
- if (brw->wm.prog_data->prog_offset_16) {
- OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
- brw->wm.prog_data->prog_offset_16);
- } else {
- OUT_BATCH(0); /* kernel 2 pointer */
- }
+ OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
ADVANCE_BATCH();
}