* Keith Whitwell <keith@tungstengraphics.com>
*/
-
-
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "main/imports.h"
+#include "glsl/ralloc.h"
-
-
-/* A facility similar to the data caching code above, which aims to
- * prevent identical commands being issued repeatedly.
- */
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
- const void *data,
- GLuint sz )
+static void
+brw_track_state_batch(struct brw_context *brw,
+ enum state_struct_type type,
+ uint32_t offset,
+ int size)
{
- struct brw_cached_batch_item *item = brw->cached_batch_items;
- struct header *newheader = (struct header *)data;
-
- if (brw->emit_state_always) {
- intel_batchbuffer_data(brw->intel.batch, data, sz);
- return GL_TRUE;
- }
+ struct intel_batchbuffer *batch = &brw->intel.batch;
- while (item) {
- if (item->header->opcode == newheader->opcode) {
- if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
- return GL_FALSE;
- if (item->sz != sz) {
- _mesa_free(item->header);
- item->header = _mesa_malloc(sz);
- item->sz = sz;
- }
- goto emit;
- }
- item = item->next;
+ if (!brw->state_batch_list) {
+ /* Our structs are always aligned to at least 32 bytes, so
+ * our array doesn't need to be any larger
+ */
+ brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) *
+ batch->bo->size / 32);
}
- assert(!item);
- item = CALLOC_STRUCT(brw_cached_batch_item);
- item->header = _mesa_malloc(sz);
- item->sz = sz;
- item->next = brw->cached_batch_items;
- brw->cached_batch_items = item;
-
- emit:
- memcpy(item->header, newheader, sz);
- intel_batchbuffer_data(brw->intel.batch, data, sz);
- return GL_TRUE;
+ brw->state_batch_list[brw->state_batch_count].offset = offset;
+ brw->state_batch_list[brw->state_batch_count].size = size;
+ brw->state_batch_list[brw->state_batch_count].type = type;
+ brw->state_batch_count++;
}
-void brw_clear_batch_cache( struct brw_context *brw )
+/**
+ * Allocates a block of space in the batchbuffer for indirect state.
+ *
+ * We don't want to allocate separate BOs for every bit of indirect
+ * state in the driver. It means overallocating by a significant
+ * margin (4096 bytes, even if the object is just a 20-byte surface
+ * state), and more buffers to walk and count for aperture size checking.
+ *
+ * However, due to the restrictions inposed by the aperture size
+ * checking performance hacks, we can't have the batch point at a
+ * separate indirect state buffer, because once the batch points at
+ * it, no more relocations can be added to it. So, we sneak these
+ * buffers in at the top of the batchbuffer.
+ */
+void *
+brw_state_batch(struct brw_context *brw,
+ enum state_struct_type type,
+ int size,
+ int alignment,
+ uint32_t *out_offset)
{
- struct brw_cached_batch_item *item = brw->cached_batch_items;
+ struct intel_batchbuffer *batch = &brw->intel.batch;
+ uint32_t offset;
+
+ assert(size < batch->bo->size);
+ offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
- while (item) {
- struct brw_cached_batch_item *next = item->next;
- free((void *)item->header);
- free(item);
- item = next;
+ /* If allocating from the top would wrap below the batchbuffer, or
+ * if the batch's used space (plus the reserved pad) collides with our
+ * space, then flush and try again.
+ */
+ if (batch->state_batch_offset < size ||
+ offset < 4*batch->used + batch->reserved_space) {
+ intel_batchbuffer_flush(&brw->intel);
+ offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
}
- brw->cached_batch_items = NULL;
-}
+ batch->state_batch_offset = offset;
-void brw_destroy_batch_cache( struct brw_context *brw )
-{
- brw_clear_batch_cache(brw);
+ if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
+ brw_track_state_batch(brw, type, offset, size);
+
+ *out_offset = offset;
+ return batch->map + (offset>>2);
}