* Keith Whitwell <keith@tungstengraphics.com>
*/
-
-
#include "brw_state.h"
#include "intel_batchbuffer.h"
-#include "imports.h"
-
-
-
-/* A facility similar to the data caching code above, which aims to
- * prevent identical commands being issued repeatedly.
- */
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
-                                   const void *data,
-                                   GLuint sz )
+#include "main/imports.h"
+#include "glsl/ralloc.h"
+
+static void
+brw_track_state_batch(struct brw_context *brw,
+                      enum state_struct_type type,
+                      uint32_t offset,
+                      int size)
{
-   struct brw_cached_batch_item *item = brw->cached_batch_items;
-   struct header *newheader = (struct header *)data;
-
-   if (brw->emit_state_always) {
-      intel_batchbuffer_data(brw->intel.batch, data, sz, 0);
-      return GL_TRUE;
+   struct intel_batchbuffer *batch = &brw->batch;
+
+   if (!brw->state_batch_list) {
+      /* Our structs are always aligned to at least 32 bytes, so
+       * our array doesn't need to be any larger
+       */
+      brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) *
+                                          batch->bo->size / 32);
   }
-   while (item) {
-      if (item->header->opcode == newheader->opcode) {
-         if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
-            return GL_FALSE;
-         if (item->sz != sz) {
-            _mesa_free(item->header);
-            item->header = _mesa_malloc(sz);
-            item->sz = sz;
-         }
-         goto emit;
-      }
-      item = item->next;
-   }
-
-   assert(!item);
-   item = CALLOC_STRUCT(brw_cached_batch_item);
-   item->header = _mesa_malloc(sz);
-   item->sz = sz;
-   item->next = brw->cached_batch_items;
-   brw->cached_batch_items = item;
-
- emit:
-   memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(brw->intel.batch, data, sz, 0);
-   return GL_TRUE;
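+   /* Record where this state object landed so brw_annotate_aub() below can
+    * label it later.
+    */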
+   brw->state_batch_list[brw->state_batch_count].offset = offset;
+   brw->state_batch_list[brw->state_batch_count].size = size;
+   brw->state_batch_list[brw->state_batch_count].type = type;
+   brw->state_batch_count++;
}
-static void clear_batch_cache( struct brw_context *brw )
+/**
+ * Convenience function to populate a single drm_intel_aub_annotation data
+ * structure.
+ */
+static inline void
+make_annotation(drm_intel_aub_annotation *annotation, uint32_t type,
+                uint32_t subtype, uint32_t ending_offset)
{
-   struct brw_cached_batch_item *item = brw->cached_batch_items;
+   annotation->type = type;
+   annotation->subtype = subtype;
+   annotation->ending_offset = ending_offset;
+}
-   while (item) {
-      struct brw_cached_batch_item *next = item->next;
-      free((void *)item->header);
-      free(item);
-      item = next;
+/**
+ * Generate a set of aub file annotations for the current batch buffer, and
+ * deliver them to DRM.
+ *
+ * The "used" section of the batch buffer (the portion containing batch
+ * commands) is annotated with AUB_TRACE_TYPE_BATCH. The remainder of the
+ * batch buffer (which contains data structures pointed to by batch commands)
+ * is annotated according to the type of each data structure.
+ */
+void
+brw_annotate_aub(struct brw_context *brw)
+{
+   unsigned annotation_count = 2 * brw->state_batch_count + 1;
+   drm_intel_aub_annotation annotations[annotation_count];
+   int a = 0;
+   make_annotation(&annotations[a++], AUB_TRACE_TYPE_BATCH, 0,
+                   4*brw->batch.used);
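+   /* brw_state_batch() allocates downward from the top of the buffer, so the
+    * tracking list is in decreasing-offset order; walking it backwards emits
+    * the annotations in increasing offset order.
+    */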
+   for (int i = brw->state_batch_count; i-- > 0; ) {
+      uint32_t type = brw->state_batch_list[i].type;
+      uint32_t start_offset = brw->state_batch_list[i].offset;
+      uint32_t end_offset = start_offset + brw->state_batch_list[i].size;
+      make_annotation(&annotations[a++], AUB_TRACE_TYPE_NOTYPE, 0,
+                      start_offset);
+      make_annotation(&annotations[a++], AUB_TRACE_TYPE(type),
+                      AUB_TRACE_SUBTYPE(type), end_offset);
   }
-
-   brw->cached_batch_items = NULL;
+   assert(a == annotation_count);
+   drm_intel_bufmgr_gem_set_aub_annotations(brw->batch.bo, annotations,
+                                            annotation_count);
}
-void brw_clear_batch_cache_flush( struct brw_context *brw )
+/**
+ * Allocates a block of space in the batchbuffer for indirect state.
+ *
+ * We don't want to allocate separate BOs for every bit of indirect
+ * state in the driver. It means overallocating by a significant
+ * margin (4096 bytes, even if the object is just a 20-byte surface
+ * state), and more buffers to walk and count for aperture size checking.
+ *
+ * However, due to the restrictions imposed by the aperture size
+ * checking performance hacks, we can't have the batch point at a
+ * separate indirect state buffer, because once the batch points at
+ * it, no more relocations can be added to it. So, we sneak these
+ * buffers in at the top of the batchbuffer.
+ */
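+/*
+ * Illustrative (hypothetical) example of how a caller might carve out a
+ * 6-dword, 32-byte-aligned surface state entry:
+ *
+ *    uint32_t surf_offset;
+ *    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+ *                                     6 * 4, 32, &surf_offset);
+ *
+ * where AUB_TRACE_SURFACE_STATE stands in for whatever state_struct_type
+ * value applies; the caller then fills in surf[] and emits relocations
+ * against surf_offset.
+ */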
+void *
+brw_state_batch(struct brw_context *brw,
+                enum state_struct_type type,
+                int size,
+                int alignment,
+                uint32_t *out_offset)
{
-   clear_batch_cache(brw);
-
-   brw->wrap = 0;
-
-/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
-
-   brw->state.dirty.mesa |= ~0;
-   brw->state.dirty.brw |= ~0;
-   brw->state.dirty.cache |= ~0;
-}
+   struct intel_batchbuffer *batch = &brw->batch;
+   uint32_t offset;
+
+   assert(size < batch->bo->size);
+   offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+
+   /* If allocating from the top would wrap below the batchbuffer, or
+    * if the batch's used space (plus the reserved pad) collides with our
+    * space, then flush and try again.
+    */
+   if (batch->state_batch_offset < size ||
+       offset < 4*batch->used + batch->reserved_space) {
+      intel_batchbuffer_flush(brw);
+      offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+   }
+   batch->state_batch_offset = offset;
+   if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_AUB)))
+      brw_track_state_batch(brw, type, offset, size);
-void brw_destroy_batch_cache( struct brw_context *brw )
-{
-   clear_batch_cache(brw);
+   *out_offset = offset;
+   return batch->map + (offset>>2);
}