* Keith Whitwell <keith@tungstengraphics.com>
*/
-
-
#include "brw_state.h"
#include "intel_batchbuffer.h"
-#include "imports.h"
-
-
-
-/* A facility similar to the data caching code above, which aims to
- * prevent identical commands being issued repeatedly.
- */
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
-                                   const void *data,
-                                   GLuint sz )
+#include "main/imports.h"
+#include "glsl/ralloc.h"
+
+static void
+brw_track_state_batch(struct brw_context *brw,
+                      enum state_struct_type type,
+                      uint32_t offset,
+                      int size)
{
-   struct brw_cached_batch_item *item = brw->cached_batch_items;
-   struct header *newheader = (struct header *)data;
-
-   if (brw->emit_state_always) {
-      intel_batchbuffer_data(brw->intel.batch, data, sz, 0);
-      return GL_TRUE;
+   struct intel_batchbuffer *batch = &brw->batch;
+
+   if (!brw->state_batch_list) {
+      /* Our structs are always aligned to at least 32 bytes, so
+       * our array doesn't need to be any larger
+       */
+      brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) *
+                                          batch->bo->size / 32);
   }
-   while (item) {
-      if (item->header->opcode == newheader->opcode) {
-         if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
-            return GL_FALSE;
-         if (item->sz != sz) {
-            _mesa_free(item->header);
-            item->header = _mesa_malloc(sz);
-            item->sz = sz;
-         }
-         goto emit;
-      }
-      item = item->next;
-   }
-
-   assert(!item);
-   item = CALLOC_STRUCT(brw_cached_batch_item);
-   item->header = _mesa_malloc(sz);
-   item->sz = sz;
-   item->next = brw->cached_batch_items;
-   brw->cached_batch_items = item;
-
- emit:
-   memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(brw->intel.batch, data, sz, 0);
-   return GL_TRUE;
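+   /* Record where this state object landed so brw_annotate_aub() below can
+    * label it later.
+    */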
+   brw->state_batch_list[brw->state_batch_count].offset = offset;
+   brw->state_batch_list[brw->state_batch_count].size = size;
+   brw->state_batch_list[brw->state_batch_count].type = type;
+   brw->state_batch_count++;
}
-static void clear_batch_cache( struct brw_context *brw )
+/**
+ * Convenience function to populate a single drm_intel_aub_annotation data
+ * structure.
+ */
+static inline void
+make_annotation(drm_intel_aub_annotation *annotation, uint32_t type,
+                uint32_t subtype, uint32_t ending_offset)
{
-   struct brw_cached_batch_item *item = brw->cached_batch_items;
+   annotation->type = type;
+   annotation->subtype = subtype;
+   annotation->ending_offset = ending_offset;
+}
-   while (item) {
-      struct brw_cached_batch_item *next = item->next;
-      free((void *)item->header);
-      free(item);
-      item = next;
+/**
+ * Generate a set of aub file annotations for the current batch buffer, and
+ * deliver them to DRM.
+ *
+ * The "used" section of the batch buffer (the portion containing batch
+ * commands) is annotated with AUB_TRACE_TYPE_BATCH. The remainder of the
+ * batch buffer (which contains data structures pointed to by batch commands)
+ * is annotated according to the type of each data structure.
+ */
+void
+brw_annotate_aub(struct brw_context *brw)
+{
+   unsigned annotation_count = 2 * brw->state_batch_count + 1;
+   drm_intel_aub_annotation annotations[annotation_count];
+   int a = 0;
+   make_annotation(&annotations[a++], AUB_TRACE_TYPE_BATCH, 0,
+                   4*brw->batch.used);
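+   /* brw_state_batch() allocates downward from the top of the buffer, so the
+    * tracking list is in decreasing-offset order; walking it backwards emits
+    * the annotations in increasing offset order.
+    */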
+   for (int i = brw->state_batch_count; i-- > 0; ) {
+      uint32_t type = brw->state_batch_list[i].type;
+      uint32_t start_offset = brw->state_batch_list[i].offset;
+      uint32_t end_offset = start_offset + brw->state_batch_list[i].size;
+      make_annotation(&annotations[a++], AUB_TRACE_TYPE_NOTYPE, 0,
+                      start_offset);
+      make_annotation(&annotations[a++], AUB_TRACE_TYPE(type),
+                      AUB_TRACE_SUBTYPE(type), end_offset);
   }
-
-   brw->cached_batch_items = NULL;
+   assert(a == annotation_count);
+   drm_intel_bufmgr_gem_set_aub_annotations(brw->batch.bo, annotations,
+                                            annotation_count);
}
-void brw_clear_batch_cache_flush( struct brw_context *brw )
+/**
+ * Allocates a block of space in the batchbuffer for indirect state.
+ *
+ * We don't want to allocate separate BOs for every bit of indirect
+ * state in the driver. It means overallocating by a significant
+ * margin (4096 bytes, even if the object is just a 20-byte surface
+ * state), and more buffers to walk and count for aperture size checking.
+ *
+ * However, due to the restrictions imposed by the aperture size
+ * checking performance hacks, we can't have the batch point at a
+ * separate indirect state buffer, because once the batch points at
+ * it, no more relocations can be added to it. So, we sneak these
+ * buffers in at the top of the batchbuffer.
+ */
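+/*
+ * Illustrative (hypothetical) example of how a caller might carve out a
+ * 6-dword, 32-byte-aligned surface state entry:
+ *
+ *    uint32_t surf_offset;
+ *    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+ *                                     6 * 4, 32, &surf_offset);
+ *
+ * where AUB_TRACE_SURFACE_STATE stands in for whatever state_struct_type
+ * value applies; the caller then fills in surf[] and emits relocations
+ * against surf_offset.
+ */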
+void *
+brw_state_batch(struct brw_context *brw,
+                enum state_struct_type type,
+                int size,
+                int alignment,
+                uint32_t *out_offset)
{
-   clear_batch_cache(brw);
-
-   brw->wrap = 0;
-
-/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
-
-   brw->state.dirty.mesa |= ~0;
-   brw->state.dirty.brw |= ~0;
-   brw->state.dirty.cache |= ~0;
-}
+   struct intel_batchbuffer *batch = &brw->batch;
+   uint32_t offset;
+
+   assert(size < batch->bo->size);
+   offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+
+   /* If allocating from the top would wrap below the batchbuffer, or
+    * if the batch's used space (plus the reserved pad) collides with our
+    * space, then flush and try again.
+    */
+   if (batch->state_batch_offset < size ||
+       offset < 4*batch->used + batch->reserved_space) {
+      intel_batchbuffer_flush(brw);
+      offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+   }
+   batch->state_batch_offset = offset;
+   if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_AUB)))
+      brw_track_state_batch(brw, type, offset, size);
-void brw_destroy_batch_cache( struct brw_context *brw )
-{
-   clear_batch_cache(brw);
+   *out_offset = offset;
+   return batch->map + (offset>>2);
}