Merge remote branch 'vdpau/pipe-video' into pipe-video

[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_batch.c
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c

index ed8120d617fe9ff1f374cfef5965f8f58fc6b1b2..a21af13caa328b3c9388407bc3488e98a08fbdff 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
     struct header *newheader = (struct header *)data;
  
     if (brw->emit_state_always) {
-      intel_batchbuffer_data(brw->intel.batch, data, sz);
+      intel_batchbuffer_data(brw->intel.batch, data, sz, false);
        return GL_TRUE;
     }
  
@@ -57,8 +57,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
          if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
             return GL_FALSE;
          if (item->sz != sz) {
-           _mesa_free(item->header);
-           item->header = _mesa_malloc(sz);
+           free(item->header);
+           item->header = malloc(sz);
             item->sz = sz;
          }
          goto emit;
@@ -68,14 +68,14 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
  
     assert(!item);
     item = CALLOC_STRUCT(brw_cached_batch_item);
-   item->header = _mesa_malloc(sz);
+   item->header = malloc(sz);
     item->sz = sz;
     item->next = brw->cached_batch_items;
     brw->cached_batch_items = item;
  
   emit:
     memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(brw->intel.batch, data, sz);
+   intel_batchbuffer_data(brw->intel.batch, data, sz, false);
     return GL_TRUE;
  }
  
@@ -97,3 +97,52 @@ void brw_destroy_batch_cache( struct brw_context *brw )
  {
     brw_clear_batch_cache(brw);
  }
+
+/**
+ * Allocates a block of space in the batchbuffer for indirect state.
+ *
+ * We don't want to allocate separate BOs for every bit of indirect
+ * state in the driver.  It means overallocating by a significant
+ * margin (4096 bytes, even if the object is just a 20-byte surface
+ * state), and more buffers to walk and count for aperture size checking.
+ *
+ * However, due to the restrictions imposed by the aperture size
+ * checking performance hacks, we can't have the batch point at a
+ * separate indirect state buffer, because once the batch points at
+ * it, no more relocations can be added to it.  So, we sneak these
+ * buffers in at the top of the batchbuffer.
+ *
+ * The block is carved out downwards from batch->state_batch_offset:
+ * the new offset is (state_batch_offset - size) passed through
+ * ROUND_DOWN_TO with the requested alignment, and state_batch_offset
+ * is then updated to that new offset.
+ *
+ * Parameters:
+ *   brw        - context whose brw->intel.batch is allocated from.
+ *   size       - number of bytes requested; asserted to be smaller than
+ *                the batch buffer object's size.
+ *   alignment  - alignment handed to ROUND_DOWN_TO for the block offset.
+ *                NOTE(review): presumably must be a power of two; confirm
+ *                against the ROUND_DOWN_TO definition.
+ *   out_bo     - in/out.  *out_bo is READ before it is written, so the
+ *                caller must initialize it.  If it differs from the
+ *                batch's buffer object, the old value is unreferenced,
+ *                the batch's buffer object gains a reference, and
+ *                *out_bo is set to it.
+ *                NOTE(review): a NULL initial value relies on
+ *                drm_intel_bo_unreference() accepting NULL; confirm.
+ *   out_offset - out.  Receives the byte offset of the block within the
+ *                batch buffer.
+ *
+ * Returns batch->map + offset, i.e. the address of the block within the
+ * batch's mapping.
+ */
+void *
+brw_state_batch(struct brw_context *brw,
+               int size,
+               int alignment,
+               drm_intel_bo **out_bo,
+               uint32_t *out_offset)
+{
+   struct intel_batchbuffer *batch = brw->intel.batch;
+   uint32_t offset;
+
+   assert(size < batch->buf->size);
+   offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+
+   /* If allocating from the top would wrap below the batchbuffer, or
+    * if the batch's used space (plus the reserved pad) collides with our
+    * space, then flush and try again.
+    *
+    * The offset is simply recomputed after the flush; no second
+    * collision check is performed.
+    * NOTE(review): this presumably relies on intel_batchbuffer_flush()
+    * resetting state_batch_offset and the used-space pointer so that the
+    * retry always fits (given the size assert above) -- confirm.
+    */
+   if (batch->state_batch_offset < size ||
+       offset < batch->ptr - batch->map + batch->reserved_space) {
+      intel_batchbuffer_flush(batch);
+      offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+   }
+
+   batch->state_batch_offset = offset; /* next allocation starts below this block */
+
+   if (*out_bo != batch->buf) { /* caller's BO is not the batch's BO: swap references */
+      drm_intel_bo_unreference(*out_bo);
+      drm_intel_bo_reference(batch->buf);
+      *out_bo = batch->buf;
+   }
+
+   *out_offset = offset;
+   return batch->map + offset;
+}