Remove stale comment about glFlush().

[mesa.git] / src / mesa / drivers / dri / intel / intel_batchbuffer.c
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c

index 8ee48b5a689fd21b6c2fb10a10599fd31e33409e..803ff5e90ee382c96e9d477ecd99c90cde2b81c0 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -28,7 +28,7 @@
  #include "intel_batchbuffer.h"
  #include "intel_ioctl.h"
  #include "intel_decode.h"
-#include "i915_debug.h"
+#include "intel_reg.h"
  
  /* Relocations in kernel space:
   *    - pass dma buffer separately
@@ -78,13 +78,26 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
        batch->buf = NULL;
     }
  
-   batch->buf = dri_bo_alloc(intel->intelScreen->bufmgr, "batchbuffer",
-                            intel->intelScreen->maxBatchSize, 4096,
-                            DRM_BO_FLAG_MEM_TT);
-   dri_bo_map(batch->buf, GL_TRUE);
-   batch->map = batch->buf->virtual;
-   batch->size = intel->intelScreen->maxBatchSize;
+   if (!batch->buffer && intel->ttm == GL_TRUE)
+      batch->buffer = malloc (intel->maxBatchSize);
+
+   batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer",
+                            intel->maxBatchSize, 4096,
+                            DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
+   if (batch->buffer)
+      batch->map = batch->buffer;
+   else {
+      dri_bo_map(batch->buf, GL_TRUE);
+      batch->map = batch->buf->virtual;
+   }
+   batch->size = intel->maxBatchSize;
     batch->ptr = batch->map;
+   batch->dirty_state = ~0;
+   batch->cliprect_mode = IGNORE_CLIPRECTS;
+
+   /* account batchbuffer in aperture */
+   dri_bufmgr_check_aperture_space(batch->buf);
+
  }
  
  struct intel_batchbuffer *
@@ -93,7 +106,6 @@ intel_batchbuffer_alloc(struct intel_context *intel)
     struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
  
     batch->intel = intel;
-   batch->last_fence = NULL;
     intel_batchbuffer_reset(batch);
  
     return batch;
@@ -102,129 +114,68 @@ intel_batchbuffer_alloc(struct intel_context *intel)
  void
  intel_batchbuffer_free(struct intel_batchbuffer *batch)
  {
-   if (batch->last_fence) {
-      dri_fence_wait(batch->last_fence);
-      dri_fence_unreference(batch->last_fence);
-      batch->last_fence = NULL;
-   }
-   if (batch->map) {
-      dri_bo_unmap(batch->buf);
-      batch->map = NULL;
+   if (batch->buffer)
+      free (batch->buffer);
+   else {
+      if (batch->map) {
+        dri_bo_unmap(batch->buf);
+        batch->map = NULL;
+      }
     }
     dri_bo_unreference(batch->buf);
     batch->buf = NULL;
     free(batch);
  }
  
-static int
-relocation_sort(const void *a_in, const void *b_in) {
-   const struct buffer_reloc *a = a_in, *b = b_in;
-
-   return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1;
-}
  
  
  /* TODO: Push this whole function into bufmgr.
   */
  static void
  do_flush_locked(struct intel_batchbuffer *batch,
-                GLuint used,
-                GLboolean ignore_cliprects, GLboolean allow_unlock)
+               GLuint used, GLboolean allow_unlock)
  {
-   GLuint *ptr;
-   GLuint i;
     struct intel_context *intel = batch->intel;
-   dri_fence *fo;
-   GLboolean performed_rendering = GL_FALSE;
-
-   assert(batch->buf->virtual != NULL);
-   ptr = batch->buf->virtual;
-
-   /* Sort our relocation list in terms of referenced buffer pointer.
-    * This lets us uniquely validate the buffers with the sum of all the flags,
-    * while avoiding O(n^2) on number of relocations.
-    */
-   qsort(batch->reloc, batch->nr_relocs, sizeof(batch->reloc[0]),
-        relocation_sort);
+   int ret = 0;
  
-   /* Perform the necessary validations of buffers, and enter the relocations
-    * in the batchbuffer.
-    */
-   for (i = 0; i < batch->nr_relocs; i++) {
-      struct buffer_reloc *r = &batch->reloc[i];
-
-      if (r->validate_flags & DRM_BO_FLAG_WRITE)
-        performed_rendering = GL_TRUE;
-
-      /* If this is the first time we've seen this buffer in the relocation
-       * list, figure out our flags and validate it.
-       */
-      if (i == 0 || batch->reloc[i - 1].buf != r->buf) {
-        uint32_t validate_flags;
-        int j, ret;
-
-        /* Accumulate the flags we need for validating this buffer. */
-        validate_flags = r->validate_flags;
-        for (j = i + 1; j < batch->nr_relocs; j++) {
-           if (batch->reloc[j].buf != r->buf)
-              break;
-           validate_flags |= batch->reloc[j].validate_flags;
-        }
-
-        /* Validate.  If we fail, fence to clear the unfenced list and bail
-         * out.
-         */
-        ret = dri_bo_validate(r->buf, validate_flags);
-        if (ret != 0) {
-           dri_bo_unmap(batch->buf);
-           fo = dri_fence_validated(intel->intelScreen->bufmgr,
-                                    "batchbuffer failure fence", GL_TRUE);
-           dri_fence_unreference(fo);
-           goto done;
-        }
-      }
-      ptr[r->offset / 4] = r->buf->offset + r->delta;
-      dri_bo_unreference(r->buf);
-   }
+   if (batch->buffer)
+      dri_bo_subdata (batch->buf, 0, used, batch->buffer);
+   else
+      dri_bo_unmap(batch->buf);
  
-   dri_bo_unmap(batch->buf);
     batch->map = NULL;
     batch->ptr = NULL;
  
-   dri_bo_validate(batch->buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
-
-   batch->list_count = 0;
-   batch->nr_relocs = 0;
-   batch->flags = 0;
-
     /* Throw away non-effective packets.  Won't work once we have
      * hardware contexts which would preserve statechanges beyond a
      * single buffer.
      */
  
-   if (!(intel->numClipRects == 0 && !ignore_cliprects)) {
-      intel_batch_ioctl(batch->intel,
-                        batch->buf->offset,
-                        used, ignore_cliprects, allow_unlock);
+   if (!(intel->numClipRects == 0 &&
+        batch->cliprect_mode == LOOP_CLIPRECTS)) {
+      if (intel->ttm == GL_TRUE) {
+        struct drm_i915_gem_execbuffer *execbuf;
+
+        execbuf = dri_process_relocs(batch->buf);
+        ret = intel_exec_ioctl(batch->intel,
+                               used,
+                               batch->cliprect_mode != LOOP_CLIPRECTS,
+                               allow_unlock,
+                               execbuf);
+      } else {
+        dri_process_relocs(batch->buf);
+        ret = intel_batch_ioctl(batch->intel,
+                                batch->buf->offset,
+                                used,
+                                batch->cliprect_mode != LOOP_CLIPRECTS,
+                                allow_unlock);
+      }
     }
  
-   /* Associate a fence with the validated buffers, and note that we included
-    * a flush at the end.
-    */
-   fo = dri_fence_validated(intel->intelScreen->bufmgr,
-                           "Batch fence", GL_TRUE);
-
-   if (performed_rendering) {
-      dri_fence_unreference(batch->last_fence);
-      batch->last_fence = fo;
-   } else {
-     /* If we didn't validate any buffers for writing by the card, we don't
-      * need to track the fence for glFinish().
-      */
-      dri_fence_unreference(fo);
-   }
+   dri_post_submit(batch->buf);
  
-   if (intel->numClipRects == 0 && !ignore_cliprects) {
+   if (intel->numClipRects == 0 &&
+       batch->cliprect_mode == LOOP_CLIPRECTS) {
        if (allow_unlock) {
          /* If we are not doing any actual user-visible rendering,
           * do a sched_yield to keep the app from pegging the cpu while
@@ -234,20 +185,28 @@ do_flush_locked(struct intel_batchbuffer *batch,
           sched_yield();
           LOCK_HARDWARE(intel);
        }
-      intel->vtbl.lost_hardware(intel);
     }
  
-done:
     if (INTEL_DEBUG & DEBUG_BATCH) {
        dri_bo_map(batch->buf, GL_FALSE);
-      intel_decode(ptr, used / 4, batch->buf->offset);
+      intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
+                  intel->intelScreen->deviceID);
        dri_bo_unmap(batch->buf);
+
+      if (intel->vtbl.debug_batch != NULL)
+        intel->vtbl.debug_batch(intel);
     }
-}
  
+   if (ret != 0) {
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
+   intel->vtbl.new_batch(intel);
+}
  
  void
-intel_batchbuffer_flush(struct intel_batchbuffer *batch)
+_intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
+                        int line)
  {
     struct intel_context *intel = batch->intel;
     GLuint used = batch->ptr - batch->map;
@@ -256,76 +215,101 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch)
     if (used == 0)
        return;
  
-   /* Add the MI_BATCH_BUFFER_END.  Always add an MI_FLUSH - this is a
-    * performance drain that we would like to avoid.
-    */
-   if (used & 4) {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = 0;
-      ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
-      used += 12;
+   if (INTEL_DEBUG & DEBUG_BATCH)
+      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
+             used);
+
+   /* Emit a flush if the bufmgr doesn't do it for us. */
+   if (!intel->ttm) {
+      *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
     }
-   else {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
-      used += 8;
+
+   /* Round batchbuffer usage to 2 DWORDs. */
+
+   if ((used & 4) == 0) {
+      *(GLuint *) (batch->ptr) = 0; /* noop */
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
     }
  
+   /* Mark the end of the buffer. */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
+   batch->ptr += 4;
+   used = batch->ptr - batch->map;
+
+   /* Workaround for recursive batchbuffer flushing: If the window is
+    * moved, we can get into a case where we try to flush during a
+    * flush.  What happens is that when we try to grab the lock for
+    * the first flush, we detect that the window moved which then
+    * causes another flush (from the intel_draw_buffer() call in
+    * intelUpdatePageFlipping()).  To work around this we reset the
+    * batchbuffer tail pointer before trying to get the lock.  This
+    * prevents the nested buffer flush, but a better fix would be to
+    * avoid that in the first place. */
+   batch->ptr = batch->map;
+
     /* TODO: Just pass the relocation list and dma buffer up to the
      * kernel.
      */
     if (!was_locked)
        LOCK_HARDWARE(intel);
  
-   do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS),
-                  GL_FALSE);
+   do_flush_locked(batch, used, GL_FALSE);
  
     if (!was_locked)
        UNLOCK_HARDWARE(intel);
  
+   if (INTEL_DEBUG & DEBUG_SYNC) {
+      int irq;
+
+      fprintf(stderr, "waiting for idle\n");
+      LOCK_HARDWARE(intel);
+      irq = intelEmitIrqLocked(intel);
+      UNLOCK_HARDWARE(intel);
+      intelWaitIrq(intel, irq);
+   }
+
     /* Reset the buffer:
      */
     intel_batchbuffer_reset(batch);
  }
  
-void
-intel_batchbuffer_finish(struct intel_batchbuffer *batch)
-{
-   intel_batchbuffer_flush(batch);
-   if (batch->last_fence != NULL)
-      dri_fence_wait(batch->last_fence);
-}
-
  
  /*  This is the only way buffers get added to the validate list.
   */
  GLboolean
  intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
                               dri_bo *buffer,
-                             GLuint flags, GLuint delta)
+                             uint32_t read_domains, uint32_t write_domain,
+                            uint32_t delta)
  {
-   struct buffer_reloc *r = &batch->reloc[batch->nr_relocs++];
-
-   assert(batch->nr_relocs <= MAX_RELOCS);
-
-   dri_bo_reference(buffer);
-   r->buf = buffer;
-   r->offset = batch->ptr - batch->map;
-   r->delta = delta;
-   r->validate_flags = flags;
+   int ret;
+
+   if (batch->ptr - batch->map > batch->buf->size)
+    _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+                 batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+   ret = dri_emit_reloc(batch->buf, read_domains, write_domain,
+                       delta, batch->ptr - batch->map, buffer);
+
+   /*
+    * Using the old buffer offset, write in what the right data would be, in case
+    * the buffer doesn't move and we can short-circuit the relocation processing
+    * in the kernel
+    */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
  
-   batch->ptr += 4;
     return GL_TRUE;
  }
  
-
-
  void
  intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                       const void *data, GLuint bytes, GLuint flags)
+                       const void *data, GLuint bytes,
+                      enum cliprect_mode cliprect_mode)
  {
     assert((bytes & 3) == 0);
-   intel_batchbuffer_require_space(batch, bytes, flags);
+   intel_batchbuffer_require_space(batch, bytes, cliprect_mode);
     __memcpy(batch->ptr, data, bytes);
     batch->ptr += bytes;
  }