Remove stale comment about glFlush().
[mesa.git] / src / mesa / drivers / dri / intel / intel_batchbuffer.c
index 8ee48b5a689fd21b6c2fb10a10599fd31e33409e..803ff5e90ee382c96e9d477ecd99c90cde2b81c0 100644 (file)
@@ -28,7 +28,7 @@
 #include "intel_batchbuffer.h"
 #include "intel_ioctl.h"
 #include "intel_decode.h"
-#include "i915_debug.h"
+#include "intel_reg.h"
 
 /* Relocations in kernel space:
  *    - pass dma buffer seperately
@@ -78,13 +78,26 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
       batch->buf = NULL;
    }
 
-   batch->buf = dri_bo_alloc(intel->intelScreen->bufmgr, "batchbuffer",
-                            intel->intelScreen->maxBatchSize, 4096,
-                            DRM_BO_FLAG_MEM_TT);
-   dri_bo_map(batch->buf, GL_TRUE);
-   batch->map = batch->buf->virtual;
-   batch->size = intel->intelScreen->maxBatchSize;
+   if (!batch->buffer && intel->ttm == GL_TRUE)
+      batch->buffer = malloc (intel->maxBatchSize);
+
+   batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer",
+                            intel->maxBatchSize, 4096,
+                            DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
+   if (batch->buffer)
+      batch->map = batch->buffer;
+   else {
+      dri_bo_map(batch->buf, GL_TRUE);
+      batch->map = batch->buf->virtual;
+   }
+   batch->size = intel->maxBatchSize;
    batch->ptr = batch->map;
+   batch->dirty_state = ~0;
+   batch->cliprect_mode = IGNORE_CLIPRECTS;
+
+   /* account batchbuffer in aperture */
+   dri_bufmgr_check_aperture_space(batch->buf);
+
 }
 
 struct intel_batchbuffer *
@@ -93,7 +106,6 @@ intel_batchbuffer_alloc(struct intel_context *intel)
    struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
 
    batch->intel = intel;
-   batch->last_fence = NULL;
    intel_batchbuffer_reset(batch);
 
    return batch;
@@ -102,129 +114,68 @@ intel_batchbuffer_alloc(struct intel_context *intel)
 void
 intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
-   if (batch->last_fence) {
-      dri_fence_wait(batch->last_fence);
-      dri_fence_unreference(batch->last_fence);
-      batch->last_fence = NULL;
-   }
-   if (batch->map) {
-      dri_bo_unmap(batch->buf);
-      batch->map = NULL;
+   if (batch->buffer)
+      free (batch->buffer);
+   else {
+      if (batch->map) {
+        dri_bo_unmap(batch->buf);
+        batch->map = NULL;
+      }
    }
    dri_bo_unreference(batch->buf);
    batch->buf = NULL;
    free(batch);
 }
 
-static int
-relocation_sort(const void *a_in, const void *b_in) {
-   const struct buffer_reloc *a = a_in, *b = b_in;
-
-   return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1;
-}
 
 
 /* TODO: Push this whole function into bufmgr.
  */
 static void
 do_flush_locked(struct intel_batchbuffer *batch,
-                GLuint used,
-                GLboolean ignore_cliprects, GLboolean allow_unlock)
+               GLuint used, GLboolean allow_unlock)
 {
-   GLuint *ptr;
-   GLuint i;
    struct intel_context *intel = batch->intel;
-   dri_fence *fo;
-   GLboolean performed_rendering = GL_FALSE;
-
-   assert(batch->buf->virtual != NULL);
-   ptr = batch->buf->virtual;
-
-   /* Sort our relocation list in terms of referenced buffer pointer.
-    * This lets us uniquely validate the buffers with the sum of all the flags,
-    * while avoiding O(n^2) on number of relocations.
-    */
-   qsort(batch->reloc, batch->nr_relocs, sizeof(batch->reloc[0]),
-        relocation_sort);
+   int ret = 0;
 
-   /* Perform the necessary validations of buffers, and enter the relocations
-    * in the batchbuffer.
-    */
-   for (i = 0; i < batch->nr_relocs; i++) {
-      struct buffer_reloc *r = &batch->reloc[i];
-
-      if (r->validate_flags & DRM_BO_FLAG_WRITE)
-        performed_rendering = GL_TRUE;
-
-      /* If this is the first time we've seen this buffer in the relocation
-       * list, figure out our flags and validate it.
-       */
-      if (i == 0 || batch->reloc[i - 1].buf != r->buf) {
-        uint32_t validate_flags;
-        int j, ret;
-
-        /* Accumulate the flags we need for validating this buffer. */
-        validate_flags = r->validate_flags;
-        for (j = i + 1; j < batch->nr_relocs; j++) {
-           if (batch->reloc[j].buf != r->buf)
-              break;
-           validate_flags |= batch->reloc[j].validate_flags;
-        }
-
-        /* Validate.  If we fail, fence to clear the unfenced list and bail
-         * out.
-         */
-        ret = dri_bo_validate(r->buf, validate_flags);
-        if (ret != 0) {
-           dri_bo_unmap(batch->buf);
-           fo = dri_fence_validated(intel->intelScreen->bufmgr,
-                                    "batchbuffer failure fence", GL_TRUE);
-           dri_fence_unreference(fo);
-           goto done;
-        }
-      }
-      ptr[r->offset / 4] = r->buf->offset + r->delta;
-      dri_bo_unreference(r->buf);
-   }
+   if (batch->buffer)
+      dri_bo_subdata (batch->buf, 0, used, batch->buffer);
+   else
+      dri_bo_unmap(batch->buf);
 
-   dri_bo_unmap(batch->buf);
    batch->map = NULL;
    batch->ptr = NULL;
 
-   dri_bo_validate(batch->buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
-
-   batch->list_count = 0;
-   batch->nr_relocs = 0;
-   batch->flags = 0;
-
    /* Throw away non-effective packets.  Won't work once we have
     * hardware contexts which would preserve statechanges beyond a
     * single buffer.
     */
 
-   if (!(intel->numClipRects == 0 && !ignore_cliprects)) {
-      intel_batch_ioctl(batch->intel,
-                        batch->buf->offset,
-                        used, ignore_cliprects, allow_unlock);
+   if (!(intel->numClipRects == 0 &&
+        batch->cliprect_mode == LOOP_CLIPRECTS)) {
+      if (intel->ttm == GL_TRUE) {
+        struct drm_i915_gem_execbuffer *execbuf;
+
+        execbuf = dri_process_relocs(batch->buf);
+        ret = intel_exec_ioctl(batch->intel,
+                               used,
+                               batch->cliprect_mode != LOOP_CLIPRECTS,
+                               allow_unlock,
+                               execbuf);
+      } else {
+        dri_process_relocs(batch->buf);
+        ret = intel_batch_ioctl(batch->intel,
+                                batch->buf->offset,
+                                used,
+                                batch->cliprect_mode != LOOP_CLIPRECTS,
+                                allow_unlock);
+      }
    }
 
-   /* Associate a fence with the validated buffers, and note that we included
-    * a flush at the end.
-    */
-   fo = dri_fence_validated(intel->intelScreen->bufmgr,
-                           "Batch fence", GL_TRUE);
-
-   if (performed_rendering) {
-      dri_fence_unreference(batch->last_fence);
-      batch->last_fence = fo;
-   } else {
-     /* If we didn't validate any buffers for writing by the card, we don't
-      * need to track the fence for glFinish().
-      */
-      dri_fence_unreference(fo);
-   }
+   dri_post_submit(batch->buf);
 
-   if (intel->numClipRects == 0 && !ignore_cliprects) {
+   if (intel->numClipRects == 0 &&
+       batch->cliprect_mode == LOOP_CLIPRECTS) {
       if (allow_unlock) {
         /* If we are not doing any actual user-visible rendering,
          * do a sched_yield to keep the app from pegging the cpu while
@@ -234,20 +185,28 @@ do_flush_locked(struct intel_batchbuffer *batch,
          sched_yield();
          LOCK_HARDWARE(intel);
       }
-      intel->vtbl.lost_hardware(intel);
    }
 
-done:
    if (INTEL_DEBUG & DEBUG_BATCH) {
       dri_bo_map(batch->buf, GL_FALSE);
-      intel_decode(ptr, used / 4, batch->buf->offset);
+      intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
+                  intel->intelScreen->deviceID);
       dri_bo_unmap(batch->buf);
+
+      if (intel->vtbl.debug_batch != NULL)
+        intel->vtbl.debug_batch(intel);
    }
-}
 
+   if (ret != 0) {
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
+   intel->vtbl.new_batch(intel);
+}
 
 void
-intel_batchbuffer_flush(struct intel_batchbuffer *batch)
+_intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
+                        int line)
 {
    struct intel_context *intel = batch->intel;
    GLuint used = batch->ptr - batch->map;
@@ -256,76 +215,101 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch)
    if (used == 0)
       return;
 
-   /* Add the MI_BATCH_BUFFER_END.  Always add an MI_FLUSH - this is a
-    * performance drain that we would like to avoid.
-    */
-   if (used & 4) {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = 0;
-      ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
-      used += 12;
+   if (INTEL_DEBUG & DEBUG_BATCH)
+      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
+             used);
+
+   /* Emit a flush if the bufmgr doesn't do it for us. */
+   if (!intel->ttm) {
+      *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
    }
-   else {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
-      used += 8;
+
+   /* Round batchbuffer usage to 2 DWORDs. */
+
+   if ((used & 4) == 0) {
+      *(GLuint *) (batch->ptr) = 0; /* noop */
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
    }
 
+   /* Mark the end of the buffer. */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
+   batch->ptr += 4;
+   used = batch->ptr - batch->map;
+
+   /* Workaround for recursive batchbuffer flushing: If the window is
+    * moved, we can get into a case where we try to flush during a
+    * flush.  What happens is that when we try to grab the lock for
+    * the first flush, we detect that the window moved which then
+    * causes another flush (from the intel_draw_buffer() call in
+    * intelUpdatePageFlipping()).  To work around this we reset the
+    * batchbuffer tail pointer before trying to get the lock.  This
+    * prevent the nested buffer flush, but a better fix would be to
+    * avoid that in the first place. */
+   batch->ptr = batch->map;
+
    /* TODO: Just pass the relocation list and dma buffer up to the
     * kernel.
     */
    if (!was_locked)
       LOCK_HARDWARE(intel);
 
-   do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS),
-                  GL_FALSE);
+   do_flush_locked(batch, used, GL_FALSE);
 
    if (!was_locked)
       UNLOCK_HARDWARE(intel);
 
+   if (INTEL_DEBUG & DEBUG_SYNC) {
+      int irq;
+
+      fprintf(stderr, "waiting for idle\n");
+      LOCK_HARDWARE(intel);
+      irq = intelEmitIrqLocked(intel);
+      UNLOCK_HARDWARE(intel);
+      intelWaitIrq(intel, irq);
+   }
+
    /* Reset the buffer:
     */
    intel_batchbuffer_reset(batch);
 }
 
-void
-intel_batchbuffer_finish(struct intel_batchbuffer *batch)
-{
-   intel_batchbuffer_flush(batch);
-   if (batch->last_fence != NULL)
-      dri_fence_wait(batch->last_fence);
-}
-
 
 /*  This is the only way buffers get added to the validate list.
  */
 GLboolean
 intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
                              dri_bo *buffer,
-                             GLuint flags, GLuint delta)
+                             uint32_t read_domains, uint32_t write_domain,
+                            uint32_t delta)
 {
-   struct buffer_reloc *r = &batch->reloc[batch->nr_relocs++];
-
-   assert(batch->nr_relocs <= MAX_RELOCS);
-
-   dri_bo_reference(buffer);
-   r->buf = buffer;
-   r->offset = batch->ptr - batch->map;
-   r->delta = delta;
-   r->validate_flags = flags;
+   int ret;
+
+   if (batch->ptr - batch->map > batch->buf->size)
+    _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+                 batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+   ret = dri_emit_reloc(batch->buf, read_domains, write_domain,
+                       delta, batch->ptr - batch->map, buffer);
+
+   /*
+    * Using the old buffer offset, write in what the right data would be, in case
+    * the buffer doesn't move and we can short-circuit the relocation processing
+    * in the kernel
+    */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
 
-   batch->ptr += 4;
    return GL_TRUE;
 }
 
-
-
 void
 intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                       const void *data, GLuint bytes, GLuint flags)
+                       const void *data, GLuint bytes,
+                      enum cliprect_mode cliprect_mode)
 {
    assert((bytes & 3) == 0);
-   intel_batchbuffer_require_space(batch, bytes, flags);
+   intel_batchbuffer_require_space(batch, bytes, cliprect_mode);
    __memcpy(batch->ptr, data, bytes);
    batch->ptr += bytes;
 }