Merge branch 'gallium-polygon-stipple'
[mesa.git] / src / gallium / drivers / i915 / i915_prim_vbuf.c
index 6b832140a87c3e82f159e1410b74960ed9da7343..d8ae1de29632d7c23da9230ddd04626dc52a6123 100644 (file)
@@ -41,7 +41,7 @@
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
 #include "util/u_debug.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_fifo.h"
@@ -52,8 +52,7 @@
 #include "i915_state.h"
 
 
-#undef VBUF_USE_FIFO
-#undef VBUF_MAP_BUFFER
+#define VBUF_MAP_BUFFER
 
 /**
  * Primitive renderer for i915.
@@ -76,26 +75,21 @@ struct i915_vbuf_render {
    unsigned fallback;
 
    /* Stuff for the vbo */
-   struct intel_buffer *vbo;
+   struct i915_winsys_buffer *vbo;
    size_t vbo_size; /**< current size of allocated buffer */
    size_t vbo_alloc_size; /**< minimum buffer size to allocate */
-   size_t vbo_offset;
+   size_t vbo_hw_offset; /**< offset that we program the hardware with */
+   size_t vbo_sw_offset; /**< offset that we work with */
+   size_t vbo_index; /**< index offset to be added to all indices */
    void *vbo_ptr;
    size_t vbo_max_used;
+   size_t vbo_max_index; /**< index offset to be added to all indices */
 
 #ifndef VBUF_MAP_BUFFER
    size_t map_used_start;
    size_t map_used_end;
    size_t map_size;
 #endif
-
-#ifdef VBUF_USE_FIFO
-   /* Stuff for the pool */
-   struct util_fifo *pool_fifo;
-   unsigned pool_used;
-   unsigned pool_buffer_size;
-   boolean pool_not_used;
-#endif
 };
 
 
@@ -109,6 +103,35 @@ i915_vbuf_render(struct vbuf_render *render)
    return (struct i915_vbuf_render *)render;
 }
 
+/**
+ * If vbo state differs between renderer and context
+ * push state to the context. This function pushes
+ * hw_offset to i915->vbo_offset and vbo to i915->vbo.
+ *
+ * Side effects:
+ *    May updates context vbo_offset and vbo fields.
+ */
+static void
+i915_vbuf_update_vbo_state(struct vbuf_render *render)
+{
+   struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+   struct i915_context *i915 = i915_render->i915;
+
+   if (i915->vbo != i915_render->vbo ||
+       i915->vbo_offset != i915_render->vbo_hw_offset) {
+      i915->vbo = i915_render->vbo;
+      i915->vbo_offset = i915_render->vbo_hw_offset;
+      i915->dirty |= I915_NEW_VBO;
+   }
+}
+
+/**
+ * Callback exported to the draw module.
+ * Returns the current vertex_info.
+ *
+ * Side effects:
+ *    If state is dirty update derived state.
+ */
 static const struct vertex_info *
 i915_vbuf_render_get_vertex_info(struct vbuf_render *render)
 {
@@ -123,12 +146,18 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render)
    return &i915->current.vertex_info;
 }
 
+/**
+ * Reserve space in the vbo for vertices.
+ *
+ * Side effects:
+ *    None.
+ */
 static boolean
 i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
 {
    struct i915_context *i915 = i915_render->i915;
 
-   if (i915_render->vbo_size < size + i915_render->vbo_offset)
+   if (i915_render->vbo_size < size + i915_render->vbo_sw_offset)
       return FALSE;
 
    if (i915->vbo_flushed)
@@ -137,28 +166,38 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
    return TRUE;
 }
 
+/**
+ * Allocate a new vbo buffer should there not be enough space for
+ * the requested number of vertices by the draw module.
+ *
+ * Side effects:
+ *    Updates hw_offset, sw_offset, index and allocates a new buffer.
+ *    Will set i915->vbo to null on buffer allocation.
+ */
 static void
 i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
 {
    struct i915_context *i915 = i915_render->i915;
-   struct intel_winsys *iws = i915->iws;
+   struct i915_winsys *iws = i915->iws;
 
    if (i915_render->vbo) {
-#ifdef VBUF_USE_FIFO
-      if (i915_render->pool_not_used)
-         iws->buffer_destroy(iws, i915_render->vbo);
-      else
-         u_fifo_add(i915_render->pool_fifo, i915_render->vbo);
-      i915_render->vbo = NULL;
-#else
+      iws->buffer_unmap(iws, i915_render->vbo);
       iws->buffer_destroy(iws, i915_render->vbo);
-#endif
+      /*
+       * XXX If buffers where referenced then this should be done in
+       * update_vbo_state but since they arn't and malloc likes to reuse
+       * memory we need to set it to null
+       */
+      i915->vbo = NULL;
+      i915_render->vbo = NULL;
    }
 
    i915->vbo_flushed = 0;
 
    i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size);
-   i915_render->vbo_offset = 0;
+   i915_render->vbo_hw_offset = 0;
+   i915_render->vbo_sw_offset = 0;
+   i915_render->vbo_index = 0;
 
 #ifndef VBUF_MAP_BUFFER
    if (i915_render->vbo_size > i915_render->map_size) {
@@ -168,52 +207,52 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
    }
 #endif
 
-#ifdef VBUF_USE_FIFO
-   if (i915_render->vbo_size != i915_render->pool_buffer_size) {
-      i915_render->pool_not_used = TRUE;
-      i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
-            INTEL_NEW_VERTEX);
-   } else {
-      i915_render->pool_not_used = FALSE;
-
-      if (i915_render->pool_used >= 2) {
-         FLUSH_BATCH(NULL);
-         i915->vbo_flushed = 0;
-         i915_render->pool_used = 0;
-      }
-      u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo);
-   }
-#else
    i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size,
-                                         64, INTEL_NEW_VERTEX);
-#endif
+                                         I915_NEW_VERTEX);
+   i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
 }
 
+/**
+ * Callback exported to the draw module.
+ *
+ * Side effects:
+ *    Updates hw_offset, sw_offset, index and may allocate
+ *    a new buffer. Also updates may update the vbo state
+ *    on the i915 context.
+ */
 static boolean
 i915_vbuf_render_allocate_vertices(struct vbuf_render *render,
                                    ushort vertex_size,
                                    ushort nr_vertices)
 {
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
-   struct i915_context *i915 = i915_render->i915;
    size_t size = (size_t)vertex_size * (size_t)nr_vertices;
+   size_t offset;
 
-   /* FIXME: handle failure */
-   assert(!i915->vbo);
+   /*
+    * Align sw_offset with first multiple of vertex size from hw_offset.
+    * Set index to be the multiples from from hw_offset to sw_offset.
+    * i915_vbuf_render_new_buf will reset index, sw_offset, hw_offset
+    * when it allocates a new buffer this is correct.
+    */
+   {
+      offset = i915_render->vbo_sw_offset - i915_render->vbo_hw_offset;
+      offset = util_align_npot(offset, vertex_size);
+      i915_render->vbo_sw_offset = i915_render->vbo_hw_offset + offset;
+      i915_render->vbo_index = offset / vertex_size;
+   }
 
-   if (!i915_vbuf_render_reserve(i915_render, size)) {
-#ifdef VBUF_USE_FIFO
-      /* incase we flushed reset the number of pool buffers used */
-      if (i915->vbo_flushed)
-         i915_render->pool_used = 0;
-#endif
+   if (!i915_vbuf_render_reserve(i915_render, size))
       i915_vbuf_render_new_buf(i915_render, size);
-   }
+
+   /*
+    * If a new buffer has been alocated sw_offset,
+    * hw_offset & index will be reset by new_buf
+    */
 
    i915_render->vertex_size = vertex_size;
-   i915->vbo = i915_render->vbo;
-   i915->vbo_offset = i915_render->vbo_offset;
-   i915->dirty |= I915_NEW_VBO;
+
+   i915_vbuf_update_vbo_state(render);
 
    if (!i915_render->vbo)
       return FALSE;
@@ -225,16 +264,13 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render)
 {
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
    struct i915_context *i915 = i915_render->i915;
-   struct intel_winsys *iws = i915->iws;
 
    if (i915->vbo_flushed)
       debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__);
 
 #ifdef VBUF_MAP_BUFFER
-   i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
-   return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_offset;
+   return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_sw_offset;
 #else
-   (void)iws;
    return (unsigned char *)i915_render->vbo_ptr;
 #endif
 }
@@ -246,22 +282,46 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render,
 {
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
    struct i915_context *i915 = i915_render->i915;
-   struct intel_winsys *iws = i915->iws;
+   struct i915_winsys *iws = i915->iws;
 
+   i915_render->vbo_max_index = max_index;
    i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1));
 #ifdef VBUF_MAP_BUFFER
-   iws->buffer_unmap(iws, i915_render->vbo);
+   (void)iws;
 #else
    i915_render->map_used_start = i915_render->vertex_size * min_index;
    i915_render->map_used_end = i915_render->vertex_size * (max_index + 1);
    iws->buffer_write(iws, i915_render->vbo,
-                     i915_render->map_used_start + i915_render->vbo_offset,
+                     i915_render->map_used_start + i915_render->vbo_sw_offset,
                      i915_render->map_used_end - i915_render->map_used_start,
                      (unsigned char *)i915_render->vbo_ptr + i915_render->map_used_start);
 
 #endif
 }
 
+/**
+ * Ensure that the given max_index given is not larger ushort max.
+ * If it is larger then ushort max it advanced the hw_offset to the
+ * same position in the vbo as sw_offset and set index to zero.
+ *
+ * Side effects:
+ *    On failure update hw_offset and index.
+ */
+static void
+i915_vbuf_ensure_index_bounds(struct vbuf_render *render,
+                              unsigned max_index)
+{
+   struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+
+   if (max_index + i915_render->vbo_index < ((1 << 17) - 1))
+      return;
+
+   i915_render->vbo_hw_offset = i915_render->vbo_sw_offset;
+   i915_render->vbo_index = 0;
+
+   i915_vbuf_update_vbo_state(render);
+}
+
 static boolean
 i915_vbuf_render_set_primitive(struct vbuf_render *render, 
                                unsigned prim)
@@ -327,7 +387,9 @@ draw_arrays_generate_indices(struct vbuf_render *render,
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
    struct i915_context *i915 = i915_render->i915;
    unsigned i;
-   unsigned end = start + nr;
+   unsigned end = start + nr + i915_render->vbo_index;
+   start += i915_render->vbo_index;
+
    switch(type) {
    case 0:
       for (i = start; i+1 < end; i += 2)
@@ -391,30 +453,32 @@ draw_arrays_fallback(struct vbuf_render *render,
    struct i915_context *i915 = i915_render->i915;
    unsigned nr_indices;
 
+   nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback);
+   if (!nr_indices)
+      return;
+
+   i915_vbuf_ensure_index_bounds(render, start + nr_indices);
+
    if (i915->dirty)
       i915_update_derived(i915);
 
    if (i915->hardware_dirty)
       i915_emit_hardware_state(i915);
 
-   nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback);
-   if (!nr_indices)
-      return;
-
-   if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+   if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
       FLUSH_BATCH(NULL);
 
       /* Make sure state is re-emitted after a flush:
        */
-      i915_update_derived(i915);
       i915_emit_hardware_state(i915);
       i915->vbo_flushed = 1;
 
-      if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+      if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
          assert(0);
          goto out;
       }
    }
+
    OUT_BATCH(_3DPRIMITIVE |
              PRIM_INDIRECT |
              i915_render->hwprim |
@@ -423,6 +487,7 @@ draw_arrays_fallback(struct vbuf_render *render,
 
    draw_arrays_generate_indices(render, start, nr, i915_render->fallback);
 
+   i915_flush_heuristically(i915, nr_indices);
 out:
    return;
 }
@@ -440,22 +505,24 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render,
       return;
    }
 
+   i915_vbuf_ensure_index_bounds(render, start + nr);
+   start += i915_render->vbo_index;
+
    if (i915->dirty)
       i915_update_derived(i915);
 
    if (i915->hardware_dirty)
       i915_emit_hardware_state(i915);
 
-   if (!BEGIN_BATCH(2, 0)) {
+   if (!BEGIN_BATCH(2)) {
       FLUSH_BATCH(NULL);
 
       /* Make sure state is re-emitted after a flush:
        */
-      i915_update_derived(i915);
       i915_emit_hardware_state(i915);
       i915->vbo_flushed = 1;
 
-      if (!BEGIN_BATCH(2, 0)) {
+      if (!BEGIN_BATCH(2)) {
          assert(0);
          goto out;
       }
@@ -468,6 +535,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render,
              nr);
    OUT_BATCH(start); /* Beginning vertex index */
 
+   i915_flush_heuristically(i915, nr);
 out:
    return;
 }
@@ -485,35 +553,36 @@ draw_generate_indices(struct vbuf_render *render,
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
    struct i915_context *i915 = i915_render->i915;
    unsigned i;
+   unsigned o = i915_render->vbo_index;
 
    switch(type) {
    case 0:
       for (i = 0; i + 1 < nr_indices; i += 2) {
-         OUT_BATCH(indices[i] | indices[i+1] << 16);
+         OUT_BATCH((o+indices[i]) | (o+indices[i+1]) << 16);
       }
       if (i < nr_indices) {
-         OUT_BATCH(indices[i]);
+         OUT_BATCH((o+indices[i]));
       }
       break;
    case PIPE_PRIM_LINE_LOOP:
       if (nr_indices >= 2) {
          for (i = 1; i < nr_indices; i++)
-            OUT_BATCH(indices[i-1] | indices[i] << 16);
-         OUT_BATCH(indices[i-1] | indices[0] << 16);
+            OUT_BATCH((o+indices[i-1]) | (o+indices[i]) << 16);
+         OUT_BATCH((o+indices[i-1]) | (o+indices[0]) << 16);
       }
       break;
    case PIPE_PRIM_QUADS:
       for (i = 0; i + 3 < nr_indices; i += 4) {
-         OUT_BATCH(indices[i+0] | indices[i+1] << 16);
-         OUT_BATCH(indices[i+3] | indices[i+1] << 16);
-         OUT_BATCH(indices[i+2] | indices[i+3] << 16);
+         OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16);
+         OUT_BATCH((o+indices[i+3]) | (o+indices[i+1]) << 16);
+         OUT_BATCH((o+indices[i+2]) | (o+indices[i+3]) << 16);
       }
       break;
    case PIPE_PRIM_QUAD_STRIP:
       for (i = 0; i + 3 < nr_indices; i += 2) {
-         OUT_BATCH(indices[i+0] | indices[i+1] << 16);
-         OUT_BATCH(indices[i+3] | indices[i+2] << 16);
-         OUT_BATCH(indices[i+0] | indices[i+3] << 16);
+         OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16);
+         OUT_BATCH((o+indices[i+3]) | (o+indices[i+2]) << 16);
+         OUT_BATCH((o+indices[i+0]) | (o+indices[i+3]) << 16);
       }
       break;
    default:
@@ -544,9 +613,9 @@ draw_calc_nr_indices(uint nr_indices, unsigned type)
 }
 
 static void 
-i915_vbuf_render_draw(struct vbuf_render *render,
-                      const ushort *indices,
-                      uint nr_indices)
+i915_vbuf_render_draw_elements(struct vbuf_render *render,
+                               const ushort *indices,
+                               uint nr_indices)
 {
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
    struct i915_context *i915 = i915_render->i915;
@@ -558,22 +627,23 @@ i915_vbuf_render_draw(struct vbuf_render *render,
    if (!nr_indices)
       return;
 
+   i915_vbuf_ensure_index_bounds(render, i915_render->vbo_max_index);
+
    if (i915->dirty)
       i915_update_derived(i915);
 
    if (i915->hardware_dirty)
       i915_emit_hardware_state(i915);
 
-   if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+   if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
       FLUSH_BATCH(NULL);
 
       /* Make sure state is re-emitted after a flush: 
        */
-      i915_update_derived(i915);
       i915_emit_hardware_state(i915);
       i915->vbo_flushed = 1;
 
-      if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+      if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
          assert(0);
          goto out;
       }
@@ -589,6 +659,7 @@ i915_vbuf_render_draw(struct vbuf_render *render,
                          save_nr_indices,
                          i915_render->fallback);
 
+   i915_flush_heuristically(i915, nr_indices);
 out:
    return;
 }
@@ -597,20 +668,30 @@ static void
 i915_vbuf_render_release_vertices(struct vbuf_render *render)
 {
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
-   struct i915_context *i915 = i915_render->i915;
-
-   assert(i915->vbo);
 
-   i915_render->vbo_offset += i915_render->vbo_max_used;
+   i915_render->vbo_sw_offset += i915_render->vbo_max_used;
    i915_render->vbo_max_used = 0;
-   i915->vbo = NULL;
-   i915->dirty |= I915_NEW_VBO;
+
+   /*
+    * Micro optimization, by calling update here we the offset change
+    * will be picked up on the next pipe_context::draw_*.
+    */
+   i915_vbuf_update_vbo_state(render);
 }
 
 static void
 i915_vbuf_render_destroy(struct vbuf_render *render)
 {
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+   struct i915_context *i915 = i915_render->i915;
+   struct i915_winsys *iws = i915->iws;
+
+   if (i915_render->vbo) {
+      i915->vbo = NULL;
+      iws->buffer_unmap(iws, i915_render->vbo);
+      iws->buffer_destroy(iws, i915_render->vbo);
+   }
+
    FREE(i915_render);
 }
 
@@ -621,7 +702,7 @@ static struct vbuf_render *
 i915_vbuf_render_create(struct i915_context *i915)
 {
    struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render);
-   struct intel_winsys *iws = i915->iws;
+   struct i915_winsys *iws = i915->iws;
    int i;
 
    i915_render->i915 = i915;
@@ -638,7 +719,7 @@ i915_vbuf_render_create(struct i915_context *i915)
    i915_render->base.map_vertices = i915_vbuf_render_map_vertices;
    i915_render->base.unmap_vertices = i915_vbuf_render_unmap_vertices;
    i915_render->base.set_primitive = i915_vbuf_render_set_primitive;
-   i915_render->base.draw = i915_vbuf_render_draw;
+   i915_render->base.draw_elements = i915_vbuf_render_draw_elements;
    i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays;
    i915_render->base.release_vertices = i915_vbuf_render_release_vertices;
    i915_render->base.destroy = i915_vbuf_render_destroy;
@@ -652,7 +733,8 @@ i915_vbuf_render_create(struct i915_context *i915)
    i915_render->vbo = NULL;
    i915_render->vbo_ptr = NULL;
    i915_render->vbo_size = 0;
-   i915_render->vbo_offset = 0;
+   i915_render->vbo_hw_offset = 0;
+   i915_render->vbo_sw_offset = 0;
    i915_render->vbo_alloc_size = i915_render->base.max_vertex_buffer_bytes * 4;
 
 #ifdef VBUF_USE_POOL
@@ -661,8 +743,8 @@ i915_vbuf_render_create(struct i915_context *i915)
    i915_render->pool_fifo = u_fifo_create(6);
    for (i = 0; i < 6; i++)
       u_fifo_add(i915_render->pool_fifo,
-                 iws->buffer_create(iws, i915_render->pool_buffer_size, 64,
-                                    INTEL_NEW_VERTEX));
+                 iws->buffer_create(iws, i915_render->pool_buffer_size,
+                                    I915_NEW_VERTEX));
 #else
    (void)i;
    (void)iws;