nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

[mesa.git] / src / gallium / drivers / ilo / ilo_3d.c
diff --git a/src/gallium/drivers/ilo/ilo_3d.c b/src/gallium/drivers/ilo/ilo_3d.c

index 52a47de5bd9fe5406b406892448a16777501515e..90063761d16e4e631f51932fe5f2966ed2aaf34d 100644 (file)
--- a/src/gallium/drivers/ilo/ilo_3d.c
+++ b/src/gallium/drivers/ilo/ilo_3d.c
@@ -25,9 +25,11 @@
   *    Chia-I Wu <olv@lunarg.com>
   */
  
+#include "util/u_prim.h"
  #include "intel_winsys.h"
  
  #include "ilo_3d_pipeline.h"
+#include "ilo_blit.h"
  #include "ilo_context.h"
  #include "ilo_cp.h"
  #include "ilo_query.h"
@@ -35,6 +37,186 @@
  #include "ilo_state.h"
  #include "ilo_3d.h"
  
+static void
+process_query_for_occlusion_counter(struct ilo_3d *hw3d,
+                                    struct ilo_query *q)
+{
+   uint64_t *vals, depth_count = 0;
+   int i;
+
+   /* in pairs */
+   assert(q->reg_read % 2 == 0);
+
+   vals = intel_bo_map(q->bo, false);
+   for (i = 1; i < q->reg_read; i += 2)
+      depth_count += vals[i] - vals[i - 1];
+   intel_bo_unmap(q->bo);
+
+   /* accumulate so that the query can be resumed if wanted */
+   q->data.u64 += depth_count;
+   q->reg_read = 0;
+}
+
+static uint64_t
+timestamp_to_ns(uint64_t timestamp)
+{
+   /* see ilo_get_timestamp() */
+   return (timestamp & 0xffffffff) * 80;
+}
+
+static void
+process_query_for_timestamp(struct ilo_3d *hw3d, struct ilo_query *q)
+{
+   uint64_t *vals, timestamp;
+
+   assert(q->reg_read == 1);
+
+   vals = intel_bo_map(q->bo, false);
+   timestamp = vals[0];
+   intel_bo_unmap(q->bo);
+
+   q->data.u64 = timestamp_to_ns(timestamp);
+   q->reg_read = 0;
+}
+
+static void
+process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q)
+{
+   uint64_t *vals, elapsed = 0;
+   int i;
+
+   /* in pairs */
+   assert(q->reg_read % 2 == 0);
+
+   vals = intel_bo_map(q->bo, false);
+
+   for (i = 1; i < q->reg_read; i += 2)
+      elapsed += vals[i] - vals[i - 1];
+
+   intel_bo_unmap(q->bo);
+
+   /* accumulate so that the query can be resumed if wanted */
+   q->data.u64 += timestamp_to_ns(elapsed);
+   q->reg_read = 0;
+}
+
+static void
+process_query_for_pipeline_statistics(struct ilo_3d *hw3d,
+                                      struct ilo_query *q)
+{
+   const uint64_t *vals;
+   int i;
+
+   assert(q->reg_read % 22 == 0);
+
+   vals = intel_bo_map(q->bo, false);
+
+   for (i = 0; i < q->reg_read; i += 22) {
+      struct pipe_query_data_pipeline_statistics *stats =
+         &q->data.pipeline_statistics;
+      const uint64_t *begin = vals + i;
+      const uint64_t *end = begin + 11;
+
+      stats->ia_vertices    += end[0] - begin[0];
+      stats->ia_primitives  += end[1] - begin[1];
+      stats->vs_invocations += end[2] - begin[2];
+      stats->gs_invocations += end[3] - begin[3];
+      stats->gs_primitives  += end[4] - begin[4];
+      stats->c_invocations  += end[5] - begin[5];
+      stats->c_primitives   += end[6] - begin[6];
+      stats->ps_invocations += end[7] - begin[7];
+      stats->hs_invocations += end[8] - begin[8];
+      stats->ds_invocations += end[9] - begin[9];
+      stats->cs_invocations += end[10] - begin[10];
+   }
+
+   intel_bo_unmap(q->bo);
+
+   q->reg_read = 0;
+}
+
+static void
+ilo_3d_resume_queries(struct ilo_3d *hw3d)
+{
+   struct ilo_query *q;
+
+   /* resume occlusion queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
+      /* accumulate the result if the bo is already full */
+      if (q->reg_read >= q->reg_total)
+         process_query_for_occlusion_counter(hw3d, q);
+
+      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
+            q->bo, q->reg_read++);
+   }
+
+   /* resume timer queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
+      /* accumulate the result if the bo is already full */
+      if (q->reg_read >= q->reg_total)
+         process_query_for_time_elapsed(hw3d, q);
+
+      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
+            q->bo, q->reg_read++);
+   }
+
+   /* resume pipeline statistics queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->pipeline_statistics_queries, list) {
+      /* accumulate the result if the bo is already full */
+      if (q->reg_read >= q->reg_total)
+         process_query_for_pipeline_statistics(hw3d, q);
+
+      ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
+            q->bo, q->reg_read);
+      q->reg_read += 11;
+   }
+}
+
+static void
+ilo_3d_pause_queries(struct ilo_3d *hw3d)
+{
+   struct ilo_query *q;
+
+   /* pause occlusion queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
+      assert(q->reg_read < q->reg_total);
+      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
+            q->bo, q->reg_read++);
+   }
+
+   /* pause timer queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
+      assert(q->reg_read < q->reg_total);
+      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
+            q->bo, q->reg_read++);
+   }
+
+   /* pause pipeline statistics queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->pipeline_statistics_queries, list) {
+      assert(q->reg_read < q->reg_total);
+      ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
+            q->bo, q->reg_read);
+      q->reg_read += 11;
+   }
+}
+
+static void
+ilo_3d_release_render_ring(struct ilo_cp *cp, void *data)
+{
+   struct ilo_3d *hw3d = data;
+
+   ilo_3d_pause_queries(hw3d);
+}
+
+void
+ilo_3d_own_render_ring(struct ilo_3d *hw3d)
+{
+   ilo_cp_set_ring(hw3d->cp, INTEL_RING_RENDER);
+
+   if (ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve))
+      ilo_3d_resume_queries(hw3d);
+}
+
  /**
   * Begin a query.
   */
@@ -43,14 +225,15 @@ ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q)
  {
     struct ilo_3d *hw3d = ilo->hw3d;
  
-   ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER);
+   ilo_3d_own_render_ring(hw3d);
  
     switch (q->type) {
     case PIPE_QUERY_OCCLUSION_COUNTER:
        /* reserve some space for pausing the query */
        q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
              ILO_3D_PIPELINE_WRITE_DEPTH_COUNT, NULL);
-      ilo_cp_reserve_for_pre_flush(hw3d->cp, q->reg_cmd_size);
+      hw3d->owner_reserve += q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
  
        q->data.u64 = 0;
  
@@ -69,7 +252,8 @@ ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q)
        /* reserve some space for pausing the query */
        q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
              ILO_3D_PIPELINE_WRITE_TIMESTAMP, NULL);
-      ilo_cp_reserve_for_pre_flush(hw3d->cp, q->reg_cmd_size);
+      hw3d->owner_reserve += q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
  
        q->data.u64 = 0;
  
@@ -89,6 +273,25 @@ ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q)
        q->data.u64 = 0;
        list_add(&q->list, &hw3d->prim_emitted_queries);
        break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      /* reserve some space for pausing the query */
+      q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
+            ILO_3D_PIPELINE_WRITE_STATISTICS, NULL);
+      hw3d->owner_reserve += q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+
+      memset(&q->data.pipeline_statistics, 0,
+            sizeof(q->data.pipeline_statistics));
+
+      if (ilo_query_alloc_bo(q, 11 * 2, -1, hw3d->cp->winsys)) {
+         /* XXX we should check the aperture size */
+         ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
+               q->bo, q->reg_read);
+         q->reg_read += 11;
+
+         list_add(&q->list, &hw3d->pipeline_statistics_queries);
+      }
+      break;
     default:
        assert(!"unknown query type");
        break;
@@ -103,14 +306,15 @@ ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q)
  {
     struct ilo_3d *hw3d = ilo->hw3d;
  
-   ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER);
+   ilo_3d_own_render_ring(hw3d);
  
     switch (q->type) {
     case PIPE_QUERY_OCCLUSION_COUNTER:
        list_del(&q->list);
  
        assert(q->reg_read < q->reg_total);
-      ilo_cp_reserve_for_pre_flush(hw3d->cp, -q->reg_cmd_size);
+      hw3d->owner_reserve -= q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
        ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
              q->bo, q->reg_read++);
        break;
@@ -126,7 +330,8 @@ ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q)
        list_del(&q->list);
  
        assert(q->reg_read < q->reg_total);
-      ilo_cp_reserve_for_pre_flush(hw3d->cp, -q->reg_cmd_size);
+      hw3d->owner_reserve -= q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
        ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
              q->bo, q->reg_read++);
        break;
@@ -134,78 +339,22 @@ ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q)
     case PIPE_QUERY_PRIMITIVES_EMITTED:
        list_del(&q->list);
        break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      list_del(&q->list);
+
+      assert(q->reg_read + 11 <= q->reg_total);
+      hw3d->owner_reserve -= q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+      ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
+            q->bo, q->reg_read);
+      q->reg_read += 11;
+      break;
     default:
        assert(!"unknown query type");
        break;
     }
  }
  
-static void
-process_query_for_occlusion_counter(struct ilo_3d *hw3d,
-                                    struct ilo_query *q)
-{
-   uint64_t *vals, depth_count = 0;
-   int i;
-
-   /* in pairs */
-   assert(q->reg_read % 2 == 0);
-
-   q->bo->map(q->bo, false);
-   vals = q->bo->get_virtual(q->bo);
-   for (i = 1; i < q->reg_read; i += 2)
-      depth_count += vals[i] - vals[i - 1];
-   q->bo->unmap(q->bo);
-
-   /* accumulate so that the query can be resumed if wanted */
-   q->data.u64 += depth_count;
-   q->reg_read = 0;
-}
-
-static uint64_t
-timestamp_to_ns(uint64_t timestamp)
-{
-   /* see ilo_get_timestamp() */
-   return (timestamp & 0xffffffff) * 80;
-}
-
-static void
-process_query_for_timestamp(struct ilo_3d *hw3d, struct ilo_query *q)
-{
-   uint64_t *vals, timestamp;
-
-   assert(q->reg_read == 1);
-
-   q->bo->map(q->bo, false);
-   vals = q->bo->get_virtual(q->bo);
-   timestamp = vals[0];
-   q->bo->unmap(q->bo);
-
-   q->data.u64 = timestamp_to_ns(timestamp);
-   q->reg_read = 0;
-}
-
-static void
-process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q)
-{
-   uint64_t *vals, elapsed = 0;
-   int i;
-
-   /* in pairs */
-   assert(q->reg_read % 2 == 0);
-
-   q->bo->map(q->bo, false);
-   vals = q->bo->get_virtual(q->bo);
-
-   for (i = 1; i < q->reg_read; i += 2)
-      elapsed += vals[i] - vals[i - 1];
-
-   q->bo->unmap(q->bo);
-
-   /* accumulate so that the query can be resumed if wanted */
-   q->data.u64 += timestamp_to_ns(elapsed);
-   q->reg_read = 0;
-}
-
  /**
   * Process the raw query data.
   */
@@ -230,6 +379,10 @@ ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q)
     case PIPE_QUERY_PRIMITIVES_GENERATED:
     case PIPE_QUERY_PRIMITIVES_EMITTED:
        break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      if (q->bo)
+         process_query_for_pipeline_statistics(hw3d, q);
+      break;
     default:
        assert(!"unknown query type");
        break;
@@ -240,80 +393,24 @@ ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q)
   * Hook for CP new-batch.
   */
  void
-ilo_3d_new_cp_batch(struct ilo_3d *hw3d)
+ilo_3d_cp_flushed(struct ilo_3d *hw3d)
  {
-   struct ilo_query *q;
-
-   hw3d->new_batch = true;
+   if (ilo_debug & ILO_DEBUG_3D)
+      ilo_3d_pipeline_dump(hw3d->pipeline);
  
     /* invalidate the pipeline */
     ilo_3d_pipeline_invalidate(hw3d->pipeline,
           ILO_3D_PIPELINE_INVALIDATE_BATCH_BO |
           ILO_3D_PIPELINE_INVALIDATE_STATE_BO);
-   if (!hw3d->cp->hw_ctx) {
-      ilo_3d_pipeline_invalidate(hw3d->pipeline,
-            ILO_3D_PIPELINE_INVALIDATE_HW);
-   }
-
-   /* resume occlusion queries */
-   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
-      /* accumulate the result if the bo is alreay full */
-      if (q->reg_read >= q->reg_total)
-         process_query_for_occlusion_counter(hw3d, q);
-
-      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
-            q->bo, q->reg_read++);
-   }
-
-   /* resume timer queries */
-   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
-      /* accumulate the result if the bo is alreay full */
-      if (q->reg_read >= q->reg_total)
-         process_query_for_time_elapsed(hw3d, q);
-
-      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
-            q->bo, q->reg_read++);
-   }
-}
-
-/**
- * Hook for CP pre-flush.
- */
-void
-ilo_3d_pre_cp_flush(struct ilo_3d *hw3d)
-{
-   struct ilo_query *q;
-
-   /* pause occlusion queries */
-   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
-      assert(q->reg_read < q->reg_total);
-      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
-            q->bo, q->reg_read++);
-   }
-
-   /* pause timer queries */
-   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
-      assert(q->reg_read < q->reg_total);
-      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
-            q->bo, q->reg_read++);
-   }
-}
  
-/**
- * Hook for CP post-flush
- */
-void
-ilo_3d_post_cp_flush(struct ilo_3d *hw3d)
-{
-   if (ilo_debug & ILO_DEBUG_3D)
-      ilo_3d_pipeline_dump(hw3d->pipeline);
+   hw3d->new_batch = true;
  }
  
  /**
   * Create a 3D context.
   */
  struct ilo_3d *
-ilo_3d_create(struct ilo_cp *cp, int gen, int gt)
+ilo_3d_create(struct ilo_cp *cp, const struct ilo_dev_info *dev)
  {
     struct ilo_3d *hw3d;
  
@@ -322,14 +419,18 @@ ilo_3d_create(struct ilo_cp *cp, int gen, int gt)
        return NULL;
  
     hw3d->cp = cp;
+   hw3d->owner.release_callback = ilo_3d_release_render_ring;
+   hw3d->owner.release_data = hw3d;
+
     hw3d->new_batch = true;
  
     list_inithead(&hw3d->occlusion_queries);
     list_inithead(&hw3d->time_elapsed_queries);
     list_inithead(&hw3d->prim_generated_queries);
     list_inithead(&hw3d->prim_emitted_queries);
+   list_inithead(&hw3d->pipeline_statistics_queries);
  
-   hw3d->pipeline = ilo_3d_pipeline_create(cp, gen, gt);
+   hw3d->pipeline = ilo_3d_pipeline_create(cp, dev);
     if (!hw3d->pipeline) {
        FREE(hw3d);
        return NULL;
@@ -345,26 +446,34 @@ void
  ilo_3d_destroy(struct ilo_3d *hw3d)
  {
     ilo_3d_pipeline_destroy(hw3d->pipeline);
+
+   if (hw3d->kernel.bo)
+      intel_bo_unreference(hw3d->kernel.bo);
+
     FREE(hw3d);
  }
  
  static bool
  draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
-         const struct pipe_draw_info *info,
           int *prim_generated, int *prim_emitted)
  {
-   bool need_flush;
+   bool need_flush = false;
     int max_len;
  
-   ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER);
+   ilo_3d_own_render_ring(hw3d);
  
-   /*
-    * Without a better tracking mechanism, when the framebuffer changes, we
-    * have to assume that the old framebuffer may be sampled from.  If that
-    * happens in the middle of a batch buffer, we need to insert manual
-    * flushes.
-    */
-   need_flush = (!hw3d->new_batch && (ilo->dirty & ILO_DIRTY_FRAMEBUFFER));
+   if (!hw3d->new_batch) {
+      /*
+       * Without a better tracking mechanism, when the framebuffer changes, we
+       * have to assume that the old framebuffer may be sampled from.  If that
+       * happens in the middle of a batch buffer, we need to insert manual
+       * flushes.
+       */
+      need_flush = (ilo->dirty & ILO_DIRTY_FB);
+
+      /* same for SO target changes */
+      need_flush |= (ilo->dirty & ILO_DIRTY_SO);
+   }
  
     /* make sure there is enough room first */
     max_len = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
@@ -375,7 +484,7 @@ draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
     }
  
     if (max_len > ilo_cp_space(hw3d->cp)) {
-      ilo_cp_flush(hw3d->cp);
+      ilo_cp_flush(hw3d->cp, "out of space");
        need_flush = false;
        assert(max_len <= ilo_cp_space(hw3d->cp));
     }
@@ -383,7 +492,7 @@ draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
     if (need_flush)
        ilo_3d_pipeline_emit_flush(hw3d->pipeline);
  
-   return ilo_3d_pipeline_emit_draw(hw3d->pipeline, ilo, info,
+   return ilo_3d_pipeline_emit_draw(hw3d->pipeline, ilo,
           prim_generated, prim_emitted);
  }
  
@@ -399,9 +508,10 @@ update_prim_count(struct ilo_3d *hw3d, int generated, int emitted)
        q->data.u64 += emitted;
  }
  
-static bool
-pass_render_condition(struct ilo_3d *hw3d, struct pipe_context *pipe)
+bool
+ilo_3d_pass_render_condition(struct ilo_context *ilo)
  {
+   struct ilo_3d *hw3d = ilo->hw3d;
     uint64_t result;
     bool wait;
  
@@ -420,13 +530,251 @@ pass_render_condition(struct ilo_3d *hw3d, struct pipe_context *pipe)
        break;
     }
  
-   if (pipe->get_query_result(pipe, hw3d->render_condition.query,
-            wait, (union pipe_query_result *) &result)) {
-      return (result > 0);
+   if (ilo->base.get_query_result(&ilo->base, hw3d->render_condition.query,
+            wait, (union pipe_query_result *) &result))
+      return (!result == hw3d->render_condition.cond);
+   else
+      return true;
+}
+
+#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b))
+#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b))
+
+/**
+ * \see find_sub_primitives() from core mesa
+ */
+static int
+ilo_find_sub_primitives(const void *elements, unsigned element_size,
+                    const struct pipe_draw_info *orig_info,
+                    struct pipe_draw_info *info)
+{
+   const unsigned max_prims = orig_info->count - orig_info->start;
+   unsigned i, cur_start, cur_count;
+   int scan_index;
+   unsigned scan_num;
+
+   cur_start = orig_info->start;
+   cur_count = 0;
+   scan_num = 0;
+
+#define IB_INDEX_READ(TYPE, INDEX) (((const TYPE *) elements)[INDEX])
+
+#define SCAN_ELEMENTS(TYPE) \
+   info[scan_num] = *orig_info; \
+   info[scan_num].primitive_restart = false; \
+   for (i = orig_info->start; i < orig_info->count; i++) { \
+      scan_index = IB_INDEX_READ(TYPE, i); \
+      if (scan_index == orig_info->restart_index) { \
+         if (cur_count > 0) { \
+            assert(scan_num < max_prims); \
+            info[scan_num].start = cur_start; \
+            info[scan_num].count = cur_count; \
+            scan_num++; \
+            info[scan_num] = *orig_info; \
+            info[scan_num].primitive_restart = false; \
+         } \
+         cur_start = i + 1; \
+         cur_count = 0; \
+      } \
+      else { \
+         UPDATE_MIN2(info[scan_num].min_index, scan_index); \
+         UPDATE_MAX2(info[scan_num].max_index, scan_index); \
+         cur_count++; \
+      } \
+   } \
+   if (cur_count > 0) { \
+      assert(scan_num < max_prims); \
+      info[scan_num].start = cur_start; \
+      info[scan_num].count = cur_count; \
+      scan_num++; \
+   }
+
+   switch (element_size) {
+   case 1:
+      SCAN_ELEMENTS(uint8_t);
+      break;
+   case 2:
+      SCAN_ELEMENTS(uint16_t);
+      break;
+   case 4:
+      SCAN_ELEMENTS(uint32_t);
+      break;
+   default:
+      assert(0 && "bad index_size in find_sub_primitives()");
+   }
+
+#undef SCAN_ELEMENTS
+
+   return scan_num;
+}
+
+static inline bool
+ilo_check_restart_index(const struct ilo_context *ilo, unsigned restart_index)
+{
+   /*
+    * Haswell (GEN(7.5)) supports an arbitrary cut index, check everything
+    * older.
+    */
+   if (ilo->dev->gen >= ILO_GEN(7.5))
+      return true;
+
+   /* Note: indices must be unsigned byte, unsigned short or unsigned int */
+   switch (ilo->ib.index_size) {
+   case 1:
+      return ((restart_index & 0xff) == 0xff);
+      break;
+   case 2:
+      return ((restart_index & 0xffff) == 0xffff);
+      break;
+   case 4:
+      return (restart_index == 0xffffffff);
+      break;
+   }
+   return false;
+}
+
+static inline bool
+ilo_check_restart_prim_type(const struct ilo_context *ilo, unsigned prim)
+{
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+   case PIPE_PRIM_LINES:
+   case PIPE_PRIM_LINE_STRIP:
+   case PIPE_PRIM_TRIANGLES:
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      /* All 965 GEN graphics support a cut index for these primitive types */
+      return true;
+      break;
+
+   case PIPE_PRIM_LINE_LOOP:
+   case PIPE_PRIM_POLYGON:
+   case PIPE_PRIM_QUAD_STRIP:
+   case PIPE_PRIM_QUADS:
+   case PIPE_PRIM_TRIANGLE_FAN:
+      if (ilo->dev->gen >= ILO_GEN(7.5)) {
+         /* Haswell and newer parts can handle these prim types. */
+         return true;
+      }
+      break;
+   }
+
+   return false;
+}
+
+/*
+ * Handle VBOs using primitive restart in software.
+ * The caller has already determined that the HW cannot handle the restart
+ * index or the primitive type.  Split the draw at each restart index and
+ * issue every resulting sub-primitive through pipe->draw_vbo().  Nothing is
+ * returned; on allocation failure an error is logged and nothing is drawn.
+ */
+static void
+ilo_draw_vbo_with_sw_restart(struct pipe_context *pipe,
+                             const struct pipe_draw_info *info)
+{
+   struct ilo_context *ilo = ilo_context(pipe);
+   struct pipe_draw_info *restart_info = NULL;
+   int sub_prim_count = 1;
+
+   /*
+    * We have to break up the primitive into chunks manually
+    * Worst case, every other index could be a restart index so
+    * need to have space for that many primitives
+    */
+   restart_info = MALLOC(((info->count + 1) / 2) * sizeof(*info));
+   if (NULL == restart_info) {
+      /* If we can't get memory for this, bail out */
+      ilo_err("%s:%d - Out of memory", __FILE__, __LINE__);
+      return;
+   }
+
+   if (ilo->ib.buffer) {
+      struct pipe_transfer *transfer;
+      const void *map;
+
+      map = pipe_buffer_map(pipe, ilo->ib.buffer,
+            PIPE_TRANSFER_READ, &transfer);
+
+      sub_prim_count = ilo_find_sub_primitives(map + ilo->ib.offset,
+            ilo->ib.index_size, info, restart_info);
+
+      pipe_buffer_unmap(pipe, transfer);
     }
     else {
+      sub_prim_count = ilo_find_sub_primitives(ilo->ib.user_buffer,
+               ilo->ib.index_size, info, restart_info);
+   }
+
+   info = restart_info;
+
+   while (sub_prim_count > 0) {
+      pipe->draw_vbo(pipe, info);
+
+      sub_prim_count--;
+      info++;
+   }
+
+   FREE(restart_info);
+}
+
+static bool
+upload_shaders(struct ilo_3d *hw3d, struct ilo_shader_cache *shc)
+{
+   bool incremental = true;
+   int upload;
+
+   upload = ilo_shader_cache_upload(shc,
+         NULL, hw3d->kernel.used, incremental);
+   if (!upload)
        return true;
+
+   /*
+    * Allocate a new bo.  When this is a new batch, assume the bo is still in
+    * use by the previous batch and force allocation.
+    *
+    * Does it help to make shader cache upload with unsynchronized mapping,
+    * and remove the check for new batch here?
+    */
+   if (hw3d->kernel.used + upload > hw3d->kernel.size || hw3d->new_batch) {
+      unsigned new_size = (hw3d->kernel.size) ?
+         hw3d->kernel.size : (8 * 1024);
+
+      while (hw3d->kernel.used + upload > new_size)
+         new_size *= 2;
+
+      if (hw3d->kernel.bo)
+         intel_bo_unreference(hw3d->kernel.bo);
+
+      hw3d->kernel.bo = intel_winsys_alloc_buffer(hw3d->cp->winsys,
+            "kernel bo", new_size, INTEL_DOMAIN_CPU);
+      if (!hw3d->kernel.bo) {
+         ilo_err("failed to allocate kernel bo\n");
+         return false;
+      }
+
+      hw3d->kernel.used = 0;
+      hw3d->kernel.size = new_size;
+      incremental = false;
+
+      assert(new_size >= ilo_shader_cache_upload(shc,
+            NULL, hw3d->kernel.used, incremental));
+
+      ilo_3d_pipeline_invalidate(hw3d->pipeline,
+            ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
     }
+
+   upload = ilo_shader_cache_upload(shc,
+         hw3d->kernel.bo, hw3d->kernel.used, incremental);
+   if (upload < 0) {
+      ilo_err("failed to upload shaders\n");
+      return false;
+   }
+
+   hw3d->kernel.used += upload;
+
+   assert(hw3d->kernel.used <= hw3d->kernel.size);
+
+   return true;
  }
  
  static void
@@ -436,35 +784,53 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
     struct ilo_3d *hw3d = ilo->hw3d;
     int prim_generated, prim_emitted;
  
-   if (!pass_render_condition(hw3d, pipe))
-      return;
+   if (ilo_debug & ILO_DEBUG_DRAW) {
+      if (info->indexed) {
+         ilo_printf("indexed draw %s: "
+               "index start %d, count %d, vertex range [%d, %d]\n",
+               u_prim_name(info->mode), info->start, info->count,
+               info->min_index, info->max_index);
+      }
+      else {
+         ilo_printf("draw %s: vertex start %d, count %d\n",
+               u_prim_name(info->mode), info->start, info->count);
+      }
  
-   /* assume the cache is still in use by the previous batch */
-   if (hw3d->new_batch)
-      ilo_shader_cache_mark_busy(ilo->shader_cache);
+      ilo_dump_dirty_flags(ilo->dirty);
+   }
  
-   ilo_finalize_states(ilo);
+   if (!ilo_3d_pass_render_condition(ilo))
+      return;
  
-   /* the shaders may be uploaded to a new shader cache */
-   if (hw3d->shader_cache_seqno != ilo->shader_cache->seqno) {
-      ilo_3d_pipeline_invalidate(hw3d->pipeline,
-            ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
+   if (info->primitive_restart && info->indexed) {
+      /*
+       * Want to draw an indexed primitive using primitive restart
+       * Check that HW can handle the request and fall to SW if not.
+       */
+      if (!ilo_check_restart_index(ilo, info->restart_index) ||
+          !ilo_check_restart_prim_type(ilo, info->mode)) {
+         ilo_draw_vbo_with_sw_restart(pipe, info);
+         return;
+      }
     }
  
-   /*
-    * The VBs and/or IB may have different BOs due to being mapped with
-    * PIPE_TRANSFER_DISCARD_x.  We should track that instead of setting the
-    * dirty flags for the performance reason.
-    */
-   ilo->dirty |= ILO_DIRTY_VERTEX_BUFFERS | ILO_DIRTY_INDEX_BUFFER;
+   ilo_finalize_3d_states(ilo, info);
  
-   if (!draw_vbo(hw3d, ilo, info, &prim_generated, &prim_emitted))
+   if (!upload_shaders(hw3d, ilo->shader_cache))
+      return;
+
+   ilo_blit_resolve_framebuffer(ilo);
+
+   /* If draw_vbo ever fails, return immediately. */
+   if (!draw_vbo(hw3d, ilo, &prim_generated, &prim_emitted))
        return;
  
     /* clear dirty status */
     ilo->dirty = 0x0;
     hw3d->new_batch = false;
-   hw3d->shader_cache_seqno = ilo->shader_cache->seqno;
+
+   /* avoid dangling pointer reference */
+   ilo->draw = NULL;
  
     update_prim_count(hw3d, prim_generated, prim_emitted);
  
@@ -475,6 +841,7 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
  static void
  ilo_render_condition(struct pipe_context *pipe,
                       struct pipe_query *query,
+                     boolean condition,
                       uint mode)
  {
     struct ilo_context *ilo = ilo_context(pipe);
@@ -483,6 +850,7 @@ ilo_render_condition(struct pipe_context *pipe,
     /* reference count? */
     hw3d->render_condition.query = query;
     hw3d->render_condition.mode = mode;
+   hw3d->render_condition.cond = condition;
  }
  
  static void
@@ -491,14 +859,14 @@ ilo_texture_barrier(struct pipe_context *pipe)
     struct ilo_context *ilo = ilo_context(pipe);
     struct ilo_3d *hw3d = ilo->hw3d;
  
-   if (ilo->cp->ring != ILO_CP_RING_RENDER)
+   if (ilo->cp->ring != INTEL_RING_RENDER)
        return;
  
     ilo_3d_pipeline_emit_flush(hw3d->pipeline);
  
     /* don't know why */
     if (ilo->dev->gen >= ILO_GEN(7))
-      ilo_cp_flush(hw3d->cp);
+      ilo_cp_flush(hw3d->cp, "texture barrier");
  }
  
  static void