ilo: add BLT-based blitting methods to ilo_blitter
[mesa.git] / src / gallium / drivers / ilo / ilo_3d.c
index afe6323300ff20be9908dd13c72928fbb6c2a574..b2cbcf04e0ca208af5ff86ea6daec20f9aae8885 100644
  *    Chia-I Wu <olv@lunarg.com>
  */
 
+#include "intel_winsys.h"
+
+#include "ilo_3d_pipeline.h"
 #include "ilo_context.h"
+#include "ilo_cp.h"
+#include "ilo_query.h"
+#include "ilo_shader.h"
+#include "ilo_state.h"
 #include "ilo_3d.h"
 
+static void
+process_query_for_occlusion_counter(struct ilo_3d *hw3d,
+                                    struct ilo_query *q)
+{
+   uint64_t *vals, depth_count = 0;
+   int i;
+
+   /* in pairs */
+   assert(q->reg_read % 2 == 0);
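+
+   /*
+    * Even slots of the bo are written at query begin/resume and odd slots
+    * at pause/end, so each (vals[i - 1], vals[i]) pair contributes
+    * (vals[i] - vals[i - 1]) samples to the final count.
+    */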
+
+   intel_bo_map(q->bo, false);
+   vals = intel_bo_get_virtual(q->bo);
+   for (i = 1; i < q->reg_read; i += 2)
+      depth_count += vals[i] - vals[i - 1];
+   intel_bo_unmap(q->bo);
+
+   /* accumulate the result so that the query can be resumed later */
+   q->data.u64 += depth_count;
+   q->reg_read = 0;
+}
+
+static uint64_t
+timestamp_to_ns(uint64_t timestamp)
+{
+   /* see ilo_get_timestamp() */
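+   /*
+    * The counter increments once every 80 ns; only the lower 32 bits are
+    * used, so the converted value wraps around roughly every
+    * 2^32 * 80 ns, i.e., about 343 seconds.
+    */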
+   return (timestamp & 0xffffffff) * 80;
+}
+
+static void
+process_query_for_timestamp(struct ilo_3d *hw3d, struct ilo_query *q)
+{
+   uint64_t *vals, timestamp;
+
+   assert(q->reg_read == 1);
+
+   intel_bo_map(q->bo, false);
+   vals = intel_bo_get_virtual(q->bo);
+   timestamp = vals[0];
+   intel_bo_unmap(q->bo);
+
+   q->data.u64 = timestamp_to_ns(timestamp);
+   q->reg_read = 0;
+}
+
+static void
+process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q)
+{
+   uint64_t *vals, elapsed = 0;
+   int i;
+
+   /* in pairs */
+   assert(q->reg_read % 2 == 0);
+
+   intel_bo_map(q->bo, false);
+   vals = intel_bo_get_virtual(q->bo);
+
+   for (i = 1; i < q->reg_read; i += 2)
+      elapsed += vals[i] - vals[i - 1];
+
+   intel_bo_unmap(q->bo);
+
+   /* accumulate the result so that the query can be resumed later */
+   q->data.u64 += timestamp_to_ns(elapsed);
+   q->reg_read = 0;
+}
+
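+/**
+ * Resume queries paused by ilo_3d_pause_queries().  Queries cannot stay
+ * active across ring ownership changes, so a new register snapshot is
+ * written for each query that is still outstanding.
+ */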
+static void
+ilo_3d_resume_queries(struct ilo_3d *hw3d)
+{
+   struct ilo_query *q;
+
+   /* resume occlusion queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
+      /* accumulate the result if the bo is already full */
+      if (q->reg_read >= q->reg_total)
+         process_query_for_occlusion_counter(hw3d, q);
+
+      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
+            q->bo, q->reg_read++);
+   }
+
+   /* resume timer queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
+      /* accumulate the result if the bo is already full */
+      if (q->reg_read >= q->reg_total)
+         process_query_for_time_elapsed(hw3d, q);
+
+      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
+            q->bo, q->reg_read++);
+   }
+}
+
+static void
+ilo_3d_pause_queries(struct ilo_3d *hw3d)
+{
+   struct ilo_query *q;
+
+   /* pause occlusion queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
+      assert(q->reg_read < q->reg_total);
+      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
+            q->bo, q->reg_read++);
+   }
+
+   /* pause timer queries */
+   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
+      assert(q->reg_read < q->reg_total);
+      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
+            q->bo, q->reg_read++);
+   }
+}
+
+static void
+ilo_3d_release_render_ring(struct ilo_cp *cp, void *data)
+{
+   struct ilo_3d *hw3d = data;
+
+   ilo_3d_pause_queries(hw3d);
+}
+
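+/**
+ * Own the render ring.  When ownership changes hands, paused queries are
+ * resumed; owner_reserve keeps enough batch space reserved so that the
+ * pausing writes always fit when the ring is taken away again.
+ */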
+static void
+ilo_3d_own_render_ring(struct ilo_3d *hw3d)
+{
+   ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER);
+
+   if (ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve))
+      ilo_3d_resume_queries(hw3d);
+}
+
+/**
+ * Begin a query.
+ */
+void
+ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q)
+{
+   struct ilo_3d *hw3d = ilo->hw3d;
+
+   ilo_3d_own_render_ring(hw3d);
+
+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      /* reserve some space for pausing the query */
+      q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
+            ILO_3D_PIPELINE_WRITE_DEPTH_COUNT, NULL);
+      hw3d->owner_reserve += q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+
+      q->data.u64 = 0;
+
+      if (ilo_query_alloc_bo(q, 2, -1, hw3d->cp->winsys)) {
+         /* XXX we should check the aperture size */
+         ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
+               q->bo, q->reg_read++);
+
+         list_add(&q->list, &hw3d->occlusion_queries);
+      }
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      /* nothing to do; the timestamp is written at query end */
+      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+      /* reserve some space for pausing the query */
+      q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
+            ILO_3D_PIPELINE_WRITE_TIMESTAMP, NULL);
+      hw3d->owner_reserve += q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+
+      q->data.u64 = 0;
+
+      if (ilo_query_alloc_bo(q, 2, -1, hw3d->cp->winsys)) {
+         /* XXX we should check the aperture size */
+         ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
+               q->bo, q->reg_read++);
+
+         list_add(&q->list, &hw3d->time_elapsed_queries);
+      }
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      q->data.u64 = 0;
+      list_add(&q->list, &hw3d->prim_generated_queries);
+      break;
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      q->data.u64 = 0;
+      list_add(&q->list, &hw3d->prim_emitted_queries);
+      break;
+   default:
+      assert(!"unknown query type");
+      break;
+   }
+}
+
+/**
+ * End a query.
+ */
+void
+ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q)
+{
+   struct ilo_3d *hw3d = ilo->hw3d;
+
+   ilo_3d_own_render_ring(hw3d);
+
+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      list_del(&q->list);
+
+      assert(q->reg_read < q->reg_total);
+      hw3d->owner_reserve -= q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
+            q->bo, q->reg_read++);
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      q->data.u64 = 0;
+
+      if (ilo_query_alloc_bo(q, 1, 1, hw3d->cp->winsys)) {
+         ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
+               q->bo, q->reg_read++);
+      }
+      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+      list_del(&q->list);
+
+      assert(q->reg_read < q->reg_total);
+      hw3d->owner_reserve -= q->reg_cmd_size;
+      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
+            q->bo, q->reg_read++);
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      list_del(&q->list);
+      break;
+   default:
+      assert(!"unknown query type");
+      break;
+   }
+}
+
+/**
+ * Process the raw query data.
+ */
+void
+ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q)
+{
+   struct ilo_3d *hw3d = ilo->hw3d;
+
+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      if (q->bo)
+         process_query_for_occlusion_counter(hw3d, q);
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      if (q->bo)
+         process_query_for_timestamp(hw3d, q);
+      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+      if (q->bo)
+         process_query_for_time_elapsed(hw3d, q);
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      break;
+   default:
+      assert(!"unknown query type");
+      break;
+   }
+}
+
+/**
+ * Hook for CP new-batch.
+ */
+void
+ilo_3d_cp_flushed(struct ilo_3d *hw3d)
+{
+   if (ilo_debug & ILO_DEBUG_3D)
+      ilo_3d_pipeline_dump(hw3d->pipeline);
+
+   /* invalidate the pipeline */
+   ilo_3d_pipeline_invalidate(hw3d->pipeline,
+         ILO_3D_PIPELINE_INVALIDATE_BATCH_BO |
+         ILO_3D_PIPELINE_INVALIDATE_STATE_BO);
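+
+   /*
+    * Without a logical render context, the GPU states are not preserved
+    * across batch buffers, so the HW states have to be re-emitted as well.
+    */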
+   if (!hw3d->cp->render_ctx) {
+      ilo_3d_pipeline_invalidate(hw3d->pipeline,
+            ILO_3D_PIPELINE_INVALIDATE_HW);
+   }
+
+   hw3d->new_batch = true;
+}
+
+/**
+ * Create a 3D context.
+ */
+struct ilo_3d *
+ilo_3d_create(struct ilo_cp *cp, const struct ilo_dev_info *dev)
+{
+   struct ilo_3d *hw3d;
+
+   hw3d = CALLOC_STRUCT(ilo_3d);
+   if (!hw3d)
+      return NULL;
+
+   hw3d->cp = cp;
+   hw3d->owner.release_callback = ilo_3d_release_render_ring;
+   hw3d->owner.release_data = hw3d;
+
+   hw3d->new_batch = true;
+
+   list_inithead(&hw3d->occlusion_queries);
+   list_inithead(&hw3d->time_elapsed_queries);
+   list_inithead(&hw3d->prim_generated_queries);
+   list_inithead(&hw3d->prim_emitted_queries);
+
+   hw3d->pipeline = ilo_3d_pipeline_create(cp, dev);
+   if (!hw3d->pipeline) {
+      FREE(hw3d);
+      return NULL;
+   }
+
+   return hw3d;
+}
+
+/**
+ * Destroy a 3D context.
+ */
+void
+ilo_3d_destroy(struct ilo_3d *hw3d)
+{
+   ilo_3d_pipeline_destroy(hw3d->pipeline);
+   FREE(hw3d);
+}
+
+static bool
+draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
+         const struct pipe_draw_info *info,
+         int *prim_generated, int *prim_emitted)
+{
+   bool need_flush = false;
+   int max_len;
+
+   ilo_3d_own_render_ring(hw3d);
+
+   if (!hw3d->new_batch) {
+      /*
+       * Without a better tracking mechanism, when the framebuffer changes, we
+       * have to assume that the old framebuffer may be sampled from.  If that
+       * happens in the middle of a batch buffer, we need to insert manual
+       * flushes.
+       */
+      need_flush = (ilo->dirty & ILO_DIRTY_FRAMEBUFFER);
+
+      /* likewise for SO target changes */
+      need_flush |= (ilo->dirty & ILO_DIRTY_STREAM_OUTPUT_TARGETS);
+   }
+
+   /* make sure there is enough room first */
+   max_len = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
+         ILO_3D_PIPELINE_DRAW, ilo);
+   if (need_flush) {
+      max_len += ilo_3d_pipeline_estimate_size(hw3d->pipeline,
+            ILO_3D_PIPELINE_FLUSH, NULL);
+   }
+
+   if (max_len > ilo_cp_space(hw3d->cp)) {
+      ilo_cp_flush(hw3d->cp);
+      need_flush = false;
+      assert(max_len <= ilo_cp_space(hw3d->cp));
+   }
+
+   if (need_flush)
+      ilo_3d_pipeline_emit_flush(hw3d->pipeline);
+
+   return ilo_3d_pipeline_emit_draw(hw3d->pipeline, ilo, info,
+         prim_generated, prim_emitted);
+}
+
+static void
+update_prim_count(struct ilo_3d *hw3d, int generated, int emitted)
+{
+   struct ilo_query *q;
+
+   LIST_FOR_EACH_ENTRY(q, &hw3d->prim_generated_queries, list)
+      q->data.u64 += generated;
+
+   LIST_FOR_EACH_ENTRY(q, &hw3d->prim_emitted_queries, list)
+      q->data.u64 += emitted;
+}
+
+static bool
+pass_render_condition(struct ilo_3d *hw3d, struct pipe_context *pipe)
+{
+   uint64_t result;
+   bool wait;
+
+   if (!hw3d->render_condition.query)
+      return true;
+
+   switch (hw3d->render_condition.mode) {
+   case PIPE_RENDER_COND_WAIT:
+   case PIPE_RENDER_COND_BY_REGION_WAIT:
+      wait = true;
+      break;
+   case PIPE_RENDER_COND_NO_WAIT:
+   case PIPE_RENDER_COND_BY_REGION_NO_WAIT:
+   default:
+      wait = false;
+      break;
+   }
+
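+   /*
+    * Rendering passes when the query result, such as the number of samples
+    * passed, is non-zero.  If the result is unavailable and waiting is not
+    * requested, render as though the condition passed.
+    */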
+   if (pipe->get_query_result(pipe, hw3d->render_condition.query,
+            wait, (union pipe_query_result *) &result)) {
+      return (result > 0);
+   }
+   else {
+      return true;
+   }
+}
+
+#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b))
+#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b))
+
+/**
+ * \see find_sub_primitives() from core mesa
+ */
+static int
+ilo_find_sub_primitives(const void *elements, unsigned element_size,
+                    const struct pipe_draw_info *orig_info,
+                    struct pipe_draw_info *info)
+{
+   const unsigned max_prims = orig_info->count - orig_info->start;
+   unsigned i, cur_start, cur_count;
+   int scan_index;
+   unsigned scan_num;
+
+   cur_start = orig_info->start;
+   cur_count = 0;
+   scan_num = 0;
+
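+   /*
+    * Example: with restart_index 0xffff, the index buffer
+    *
+    *   { 0, 1, 2, 0xffff, 3, 4, 5 }
+    *
+    * is split into two sub-primitives, (start 0, count 3) and
+    * (start 4, count 3).
+    */
+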
+#define IB_INDEX_READ(TYPE, INDEX) (((const TYPE *) elements)[INDEX])
+
+#define SCAN_ELEMENTS(TYPE) \
+   info[scan_num] = *orig_info; \
+   info[scan_num].primitive_restart = false; \
+   for (i = orig_info->start; i < orig_info->count; i++) { \
+      scan_index = IB_INDEX_READ(TYPE, i); \
+      if (scan_index == orig_info->restart_index) { \
+         if (cur_count > 0) { \
+            assert(scan_num < max_prims); \
+            info[scan_num].start = cur_start; \
+            info[scan_num].count = cur_count; \
+            scan_num++; \
+            info[scan_num] = *orig_info; \
+            info[scan_num].primitive_restart = false; \
+         } \
+         cur_start = i + 1; \
+         cur_count = 0; \
+      } \
+      else { \
+         UPDATE_MIN2(info[scan_num].min_index, scan_index); \
+         UPDATE_MAX2(info[scan_num].max_index, scan_index); \
+         cur_count++; \
+      } \
+   } \
+   if (cur_count > 0) { \
+      assert(scan_num < max_prims); \
+      info[scan_num].start = cur_start; \
+      info[scan_num].count = cur_count; \
+      scan_num++; \
+   }
+
+   switch (element_size) {
+   case 1:
+      SCAN_ELEMENTS(uint8_t);
+      break;
+   case 2:
+      SCAN_ELEMENTS(uint16_t);
+      break;
+   case 4:
+      SCAN_ELEMENTS(uint32_t);
+      break;
+   default:
+      assert(0 && "bad index_size in ilo_find_sub_primitives()");
+   }
+
+#undef SCAN_ELEMENTS
+
+   return scan_num;
+}
+
+static inline bool
+ilo_check_restart_index(struct ilo_context *ilo,
+                        const struct pipe_draw_info *info)
+{
+   /*
+    * Haswell (GEN 7.5) supports an arbitrary restart index.  Older GENs
+    * require the all-ones value for the given index size, checked below.
+    */
+   if (ilo->dev->gen >= ILO_GEN(7.5))
+      return true;
+
+   /* Note: indices must be unsigned byte, unsigned short or unsigned int */
+   switch (ilo->ib.state.index_size) {
+   case 1:
+      return ((info->restart_index & 0xff) == 0xff);
+   case 2:
+      return ((info->restart_index & 0xffff) == 0xffff);
+   case 4:
+      return (info->restart_index == 0xffffffff);
+   }
+   return false;
+}
+
+static inline bool
+ilo_check_restart_prim_type(struct ilo_context *ilo,
+                            const struct pipe_draw_info *info)
+{
+   switch (info->mode) {
+   case PIPE_PRIM_POINTS:
+   case PIPE_PRIM_LINES:
+   case PIPE_PRIM_LINE_STRIP:
+   case PIPE_PRIM_TRIANGLES:
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      /* All 965 GEN graphics support a cut index for these primitive types */
+      return true;
+
+   case PIPE_PRIM_LINE_LOOP:
+   case PIPE_PRIM_POLYGON:
+   case PIPE_PRIM_QUAD_STRIP:
+   case PIPE_PRIM_QUADS:
+   case PIPE_PRIM_TRIANGLE_FAN:
+      if (ilo->dev->gen >= ILO_GEN(7.5)) {
+         /* Haswell and newer parts can handle these prim types. */
+         return true;
+      }
+      break;
+   }
+
+   return false;
+}
+
+/*
+ * Handle an indexed draw that uses primitive restart when the restart index
+ * or the primitive type is not supported by the HW: split the draw into
+ * sub-primitives in software and draw each of them separately.
+ */
+static void
+ilo_draw_vbo_with_sw_restart(struct pipe_context *pipe,
+                             const struct pipe_draw_info *info)
+{
+   struct ilo_context *ilo = ilo_context(pipe);
+   struct pipe_draw_info *restart_info = NULL;
+   int sub_prim_count = 1;
+
+   /*
+    * We have to break up the primitive into chunks manually.  In the worst
+    * case, every other index is a restart index, so we need space for that
+    * many sub-primitives.
+    */
+   restart_info = MALLOC(((info->count + 1) / 2) * sizeof(*info));
+   if (NULL == restart_info) {
+      /* If we can't get memory for this, bail out */
+      ilo_err("%s:%d - Out of memory\n", __FILE__, __LINE__);
+      return;
+   }
+
+   struct pipe_transfer *transfer = NULL;
+   const void *map = pipe_buffer_map(pipe, ilo->ib.state.buffer,
+         PIPE_TRANSFER_READ, &transfer);
+
+   /* cast before applying the offset; arithmetic on void * is a GNU
+    * extension */
+   sub_prim_count = ilo_find_sub_primitives((const char *) map +
+           ilo->ib.state.offset, ilo->ib.state.index_size, info, restart_info);
+
+   pipe_buffer_unmap(pipe, transfer);
+
+   info = restart_info;
+
+   while (sub_prim_count > 0) {
+      pipe->draw_vbo(pipe, info);
+
+      sub_prim_count--;
+      info++;
+   }
+
+   FREE(restart_info);
+}
+
+static void
+ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_3d *hw3d = ilo->hw3d;
+   int prim_generated, prim_emitted;
+
+   if (!pass_render_condition(hw3d, pipe))
+      return;
+
+   if (info->primitive_restart && info->indexed) {
+      /*
+       * Want to draw an indexed primitive using primitive restart
+       * Check that HW can handle the request and fall to SW if not.
+       */
+      if (!ilo_check_restart_index(ilo, info) ||
+          !ilo_check_restart_prim_type(ilo, info)) {
+         ilo_draw_vbo_with_sw_restart(pipe, info);
+         return;
+      }
+   }
+
+   /* assume the cache is still in use by the previous batch */
+   if (hw3d->new_batch)
+      ilo_shader_cache_mark_busy(ilo->shader_cache);
+
+   ilo_finalize_states(ilo);
+
+   /* the shaders may be uploaded to a new shader cache */
+   if (hw3d->shader_cache_seqno != ilo->shader_cache->seqno) {
+      ilo_3d_pipeline_invalidate(hw3d->pipeline,
+            ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
+   }
+
+   /* If draw_vbo ever fails, return immediately. */
+   if (!draw_vbo(hw3d, ilo, info, &prim_generated, &prim_emitted))
+      return;
+
+   /* clear dirty status */
+   ilo->dirty = 0x0;
+   hw3d->new_batch = false;
+   hw3d->shader_cache_seqno = ilo->shader_cache->seqno;
+
+   update_prim_count(hw3d, prim_generated, prim_emitted);
+
+   if (ilo_debug & ILO_DEBUG_NOCACHE)
+      ilo_3d_pipeline_emit_flush(hw3d->pipeline);
+}
+
+static void
+ilo_render_condition(struct pipe_context *pipe,
+                     struct pipe_query *query,
+                     uint mode)
+{
+   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_3d *hw3d = ilo->hw3d;
+
+   /* reference count? */
+   hw3d->render_condition.query = query;
+   hw3d->render_condition.mode = mode;
+}
+
+static void
+ilo_texture_barrier(struct pipe_context *pipe)
+{
+   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_3d *hw3d = ilo->hw3d;
+
+   if (ilo->cp->ring != ILO_CP_RING_RENDER)
+      return;
+
+   ilo_3d_pipeline_emit_flush(hw3d->pipeline);
+
+   /* XXX the batch must also be flushed on GEN7+, though the reason is unknown */
+   if (ilo->dev->gen >= ILO_GEN(7))
+      ilo_cp_flush(hw3d->cp);
+}
+
+static void
+ilo_get_sample_position(struct pipe_context *pipe,
+                        unsigned sample_count,
+                        unsigned sample_index,
+                        float *out_value)
+{
+   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_3d *hw3d = ilo->hw3d;
+
+   ilo_3d_pipeline_get_sample_position(hw3d->pipeline,
+         sample_count, sample_index,
+         &out_value[0], &out_value[1]);
+}
+
 /**
  * Initialize 3D-related functions.
  */
 void
 ilo_init_3d_functions(struct ilo_context *ilo)
 {
-   ilo->base.draw_vbo = NULL;
-   ilo->base.render_condition = NULL;
-   ilo->base.texture_barrier = NULL;
-   ilo->base.get_sample_position = NULL;
+   ilo->base.draw_vbo = ilo_draw_vbo;
+   ilo->base.render_condition = ilo_render_condition;
+   ilo->base.texture_barrier = ilo_texture_barrier;
+   ilo->base.get_sample_position = ilo_get_sample_position;
 }