}
static void
-ilo_3d_release_render_ring(struct ilo_cp *cp, void *data)
+ilo_3d_own_render_ring(struct ilo_3d *hw3d)
{
- struct ilo_3d *hw3d = data;
-
- ilo_3d_pause_queries(hw3d);
+ ilo_cp_set_owner(hw3d->cp, INTEL_RING_RENDER, &hw3d->owner);
}
-void
-ilo_3d_own_render_ring(struct ilo_3d *hw3d)
+static void
+ilo_3d_reserve_for_query(struct ilo_3d *hw3d, struct ilo_query *q,
+ enum ilo_3d_pipeline_action act)
{
- ilo_cp_set_ring(hw3d->cp, INTEL_RING_RENDER);
+ q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline, act, NULL);
- if (ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve))
- ilo_3d_resume_queries(hw3d);
+ /* XXX we should check the aperture size */
+ if (ilo_cp_space(hw3d->cp) < q->reg_cmd_size * 2) {
+ ilo_cp_submit(hw3d->cp, "out of space");
+ assert(ilo_cp_space(hw3d->cp) >= q->reg_cmd_size * 2);
+ }
+
+ /* reserve space for pausing the query */
+ hw3d->owner.reserve += q->reg_cmd_size;
}
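
The ilo_cp_owner interface used above is not shown in this hunk. The following is a minimal sketch of its presumed shape, inferred from the callbacks and fields this patch assigns; the actual declaration lives in ilo_cp.h and may differ:

    struct ilo_cp_owner {
       /* called when the owner gains or loses the command parser */
       void (*own)(struct ilo_cp *cp, void *data);
       void (*release)(struct ilo_cp *cp, void *data);
       void *data;

       /*
        * batch space the owner wants kept free, e.g., so that outstanding
        * queries can still be paused when the batch is submitted
        */
       int reserve;
    };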
/**
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- /* reserve some space for pausing the query */
- q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
- ILO_3D_PIPELINE_WRITE_DEPTH_COUNT, NULL);
- hw3d->owner_reserve += q->reg_cmd_size;
- ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
-
+ ilo_3d_reserve_for_query(hw3d, q, ILO_3D_PIPELINE_WRITE_DEPTH_COUNT);
q->data.u64 = 0;
if (ilo_query_alloc_bo(q, 2, -1, hw3d->cp->winsys)) {
- /* XXX we should check the aperture size */
ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
q->bo, q->reg_read++);
/* nop */
break;
case PIPE_QUERY_TIME_ELAPSED:
- /* reserve some space for pausing the query */
- q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
- ILO_3D_PIPELINE_WRITE_TIMESTAMP, NULL);
- hw3d->owner_reserve += q->reg_cmd_size;
- ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
-
+ ilo_3d_reserve_for_query(hw3d, q, ILO_3D_PIPELINE_WRITE_TIMESTAMP);
q->data.u64 = 0;
if (ilo_query_alloc_bo(q, 2, -1, hw3d->cp->winsys)) {
- /* XXX we should check the aperture size */
ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
q->bo, q->reg_read++);
list_add(&q->list, &hw3d->prim_emitted_queries);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- /* reserve some space for pausing the query */
- q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
- ILO_3D_PIPELINE_WRITE_STATISTICS, NULL);
- hw3d->owner_reserve += q->reg_cmd_size;
- ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
-
+ ilo_3d_reserve_for_query(hw3d, q, ILO_3D_PIPELINE_WRITE_STATISTICS);
memset(&q->data.pipeline_statistics, 0,
sizeof(q->data.pipeline_statistics));
if (ilo_query_alloc_bo(q, 11 * 2, -1, hw3d->cp->winsys)) {
- /* XXX we should check the aperture size */
ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
q->bo, q->reg_read);
q->reg_read += 11;
list_del(&q->list);
assert(q->reg_read < q->reg_total);
- hw3d->owner_reserve -= q->reg_cmd_size;
- ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+ assert(hw3d->owner.reserve >= q->reg_cmd_size);
+ hw3d->owner.reserve -= q->reg_cmd_size;
+
ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
q->bo, q->reg_read++);
break;
list_del(&q->list);
assert(q->reg_read < q->reg_total);
- hw3d->owner_reserve -= q->reg_cmd_size;
- ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+ assert(hw3d->owner.reserve >= q->reg_cmd_size);
+ hw3d->owner.reserve -= q->reg_cmd_size;
+
ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
q->bo, q->reg_read++);
break;
list_del(&q->list);
assert(q->reg_read + 11 <= q->reg_total);
- hw3d->owner_reserve -= q->reg_cmd_size;
- ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
+ assert(hw3d->owner.reserve >= q->reg_cmd_size);
+ hw3d->owner.reserve -= q->reg_cmd_size;
+
ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
q->bo, q->reg_read);
q->reg_read += 11;
* Hook for CP new-batch.
*/
void
-ilo_3d_cp_flushed(struct ilo_3d *hw3d)
+ilo_3d_cp_submitted(struct ilo_3d *hw3d)
{
- if (ilo_debug & ILO_DEBUG_3D)
- ilo_3d_pipeline_dump(hw3d->pipeline);
-
/* invalidate the pipeline */
ilo_3d_pipeline_invalidate(hw3d->pipeline,
ILO_3D_PIPELINE_INVALIDATE_BATCH_BO |
- ILO_3D_PIPELINE_INVALIDATE_STATE_BO);
+ ILO_3D_PIPELINE_INVALIDATE_STATE_BO |
+ ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
hw3d->new_batch = true;
}
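
Note: ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO is new here. With upload_shaders removed further below, the kernel bo is presumably owned by the shader cache and uploaded through the CP builder, so it can change across submissions and must be invalidated on every new batch rather than only when upload_shaders reallocated it.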
+static void
+ilo_3d_own_cp(struct ilo_cp *cp, void *data)
+{
+ struct ilo_3d *hw3d = data;
+
+ /* multiply by 2 for both resuming and pausing */
+ if (ilo_cp_space(hw3d->cp) < hw3d->owner.reserve * 2) {
+ ilo_cp_submit(hw3d->cp, "out of space");
+ assert(ilo_cp_space(hw3d->cp) >= hw3d->owner.reserve * 2);
+ }
+
+ ilo_3d_resume_queries(hw3d);
+
+ assert(ilo_cp_space(hw3d->cp) >= hw3d->owner.reserve);
+}
+
+static void
+ilo_3d_release_cp(struct ilo_cp *cp, void *data)
+{
+ struct ilo_3d *hw3d = data;
+
+ ilo_3d_pause_queries(hw3d);
+}
+
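
How the command parser drives these callbacks is not part of this hunk; what follows is a hypothetical sketch of the hand-off that ilo_3d_own_cp and ilo_3d_release_cp appear to rely on, based only on the call sites in this patch (the real logic lives in ilo_cp.c):

    void
    ilo_cp_set_owner(struct ilo_cp *cp, enum intel_ring_type ring,
                     const struct ilo_cp_owner *owner)
    {
       /* a ring change requires the current batch to be submitted first */
       if (cp->ring != ring) {
          ilo_cp_submit(cp, "ring change");
          cp->ring = ring;
       }

       if (cp->owner != owner) {
          /* the old owner pauses its queries and drops its reservation */
          if (cp->owner)
             cp->owner->release(cp, cp->owner->data);

          cp->owner = owner;

          /* the new owner resumes its queries and re-reserves space */
          if (owner)
             owner->own(cp, owner->data);
       }
    }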
/**
* Create a 3D context.
*/
return NULL;
hw3d->cp = cp;
- hw3d->owner.release_callback = ilo_3d_release_render_ring;
- hw3d->owner.release_data = hw3d;
+ hw3d->owner.own = ilo_3d_own_cp;
+ hw3d->owner.release = ilo_3d_release_cp;
+ hw3d->owner.data = hw3d;
+ hw3d->owner.reserve = 0;
hw3d->new_batch = true;
ilo_3d_destroy(struct ilo_3d *hw3d)
{
ilo_3d_pipeline_destroy(hw3d->pipeline);
-
- if (hw3d->kernel.bo)
- intel_bo_unreference(hw3d->kernel.bo);
-
FREE(hw3d);
}
static bool
-draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
+draw_vbo(struct ilo_3d *hw3d, const struct ilo_state_vector *vec,
int *prim_generated, int *prim_emitted)
{
bool need_flush = false;
* happens in the middle of a batch buffer, we need to insert manual
* flushes.
*/
- need_flush = (ilo->dirty & ILO_DIRTY_FB);
+ need_flush = (vec->dirty & ILO_DIRTY_FB);
/* the same applies to SO target changes */
- need_flush |= (ilo->dirty & ILO_DIRTY_SO);
+ need_flush |= (vec->dirty & ILO_DIRTY_SO);
}
/* make sure there is enough room first */
max_len = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
- ILO_3D_PIPELINE_DRAW, ilo);
+ ILO_3D_PIPELINE_DRAW, vec);
if (need_flush) {
max_len += ilo_3d_pipeline_estimate_size(hw3d->pipeline,
ILO_3D_PIPELINE_FLUSH, NULL);
}
if (max_len > ilo_cp_space(hw3d->cp)) {
- ilo_cp_flush(hw3d->cp, "out of space");
+ ilo_cp_submit(hw3d->cp, "out of space");
need_flush = false;
assert(max_len <= ilo_cp_space(hw3d->cp));
}
if (need_flush)
ilo_3d_pipeline_emit_flush(hw3d->pipeline);
- return ilo_3d_pipeline_emit_draw(hw3d->pipeline, ilo,
+ return ilo_3d_pipeline_emit_draw(hw3d->pipeline, vec,
prim_generated, prim_emitted);
}
return true;
}
+void
+ilo_3d_draw_rectlist(struct ilo_3d *hw3d, const struct ilo_blitter *blitter)
+{
+ ilo_3d_own_render_ring(hw3d);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 313:
+ *
+ * "If other rendering operations have preceded this clear, a
+ * PIPE_CONTROL with write cache flush enabled and Z-inhibit
+ * disabled must be issued before the rectangle primitive used for
+ * the depth buffer clear operation."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 314:
+ *
+ * "Depth buffer clear pass must be followed by a PIPE_CONTROL
+ * command with DEPTH_STALL bit set and Then followed by Depth
+ * FLUSH"
+ *
+ * But the pipeline has to be flushed both before and after the
+ * rectlist, and not only because of these workarounds; the flushes
+ * are also needed because
+ *
+ * - we may sample from a texture that was rendered to
+ * - we may sample from the fb shortly after
+ *
+ * so we skip checking blitter->op and do the flushes unconditionally.
+ *
+ * XXX need space check (a possible check is sketched after this
+ * function)
+ */
+ if (!hw3d->new_batch)
+ ilo_3d_pipeline_emit_flush(hw3d->pipeline);
+
+ ilo_3d_pipeline_emit_rectlist(hw3d->pipeline, blitter);
+
+ ilo_3d_pipeline_emit_flush(hw3d->pipeline);
+
+ hw3d->new_batch = false;
+}
+
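
One way to satisfy the "XXX need space check" above; this is a sketch only, following the same reserve-and-submit pattern draw_vbo uses, and ILO_3D_PIPELINE_RECTLIST is an assumed estimate action paired with ilo_3d_pipeline_emit_rectlist:

    static void
    ilo_3d_draw_rectlist_reserve(struct ilo_3d *hw3d,
                                 const struct ilo_blitter *blitter)
    {
       int max_len;

       /* worst case: one flush before and one after the rectlist */
       max_len = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
             ILO_3D_PIPELINE_RECTLIST, blitter);
       max_len += ilo_3d_pipeline_estimate_size(hw3d->pipeline,
             ILO_3D_PIPELINE_FLUSH, NULL) * 2;

       if (max_len > ilo_cp_space(hw3d->cp)) {
          ilo_cp_submit(hw3d->cp, "out of space");
          assert(max_len <= ilo_cp_space(hw3d->cp));
       }
    }

Called at the top of ilo_3d_draw_rectlist, after owning the render ring, this would make the unconditional flushes safe against batch overflow.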
#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b))
#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b))
 * Haswell (GEN(7.5)) supports an arbitrary cut index; everything
 * older must be checked.
*/
- if (ilo->dev->gen >= ILO_GEN(7.5))
+ if (ilo_dev_gen(ilo->dev) >= ILO_GEN(7.5))
return true;
/* Note: indices must be unsigned byte, unsigned short or unsigned int */
- switch (ilo->ib.index_size) {
+ switch (ilo->state_vector.ib.index_size) {
case 1:
return ((restart_index & 0xff) == 0xff);
break;
case PIPE_PRIM_QUAD_STRIP:
case PIPE_PRIM_QUADS:
case PIPE_PRIM_TRIANGLE_FAN:
- if (ilo->dev->gen >= ILO_GEN(7.5)) {
+ if (ilo_dev_gen(ilo->dev) >= ILO_GEN(7.5)) {
/* Haswell and newer parts can handle these prim types. */
return true;
}
ilo_draw_vbo_with_sw_restart(struct pipe_context *pipe,
const struct pipe_draw_info *info)
{
- struct ilo_context *ilo = ilo_context(pipe);
+ struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
struct pipe_draw_info *restart_info = NULL;
int sub_prim_count = 1;
return;
}
- if (ilo->ib.buffer) {
+ if (vec->ib.buffer) {
struct pipe_transfer *transfer;
const void *map;
- map = pipe_buffer_map(pipe, ilo->ib.buffer,
+ map = pipe_buffer_map(pipe, vec->ib.buffer,
PIPE_TRANSFER_READ, &transfer);
- sub_prim_count = ilo_find_sub_primitives(map + ilo->ib.offset,
- ilo->ib.index_size, info, restart_info);
+ sub_prim_count = ilo_find_sub_primitives(map + vec->ib.offset,
+ vec->ib.index_size, info, restart_info);
pipe_buffer_unmap(pipe, transfer);
}
else {
- sub_prim_count = ilo_find_sub_primitives(ilo->ib.user_buffer,
- ilo->ib.index_size, info, restart_info);
+ sub_prim_count = ilo_find_sub_primitives(vec->ib.user_buffer,
+ vec->ib.index_size, info, restart_info);
}
info = restart_info;
FREE(restart_info);
}
-static bool
-upload_shaders(struct ilo_3d *hw3d, struct ilo_shader_cache *shc)
-{
- bool incremental = true;
- int upload;
-
- upload = ilo_shader_cache_upload(shc,
- NULL, hw3d->kernel.used, incremental);
- if (!upload)
- return true;
-
- /*
- * Allocate a new bo. When this is a new batch, assume the bo is still in
- * use by the previous batch and force allocation.
- *
- * Does it help to make shader cache upload with unsynchronized mapping,
- * and remove the check for new batch here?
- */
- if (hw3d->kernel.used + upload > hw3d->kernel.size || hw3d->new_batch) {
- unsigned new_size = (hw3d->kernel.size) ?
- hw3d->kernel.size : (8 * 1024);
-
- while (hw3d->kernel.used + upload > new_size)
- new_size *= 2;
-
- if (hw3d->kernel.bo)
- intel_bo_unreference(hw3d->kernel.bo);
-
- hw3d->kernel.bo = intel_winsys_alloc_buffer(hw3d->cp->winsys,
- "kernel bo", new_size, true);
- if (!hw3d->kernel.bo) {
- ilo_err("failed to allocate kernel bo\n");
- return false;
- }
-
- hw3d->kernel.used = 0;
- hw3d->kernel.size = new_size;
- incremental = false;
-
- assert(new_size >= ilo_shader_cache_upload(shc,
- NULL, hw3d->kernel.used, incremental));
-
- ilo_3d_pipeline_invalidate(hw3d->pipeline,
- ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
- }
-
- upload = ilo_shader_cache_upload(shc,
- hw3d->kernel.bo, hw3d->kernel.used, incremental);
- if (upload < 0) {
- ilo_err("failed to upload shaders\n");
- return false;
- }
-
- hw3d->kernel.used += upload;
-
- assert(hw3d->kernel.used <= hw3d->kernel.size);
-
- return true;
-}
-
static void
ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
u_prim_name(info->mode), info->start, info->count);
}
- ilo_dump_dirty_flags(ilo->dirty);
+ ilo_state_vector_dump_dirty(&ilo->state_vector);
}
if (!ilo_3d_pass_render_condition(ilo))
ilo_finalize_3d_states(ilo, info);
- if (!upload_shaders(hw3d, ilo->shader_cache))
- return;
+ ilo_shader_cache_upload(ilo->shader_cache, &hw3d->cp->builder);
ilo_blit_resolve_framebuffer(ilo);
/* If draw_vbo ever fails, return immediately. */
- if (!draw_vbo(hw3d, ilo, &prim_generated, &prim_emitted))
+ if (!draw_vbo(hw3d, &ilo->state_vector, &prim_generated, &prim_emitted))
return;
/* clear dirty status */
- ilo->dirty = 0x0;
+ ilo->state_vector.dirty = 0x0;
hw3d->new_batch = false;
/* avoid dangling pointer reference */
- ilo->draw = NULL;
+ ilo->state_vector.draw = NULL;
update_prim_count(hw3d, prim_generated, prim_emitted);
ilo_3d_pipeline_emit_flush(hw3d->pipeline);
/* don't know why */
- if (ilo->dev->gen >= ILO_GEN(7))
- ilo_cp_flush(hw3d->cp, "texture barrier");
+ if (ilo_dev_gen(ilo->dev) >= ILO_GEN(7))
+ ilo_cp_submit(hw3d->cp, "texture barrier");
}
static void