/* Reserve space for n dwords in the current batch buffer and advance the
 * batch write pointer past them.
 *
 * NOTE(review): declared to return void * but no "return map;" is visible
 * in this hunk — presumably it follows as elided context; confirm against
 * the full file, since a non-void function falling off the end is UB when
 * the caller uses the result.
 */
UNUSED static void *
emit_dwords(struct brw_context *brw, unsigned n)
{
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ intel_batchbuffer_begin(brw, n);
 uint32_t *map = brw->batch.map_next;
 brw->batch.map_next += n;
 intel_batchbuffer_advance(brw);
}
}
+/* Return bits [47:32] of the BO's graphics address, or 0 if the BO is not
+ * soft-pinned (EXEC_OBJECT_PINNED unset) — in that case the kernel may
+ * still relocate it and the high bits are meaningless for cache tracking.
+ */
+static UNUSED uint16_t
+pinned_bo_high_bits(struct brw_bo *bo)
+{
+ return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
+}
+
+/* The VF cache designers apparently cut corners, and made the cache key's
+ * <VertexBufferIndex, Memory Address> tuple only consider the bottom 32 bits
+ * of the address. If you happen to have two vertex buffers which get placed
+ * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
+ * collisions. (These collisions can happen within a single batch.)
+ *
+ * In the soft-pin world, we'd like to assign addresses up front, and never
+ * move buffers. So, we need to do a VF cache invalidate if the buffer for
+ * a particular VB slot has different [48:32] address bits than the last one.
+ *
+ * In the relocation world, we have no idea what the addresses will be, so
+ * we can't apply this workaround. Instead, we tell the kernel to move it
+ * to the low 4GB regardless.
+ */
+/* Emit a VF cache invalidate if any bound vertex buffer's address bits
+ * [48:32] changed since the last draw (see the workaround comment above).
+ * Only meaningful on Gen8+ with soft-pinned addresses.
+ */
+static void
+vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+   /* Start false: only invalidate when some VB slot actually changed its
+    * high address bits.  (Initializing to true would invalidate the VF
+    * cache on every draw, defeating the point of tracking
+    * last_bo_high_bits.)
+    */
+   bool need_invalidate = false;
+   unsigned i;
+
+   for (i = 0; i < brw->vb.nr_buffers; i++) {
+      uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
+
+      if (high_bits != brw->vb.last_bo_high_bits[i]) {
+         need_invalidate = true;
+         brw->vb.last_bo_high_bits[i] = high_bits;
+      }
+   }
+
+   /* Don't bother with draw parameter buffers - those are generated by
+    * the driver so we can select a consistent memory zone.
+    */
+
+   if (need_invalidate) {
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+   }
+#endif
+}
+
+/* Emit a VF cache invalidate if the index buffer's address bits [48:32]
+ * changed since the last 3DSTATE_INDEX_BUFFER — the same 4 GiB aliasing
+ * workaround as the vertex-buffer variant, applied to the single IB slot,
+ * so no loop or separate "need_invalidate" flag is required here.
+ */
+static void
+vf_invalidate_for_ib_48bit_transition(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+ uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);
+
+ if (high_bits != brw->ib.last_bo_high_bits) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ brw->ib.last_bo_high_bits = high_bits;
+ }
+#endif
+}
+
static void
genX(emit_vertices)(struct brw_context *brw)
{
}
#endif
- const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance ||
+ const bool uses_draw_params =
+ vs_prog_data->uses_firstvertex ||
+ vs_prog_data->uses_baseinstance;
+
+ const bool uses_derived_draw_params =
+ vs_prog_data->uses_drawid ||
+ vs_prog_data->uses_is_indexed_draw;
+
+ const bool needs_sgvs_element = (uses_draw_params ||
vs_prog_data->uses_instanceid ||
vs_prog_data->uses_vertexid);
unsigned nr_elements =
- brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
+ brw->vb.nr_enabled + needs_sgvs_element + uses_derived_draw_params;
#if GEN_GEN < 8
/* If any of the formats of vb.enabled needs more that one upload, we need
*/
for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
- uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
+ const struct gl_array_attributes *glattrib = input->glattrib;
+ uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
if (uploads_needed(format, input->is_dual_slot) > 1)
nr_elements++;
}
/* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
- const bool uses_draw_params =
- vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance;
const unsigned nr_buffers = brw->vb.nr_buffers +
- uses_draw_params + vs_prog_data->uses_drawid;
+ uses_draw_params + uses_derived_draw_params;
+
+ vf_invalidate_for_vb_48bit_transitions(brw);
if (nr_buffers) {
assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
0 /* step rate */);
}
- if (vs_prog_data->uses_drawid) {
+ if (uses_derived_draw_params) {
dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
- brw->draw.draw_id_bo,
- brw->draw.draw_id_offset,
- brw->draw.draw_id_bo->size,
+ brw->draw.derived_draw_params_bo,
+ brw->draw.derived_draw_params_offset,
+ brw->draw.derived_draw_params_bo->size,
0 /* stride */,
0 /* step rate */);
}
unsigned i;
for (i = 0; i < brw->vb.nr_enabled; i++) {
const struct brw_vertex_element *input = brw->vb.enabled[i];
- uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
+ const struct gl_array_attributes *glattrib = input->glattrib;
+ uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
uint32_t comp0 = VFCOMP_STORE_SRC;
uint32_t comp1 = VFCOMP_STORE_SRC;
uint32_t comp2 = VFCOMP_STORE_SRC;
* entry. */
const unsigned offset = input->offset + c * 16;
+ const struct gl_array_attributes *glattrib = input->glattrib;
const int size = (GEN_GEN < 8 && is_passthru_format(format)) ?
- upload_format_size(upload_format) : input->glarray->Size;
+ upload_format_size(upload_format) : glattrib->Size;
switch (size) {
case 0: comp0 = VFCOMP_STORE_0;
case 1: comp1 = VFCOMP_STORE_0;
case 2: comp2 = VFCOMP_STORE_0;
case 3:
- if (GEN_GEN >= 8 && input->glarray->Doubles) {
+ if (GEN_GEN >= 8 && glattrib->Doubles) {
comp3 = VFCOMP_STORE_0;
- } else if (input->glarray->Integer) {
+ } else if (glattrib->Integer) {
comp3 = VFCOMP_STORE_1_INT;
} else {
comp3 = VFCOMP_STORE_1_FP;
* to be specified as VFCOMP_STORE_0 in order to output a 256-bit
* vertex element."
*/
- if (input->glarray->Doubles && !input->is_dual_slot) {
+ if (glattrib->Doubles && !input->is_dual_slot) {
/* Store vertex elements which correspond to double and dvec2 vertex
* shader inputs as 128-bit vertex elements, instead of 256-bits.
*/
};
#if GEN_GEN >= 8
- if (vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance) {
+ if (uses_draw_params) {
elem_state.VertexBufferIndex = brw->vb.nr_buffers;
elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
elem_state.Component0Control = VFCOMP_STORE_SRC;
#else
elem_state.VertexBufferIndex = brw->vb.nr_buffers;
elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
- if (vs_prog_data->uses_basevertex)
+ if (uses_draw_params) {
elem_state.Component0Control = VFCOMP_STORE_SRC;
-
- if (vs_prog_data->uses_baseinstance)
elem_state.Component1Control = VFCOMP_STORE_SRC;
+ }
if (vs_prog_data->uses_vertexid)
elem_state.Component2Control = VFCOMP_STORE_VID;
dw += GENX(VERTEX_ELEMENT_STATE_length);
}
- if (vs_prog_data->uses_drawid) {
+ if (uses_derived_draw_params) {
struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
.Valid = true,
.VertexBufferIndex = brw->vb.nr_buffers + 1,
- .SourceElementFormat = ISL_FORMAT_R32_UINT,
+ .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
.Component0Control = VFCOMP_STORE_SRC,
- .Component1Control = VFCOMP_STORE_0,
+ .Component1Control = VFCOMP_STORE_SRC,
.Component2Control = VFCOMP_STORE_0,
.Component3Control = VFCOMP_STORE_0,
#if GEN_GEN < 5
#if GEN_GEN >= 6
if (gen6_edgeflag_input) {
- const uint32_t format =
- brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+ const struct gl_array_attributes *glattrib = gen6_edgeflag_input->glattrib;
+ const uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
.Valid = true,
if (index_buffer == NULL)
return;
+ vf_invalidate_for_ib_48bit_transition(brw);
+
brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GEN_GEN < 8 && !GEN_IS_HASWELL
ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
&genX(scissor_state),
- &gen7_depthbuffer,
+ &brw_depthbuffer,
&genX(polygon_stipple),
&genX(polygon_stipple_offset),
&genX(scissor_state),
- &gen7_depthbuffer,
+ &brw_depthbuffer,
&genX(polygon_stipple),
&genX(polygon_stipple_offset),