/* Reserve space for n dwords in the current batch buffer and advance the
 * batch write pointer past them.
 *
 * NOTE(review): declared to return void * but no "return map;" is visible
 * in this hunk — presumably it follows as elided context; confirm against
 * the full file, since a non-void function falling off the end is UB when
 * the caller uses the result.
 */
UNUSED static void *
emit_dwords(struct brw_context *brw, unsigned n)
{
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ intel_batchbuffer_begin(brw, n);
 uint32_t *map = brw->batch.map_next;
 brw->batch.map_next += n;
 intel_batchbuffer_advance(brw);
}
}
+/* Return bits [47:32] of the BO's graphics address, or 0 if the BO is not
+ * soft-pinned (EXEC_OBJECT_PINNED unset) — in that case the kernel may
+ * still relocate it and the high bits are meaningless for cache tracking.
+ */
+static UNUSED uint16_t
+pinned_bo_high_bits(struct brw_bo *bo)
+{
+ return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
+}
+
+/* The VF cache designers apparently cut corners, and made the cache key's
+ * <VertexBufferIndex, Memory Address> tuple only consider the bottom 32 bits
+ * of the address. If you happen to have two vertex buffers which get placed
+ * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
+ * collisions. (These collisions can happen within a single batch.)
+ *
+ * In the soft-pin world, we'd like to assign addresses up front, and never
+ * move buffers. So, we need to do a VF cache invalidate if the buffer for
+ * a particular VB slot has different [48:32] address bits than the last one.
+ *
+ * In the relocation world, we have no idea what the addresses will be, so
+ * we can't apply this workaround. Instead, we tell the kernel to move it
+ * to the low 4GB regardless.
+ */
+/* Emit a VF cache invalidate if any bound vertex buffer's address bits
+ * [48:32] changed since the last draw (see the workaround comment above).
+ * Only meaningful on Gen8+ with soft-pinned addresses.
+ */
+static void
+vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+   /* Start false: only invalidate when some VB slot actually changed its
+    * high address bits.  (Initializing to true would invalidate the VF
+    * cache on every draw, defeating the point of tracking
+    * last_bo_high_bits.)
+    */
+   bool need_invalidate = false;
+   unsigned i;
+
+   for (i = 0; i < brw->vb.nr_buffers; i++) {
+      uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
+
+      if (high_bits != brw->vb.last_bo_high_bits[i]) {
+         need_invalidate = true;
+         brw->vb.last_bo_high_bits[i] = high_bits;
+      }
+   }
+
+   /* Don't bother with draw parameter buffers - those are generated by
+    * the driver so we can select a consistent memory zone.
+    */
+
+   if (need_invalidate) {
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+   }
+#endif
+}
+
+/* Emit a VF cache invalidate if the index buffer's address bits [48:32]
+ * changed since the last 3DSTATE_INDEX_BUFFER — the same 4 GiB aliasing
+ * workaround as the vertex-buffer variant, applied to the single IB slot,
+ * so no loop or separate "need_invalidate" flag is required here.
+ */
+static void
+vf_invalidate_for_ib_48bit_transition(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+ uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);
+
+ if (high_bits != brw->ib.last_bo_high_bits) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ brw->ib.last_bo_high_bits = high_bits;
+ }
+#endif
+}
+
static void
genX(emit_vertices)(struct brw_context *brw)
{
}
#endif
- const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance ||
+ const bool uses_draw_params =
+ vs_prog_data->uses_firstvertex ||
+ vs_prog_data->uses_baseinstance;
+
+ const bool uses_derived_draw_params =
+ vs_prog_data->uses_drawid ||
+ vs_prog_data->uses_is_indexed_draw;
+
+ const bool needs_sgvs_element = (uses_draw_params ||
vs_prog_data->uses_instanceid ||
vs_prog_data->uses_vertexid);
unsigned nr_elements =
- brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
+ brw->vb.nr_enabled + needs_sgvs_element + uses_derived_draw_params;
#if GEN_GEN < 8
/* If any of the formats of vb.enabled needs more that one upload, we need
*/
for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
- uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
+ const struct gl_array_attributes *glattrib = input->glattrib;
+ uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
if (uploads_needed(format, input->is_dual_slot) > 1)
nr_elements++;
}
/* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
- const bool uses_draw_params =
- vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance;
const unsigned nr_buffers = brw->vb.nr_buffers +
- uses_draw_params + vs_prog_data->uses_drawid;
+ uses_draw_params + uses_derived_draw_params;
+
+ vf_invalidate_for_vb_48bit_transitions(brw);
if (nr_buffers) {
assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
0 /* step rate */);
}
- if (vs_prog_data->uses_drawid) {
+ if (uses_derived_draw_params) {
dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
- brw->draw.draw_id_bo,
- brw->draw.draw_id_offset,
- brw->draw.draw_id_bo->size,
+ brw->draw.derived_draw_params_bo,
+ brw->draw.derived_draw_params_offset,
+ brw->draw.derived_draw_params_bo->size,
0 /* stride */,
0 /* step rate */);
}
unsigned i;
for (i = 0; i < brw->vb.nr_enabled; i++) {
const struct brw_vertex_element *input = brw->vb.enabled[i];
- uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
+ const struct gl_array_attributes *glattrib = input->glattrib;
+ uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
uint32_t comp0 = VFCOMP_STORE_SRC;
uint32_t comp1 = VFCOMP_STORE_SRC;
uint32_t comp2 = VFCOMP_STORE_SRC;
* entry. */
const unsigned offset = input->offset + c * 16;
+ const struct gl_array_attributes *glattrib = input->glattrib;
const int size = (GEN_GEN < 8 && is_passthru_format(format)) ?
- upload_format_size(upload_format) : input->glarray->Size;
+ upload_format_size(upload_format) : glattrib->Size;
switch (size) {
case 0: comp0 = VFCOMP_STORE_0;
case 1: comp1 = VFCOMP_STORE_0;
case 2: comp2 = VFCOMP_STORE_0;
case 3:
- if (GEN_GEN >= 8 && input->glarray->Doubles) {
+ if (GEN_GEN >= 8 && glattrib->Doubles) {
comp3 = VFCOMP_STORE_0;
- } else if (input->glarray->Integer) {
+ } else if (glattrib->Integer) {
comp3 = VFCOMP_STORE_1_INT;
} else {
comp3 = VFCOMP_STORE_1_FP;
* to be specified as VFCOMP_STORE_0 in order to output a 256-bit
* vertex element."
*/
- if (input->glarray->Doubles && !input->is_dual_slot) {
+ if (glattrib->Doubles && !input->is_dual_slot) {
/* Store vertex elements which correspond to double and dvec2 vertex
* shader inputs as 128-bit vertex elements, instead of 256-bits.
*/
};
#if GEN_GEN >= 8
- if (vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance) {
+ if (uses_draw_params) {
elem_state.VertexBufferIndex = brw->vb.nr_buffers;
elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
elem_state.Component0Control = VFCOMP_STORE_SRC;
#else
elem_state.VertexBufferIndex = brw->vb.nr_buffers;
elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
- if (vs_prog_data->uses_basevertex)
+ if (uses_draw_params) {
elem_state.Component0Control = VFCOMP_STORE_SRC;
-
- if (vs_prog_data->uses_baseinstance)
elem_state.Component1Control = VFCOMP_STORE_SRC;
+ }
if (vs_prog_data->uses_vertexid)
elem_state.Component2Control = VFCOMP_STORE_VID;
dw += GENX(VERTEX_ELEMENT_STATE_length);
}
- if (vs_prog_data->uses_drawid) {
+ if (uses_derived_draw_params) {
struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
.Valid = true,
.VertexBufferIndex = brw->vb.nr_buffers + 1,
- .SourceElementFormat = ISL_FORMAT_R32_UINT,
+ .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
.Component0Control = VFCOMP_STORE_SRC,
- .Component1Control = VFCOMP_STORE_0,
+ .Component1Control = VFCOMP_STORE_SRC,
.Component2Control = VFCOMP_STORE_0,
.Component3Control = VFCOMP_STORE_0,
#if GEN_GEN < 5
#if GEN_GEN >= 6
if (gen6_edgeflag_input) {
- const uint32_t format =
- brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+ const struct gl_array_attributes *glattrib = gen6_edgeflag_input->glattrib;
+ const uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
.Valid = true,
if (index_buffer == NULL)
return;
+ vf_invalidate_for_ib_48bit_transition(brw);
+
brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GEN_GEN < 8 && !GEN_IS_HASWELL
ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
&genX(scissor_state),
- &gen7_depthbuffer,
+ &brw_depthbuffer,
&genX(polygon_stipple),
&genX(polygon_stipple_offset),
&genX(scissor_state),
- &gen7_depthbuffer,
+ &brw_depthbuffer,
&genX(polygon_stipple),
&genX(polygon_stipple_offset),