#define __gen_address_offset anv_address_add
#include "common/gen_mi_builder.h"
+static void genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
+ uint32_t pipeline);
+
static void
emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
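+ /* Marked UNUSED because devinfo is only referenced from the
+ * GEN_GEN == 12 blocks below.
+ */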
+ UNUSED const struct gen_device_info *devinfo = &device->info;
uint32_t mocs = device->isl_dev.mocs.internal;
/* If we are emitting a new state base address we probably need to re-emit
pc.CommandStreamerStallEnable = true;
#if GEN_GEN >= 12
pc.TileCacheFlushEnable = true;
+#endif
+#if GEN_GEN == 12
+ /* GEN:BUG:1606662791:
+ *
+ * Software must program PIPE_CONTROL command with "HDC Pipeline
+ * Flush" prior to programming of the below two non-pipeline state :
+ * * STATE_BASE_ADDRESS
+ * * 3DSTATE_BINDING_TABLE_POOL_ALLOC
+ */
+ if (devinfo->revision == 0 /* A0 */)
+ pc.HDCPipelineFlushEnable = true;
#endif
}
+#if GEN_GEN == 12
+ /* GEN:BUG:1607854226:
+ *
+ * Work around non-pipelined state not being applied in MEDIA/GPGPU
+ * pipeline mode by temporarily putting the pipeline in 3D mode.
+ */
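+ /* Note: current_pipeline may still be UINT32_MAX if no PIPELINE_SELECT
+ * has been emitted yet, in which case there is nothing to restore below.
+ */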
+ uint32_t gen12_wa_pipeline = cmd_buffer->state.current_pipeline;
+ genX(flush_pipeline_select_3d)(cmd_buffer);
+#endif
+
anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
sba.GeneralStateMOCS = mocs;
# endif
}
+#if GEN_GEN == 12
+ /* GEN:BUG:1607854226:
+ *
+ * Put the pipeline back into its current mode.
+ */
+ if (gen12_wa_pipeline != UINT32_MAX)
+ genX(flush_pipeline_select)(cmd_buffer, gen12_wa_pipeline);
+#endif
+
/* After re-setting the surface state base address, we have to do some
* cache flushing so that the sampler engine will pick up the new
* SURFACE_STATE objects and binding tables. From the Broadwell PRM,
if (base_layer >= anv_image_aux_layers(image, aspect, base_level))
return;
- assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
+ assert(image->planes[plane].surface.isl.tiling != ISL_TILING_LINEAR);
if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
* executing anything. The chances are fairly high that they will use
* blorp at least once per primary command buffer so it shouldn't be
* wasted.
+ *
+ * There is also a workaround on gen8 which requires us to invalidate the
+ * VF cache occasionally. It's easier if we can assume we start with a
+ * fresh cache (See also genX(cmd_buffer_set_binding_for_gen8_vb_flush).)
*/
- if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
- cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
/* We send an "Indirect State Pointers Disable" packet at
* EndCommandBuffer, so all push constant packets are ignored during a
anv_cmd_buffer_add_secondary(primary, secondary);
}
+ /* The secondary isn't counted in our VF cache tracking so we need to
+ * invalidate the whole thing.
+ */
+ if (GEN_GEN >= 8 && GEN_GEN <= 9) {
+ primary->state.pending_pipe_bits |=
+ ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ }
+
/* The secondary may have selected a different pipeline (3D or compute) and
* may have changed the current L3$ configuration. Reset our tracking
* variables to invalid values to ensure that we re-emit these in the case
bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
}
+ if ((GEN_GEN >= 8 && GEN_GEN <= 9) &&
+ (bits & ANV_PIPE_CS_STALL_BIT) &&
+ (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) {
+ /* If we are doing a VF cache invalidate AND a CS stall (it must be
+ * both) then we can reset our vertex cache tracking.
+ */
+ memset(cmd_buffer->state.gfx.vb_dirty_ranges, 0,
+ sizeof(cmd_buffer->state.gfx.vb_dirty_ranges));
+ memset(&cmd_buffer->state.gfx.ib_dirty_range, 0,
+ sizeof(cmd_buffer->state.gfx.ib_dirty_range));
+ }
+
if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
#if GEN_GEN >= 12
pipe.RenderTargetCacheFlushEnable =
bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+ /* GEN:BUG:1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
+ * be set with any PIPE_CONTROL with Depth Flush Enable bit set."
+ */
+#if GEN_GEN >= 12
+ pipe.DepthStallEnable =
+ pipe.DepthCacheFlushEnable || (bits & ANV_PIPE_DEPTH_STALL_BIT);
+#else
pipe.DepthStallEnable = bits & ANV_PIPE_DEPTH_STALL_BIT;
+#endif
+
pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT;
pipe.StallAtPixelScoreboard = bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
}
static uint32_t
-flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
+flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_pipeline *pipeline)
{
- struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
-
VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
pipeline->active_stages;
}
}
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
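+/* On gen7 (Ivy Bridge), push constants are relative to dynamic state base
+ * address and are handled directly in cmd_buffer_emit_push_constant() below,
+ * so this helper is only needed on Haswell and above.
+ */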
static struct anv_address
get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage,
const struct anv_push_range *range)
{
-#if GEN_GEN >= 8 || GEN_IS_HASWELL
const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
switch (range->set) {
case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
}
}
}
-#else
- /* For Ivy Bridge, push constants are relative to dynamic state
- * base address and we only ever push actual push constants.
- */
- assert(range->length > 0);
- assert(range->set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
- struct anv_state state =
- anv_cmd_buffer_push_constants(cmd_buffer, stage);
- return (struct anv_address) {
- .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
- .offset = state.offset,
- };
-#endif
}
+#endif
static void
cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
const struct anv_pipeline_bind_map *bind_map =
&pipeline->shaders[stage]->bind_map;
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
/* The Skylake PRM contains the following restriction:
*
* "The driver must ensure The following case does not occur
c.ConstantBody.Buffer[i + shift] =
anv_address_add(addr, range->start * 32);
}
+#else
+ /* For Ivy Bridge, push constants are relative to dynamic state
+ * base address and we only ever push actual push constants.
+ */
+ if (bind_map->push_ranges[0].length > 0) {
+ assert(bind_map->push_ranges[0].set ==
+ ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
+ struct anv_state state =
+ anv_cmd_buffer_push_constants(cmd_buffer, stage);
+ c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length;
+ c.ConstantBody.Buffer[0].bo = NULL;
+ c.ConstantBody.Buffer[0].offset = state.offset;
+ }
+ assert(bind_map->push_ranges[1].length == 0);
+ assert(bind_map->push_ranges[2].length == 0);
+ assert(bind_map->push_ranges[3].length == 0);
+#endif
}
}
}
#endif
};
+#if GEN_GEN >= 8 && GEN_GEN <= 9
+ genX(cmd_buffer_set_binding_for_gen8_vb_flush)(cmd_buffer, vb,
+ state.BufferStartingAddress,
+ state.BufferSize);
+#endif
+
GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state);
i++;
}
*/
uint32_t dirty = 0;
if (cmd_buffer->state.descriptors_dirty)
- dirty = flush_descriptor_sets(cmd_buffer);
+ dirty = flush_descriptor_sets(cmd_buffer, pipeline);
if (dirty || cmd_buffer->state.push_constants_dirty) {
/* Because we're pushing UBOs, we have to push whenever either
gen7_cmd_buffer_emit_scissor(cmd_buffer);
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
-
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
static void
.EndAddress = anv_address_add(addr, size),
#endif
});
+
+ genX(cmd_buffer_set_binding_for_gen8_vb_flush)(cmd_buffer,
+ index, addr, size);
}
static void
emit_vertex_bo(cmd_buffer, addr, 4, ANV_DRAWID_VB_INDEX);
}
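+/* Record, for the gen8-9 VF cache workaround, which vertex buffers the
+ * 3DPRIMITIVE we just emitted may have read, including the internal
+ * base-vertex/instance and draw-id buffers.
+ */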
+static void
+update_dirty_vbs_for_gen8_vb_flush(struct anv_cmd_buffer *cmd_buffer,
+ uint32_t access_type)
+{
+ struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+ const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+
+ uint64_t vb_used = pipeline->vb_used;
+ if (vs_prog_data->uses_firstvertex ||
+ vs_prog_data->uses_baseinstance)
+ vb_used |= 1ull << ANV_SVGS_VB_INDEX;
+ if (vs_prog_data->uses_drawid)
+ vb_used |= 1ull << ANV_DRAWID_VB_INDEX;
+
+ genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)(cmd_buffer,
+ access_type,
+ vb_used);
+}
+
void genX(CmdDraw)(
VkCommandBuffer commandBuffer,
uint32_t vertexCount,
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, 0);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views. We need to multiply instanceCount by the view count.
*/
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = 0;
}
+
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
}
void genX(CmdDrawIndexed)(
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, 0);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views. We need to multiply instanceCount by the view count.
*/
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = vertexOffset;
}
+
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, RANDOM);
}
/* Auto-Draw / Indirect Registers */
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, 0);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views. We need to multiply instanceCount by the view count.
*/
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = pipeline->topology;
}
+
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
#endif /* GEN_IS_HASWELL || GEN_GEN >= 8 */
}
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, i);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
load_indirect_parameters(cmd_buffer, draw, false);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PrimitiveTopologyType = pipeline->topology;
}
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
+
offset += stride;
}
}
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, i);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
load_indirect_parameters(cmd_buffer, draw, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PrimitiveTopologyType = pipeline->topology;
}
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, RANDOM);
+
offset += stride;
}
}
}
#endif
-void genX(CmdDrawIndirectCountKHR)(
+void genX(CmdDrawIndirectCount)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, i);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
load_indirect_parameters(cmd_buffer, draw, false);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PrimitiveTopologyType = pipeline->topology;
}
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
+
offset += stride;
}
}
-void genX(CmdDrawIndexedIndirectCountKHR)(
+void genX(CmdDrawIndexedIndirectCount)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, i);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
load_indirect_parameters(cmd_buffer, draw, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PrimitiveTopologyType = pipeline->topology;
}
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, RANDOM);
+
offset += stride;
}
}
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
-static VkResult
-flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
-{
- struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
- struct anv_state surfaces = { 0, }, samplers = { 0, };
- VkResult result;
-
- result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
- if (result != VK_SUCCESS) {
- assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
-
- result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
- if (result != VK_SUCCESS)
- return result;
-
- /* Re-emit state base addresses so we get the new surface state base
- * address before we start emitting binding tables etc.
- */
- genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
-
- result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
- if (result != VK_SUCCESS) {
- anv_batch_set_error(&cmd_buffer->batch, result);
- return result;
- }
- }
-
- result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
- if (result != VK_SUCCESS) {
- anv_batch_set_error(&cmd_buffer->batch, result);
- return result;
- }
-
- uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
- struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
- .BindingTablePointer = surfaces.offset,
- .SamplerStatePointer = samplers.offset,
- };
- GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, iface_desc_data_dw, &desc);
-
- struct anv_state state =
- anv_cmd_buffer_merge_dynamic(cmd_buffer, iface_desc_data_dw,
- pipeline->interface_descriptor_data,
- GENX(INTERFACE_DESCRIPTOR_DATA_length),
- 64);
-
- uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
- anv_batch_emit(&cmd_buffer->batch,
- GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), mid) {
- mid.InterfaceDescriptorTotalLength = size;
- mid.InterfaceDescriptorDataStartAddress = state.offset;
- }
-
- return VK_SUCCESS;
-}
-
void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
- VkResult result;
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
cmd_buffer->state.compute.pipeline_dirty) {
- /* FIXME: figure out descriptors for gen7 */
- result = flush_compute_descriptor_set(cmd_buffer);
- if (result != VK_SUCCESS)
- return;
+ flush_descriptor_sets(cmd_buffer, pipeline);
+
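+      /* Pack an INTERFACE_DESCRIPTOR_DATA with the binding table and
+       * sampler state that flush_descriptor_sets() just emitted and merge
+       * it with the template baked into the pipeline.
+       */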
+ uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
+ struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
+ .BindingTablePointer =
+ cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
+ .SamplerStatePointer =
+ cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset,
+ };
+ GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, iface_desc_data_dw, &desc);
+
+ struct anv_state state =
+ anv_cmd_buffer_merge_dynamic(cmd_buffer, iface_desc_data_dw,
+ pipeline->interface_descriptor_data,
+ GENX(INTERFACE_DESCRIPTOR_DATA_length),
+ 64);
- cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
+ uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
+ anv_batch_emit(&cmd_buffer->batch,
+ GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), mid) {
+ mid.InterfaceDescriptorTotalLength = size;
+ mid.InterfaceDescriptorDataStartAddress = state.offset;
+ }
}
if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = state.offset,
};
+
+ /* The num_workgroups buffer goes in the binding table */
+ cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
return;
#endif
- if (prog_data->uses_num_work_groups)
+ if (prog_data->uses_num_work_groups) {
cmd_buffer->state.compute.num_workgroups = addr;
+ /* The num_workgroups buffer goes in the binding table */
+ cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
struct gen_mi_builder b;
vfe.NumberofURBEntries = 2;
vfe.URBEntryAllocationSize = 2;
}
+
+ /* We just emitted a dummy MEDIA_VFE_STATE so now that packet is
+ * invalid. Set the compute pipeline to dirty to force a re-emit of the
+ * pipeline in case we get back-to-back dispatch calls with the same
+ * pipeline and a PIPELINE_SELECT in between.
+ */
+ cmd_buffer->state.compute.pipeline_dirty = true;
}
#endif
pc.CommandStreamerStallEnable = true;
#if GEN_GEN >= 12
pc.TileCacheFlushEnable = true;
+
+ /* GEN:BUG:1409600907: "PIPE_CONTROL with Depth Stall Enable bit must be
+ * set with any PIPE_CONTROL with Depth Flush Enable bit set."
+ */
+ pc.DepthStallEnable = true;
#endif
}
}
}
+/* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS:
+ *
+ * "The VF cache needs to be invalidated before binding and then using
+ * Vertex Buffers that overlap with any previously bound Vertex Buffer
+ * (at a 64B granularity) since the last invalidation. A VF cache
+ * invalidate is performed by setting the "VF Cache Invalidation Enable"
+ * bit in PIPE_CONTROL."
+ *
+ * This is implemented by carefully tracking all vertex and index buffer
+ * bindings and flushing whenever the tracked dirty range would exceed
+ * 4 GiB.  The tracking works in three parts:
+ *
+ * 1. genX(cmd_buffer_set_binding_for_gen8_vb_flush)() which must be called
+ * every time a 3DSTATE_VERTEX_BUFFER packet is emitted and informs the
+ * tracking code of the new binding. If this new binding would cause
+ * the cache to have a too-large range on the next draw call, a pipeline
+ * stall and VF cache invalidate are added to pending_pipe_bits.
+ *
+ * 2. genX(cmd_buffer_apply_pipe_flushes)() resets the cache tracking to
+ * empty whenever we emit a VF invalidate.
+ *
+ * 3. genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)() must be called
+ * after every 3DPRIMITIVE and copies the bound range into the dirty
+ * range for each used buffer. This has to be a separate step because
+ * we don't always re-bind all buffers and so step 1 can't know which
+ * buffers are actually bound.
+ */
+void
+genX(cmd_buffer_set_binding_for_gen8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
+ int vb_index,
+ struct anv_address vb_address,
+ uint32_t vb_size)
+{
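+ /* This workaround only applies to gen8 and gen9 and needs softpin so
+ * that the tracked buffer addresses are known at record time.
+ */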
+ if (GEN_GEN < 8 || GEN_GEN > 9 ||
+ !cmd_buffer->device->instance->physicalDevice.use_softpin)
+ return;
+
+ struct anv_vb_cache_range *bound, *dirty;
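+ /* A vb_index of -1 denotes the index buffer */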
+ if (vb_index == -1) {
+ bound = &cmd_buffer->state.gfx.ib_bound_range;
+ dirty = &cmd_buffer->state.gfx.ib_dirty_range;
+ } else {
+ assert(vb_index >= 0);
+ assert(vb_index < ARRAY_SIZE(cmd_buffer->state.gfx.vb_bound_ranges));
+ assert(vb_index < ARRAY_SIZE(cmd_buffer->state.gfx.vb_dirty_ranges));
+ bound = &cmd_buffer->state.gfx.vb_bound_ranges[vb_index];
+ dirty = &cmd_buffer->state.gfx.vb_dirty_ranges[vb_index];
+ }
+
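+ /* An empty binding can never be in the cache, so clear the bound range
+ * and leave any previously dirtied range alone.
+ */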
+ if (vb_size == 0) {
+ bound->start = 0;
+ bound->end = 0;
+ return;
+ }
+
+ assert(vb_address.bo && (vb_address.bo->flags & EXEC_OBJECT_PINNED));
+ bound->start = gen_48b_address(anv_address_physical(vb_address));
+ bound->end = bound->start + vb_size;
+ assert(bound->end > bound->start); /* No overflow */
+
+ /* Align everything to a cache line */
+ bound->start &= ~(64ull - 1ull);
+ bound->end = align_u64(bound->end, 64);
+
+ /* Compute the dirty range */
+ dirty->start = MIN2(dirty->start, bound->start);
+ dirty->end = MAX2(dirty->end, bound->end);
+
+ /* If the dirty range no longer fits in 32 bits, we have to flush */
+ assert(bound->end - bound->start <= (1ull << 32));
+ if (dirty->end - dirty->start > (1ull << 32)) {
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ }
+}
+
+void
+genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
+ uint32_t access_type,
+ uint64_t vb_used)
+{
+ if (GEN_GEN < 8 || GEN_GEN > 9 ||
+ !cmd_buffer->device->instance->physicalDevice.use_softpin)
+ return;
+
+ if (access_type == RANDOM) {
+ /* We have an index buffer */
+ struct anv_vb_cache_range *bound = &cmd_buffer->state.gfx.ib_bound_range;
+ struct anv_vb_cache_range *dirty = &cmd_buffer->state.gfx.ib_dirty_range;
+
+ if (bound->end > bound->start) {
+ dirty->start = MIN2(dirty->start, bound->start);
+ dirty->end = MAX2(dirty->end, bound->end);
+ }
+ }
+
+ uint64_t mask = vb_used;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ assert(i >= 0);
+ assert(i < ARRAY_SIZE(cmd_buffer->state.gfx.vb_bound_ranges));
+ assert(i < ARRAY_SIZE(cmd_buffer->state.gfx.vb_dirty_ranges));
+
+ struct anv_vb_cache_range *bound, *dirty;
+ bound = &cmd_buffer->state.gfx.vb_bound_ranges[i];
+ dirty = &cmd_buffer->state.gfx.vb_dirty_ranges[i];
+
+ if (bound->end > bound->start) {
+ dirty->start = MIN2(dirty->start, bound->start);
+ dirty->end = MAX2(dirty->end, bound->end);
+ }
+ }
+}
+
/**
* Update the pixel hashing modes that determine the balancing of PS threads
* across subslices and slices.
cmd_buffer_begin_subpass(cmd_buffer, 0);
}
-void genX(CmdBeginRenderPass2KHR)(
+void genX(CmdBeginRenderPass2)(
VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo* pRenderPassBeginInfo,
const VkSubpassBeginInfoKHR* pSubpassBeginInfo)
cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
}
-void genX(CmdNextSubpass2KHR)(
+void genX(CmdNextSubpass2)(
VkCommandBuffer commandBuffer,
const VkSubpassBeginInfoKHR* pSubpassBeginInfo,
const VkSubpassEndInfoKHR* pSubpassEndInfo)
cmd_buffer->state.subpass = NULL;
}
-void genX(CmdEndRenderPass2KHR)(
+void genX(CmdEndRenderPass2)(
VkCommandBuffer commandBuffer,
const VkSubpassEndInfoKHR* pSubpassEndInfo)
{