#define __gen_address_offset anv_address_add
#include "common/gen_mi_builder.h"
+static void genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
+ uint32_t pipeline);
+
static void
emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
+ UNUSED const struct gen_device_info *devinfo = &device->info;
uint32_t mocs = device->isl_dev.mocs.internal;
/* If we are emitting a new state base address we probably need to re-emit
pc.CommandStreamerStallEnable = true;
#if GEN_GEN >= 12
pc.TileCacheFlushEnable = true;
+#endif
+#if GEN_GEN == 12
+ /* GEN:BUG:1606662791:
+ *
+ * Software must program PIPE_CONTROL command with "HDC Pipeline
+ * Flush" prior to programming of the below two non-pipeline state :
+ * * STATE_BASE_ADDRESS
+ * * 3DSTATE_BINDING_TABLE_POOL_ALLOC
+ */
+ if (devinfo->revision == 0 /* A0 */)
+ pc.HDCPipelineFlushEnable = true;
#endif
}
+#if GEN_GEN == 12
+ /* GEN:BUG:1607854226:
+ *
+ * Work around the non-pipelined state not applying in MEDIA/GPGPU pipeline
+ * mode by putting the pipeline temporarily in 3D mode.
+ */
+ uint32_t gen12_wa_pipeline = cmd_buffer->state.current_pipeline;
+ genX(flush_pipeline_select_3d)(cmd_buffer);
+#endif
+
anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
sba.GeneralStateMOCS = mocs;
# endif
}
+#if GEN_GEN == 12
+ /* GEN:BUG:1607854226:
+ *
+ * Put the pipeline back into its current mode.
+ */
+ if (gen12_wa_pipeline != UINT32_MAX)
+ genX(flush_pipeline_select)(cmd_buffer, gen12_wa_pipeline);
+#endif
+
/* After re-setting the surface state base address, we have to do some
* cache flushing so that the sampler engine will pick up the new
* SURFACE_STATE objects and binding tables. From the Broadwell PRM,
if (base_layer >= anv_image_aux_layers(image, aspect, base_level))
return;
- assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
+ assert(image->planes[plane].surface.isl.tiling != ISL_TILING_LINEAR);
if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
* executing anything. The chances are fairly high that they will use
* blorp at least once per primary command buffer so it shouldn't be
* wasted.
+ *
+ * There is also a workaround on gen8 which requires us to invalidate the
+ * VF cache occasionally. It's easier if we can assume we start with a
+ * fresh cache (See also genX(cmd_buffer_set_binding_for_gen8_vb_flush).)
*/
- if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
- cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
/* We send an "Indirect State Pointers Disable" packet at
* EndCommandBuffer, so all push constant packets are ignored during a
anv_cmd_buffer_add_secondary(primary, secondary);
}
+ /* The secondary isn't counted in our VF cache tracking so we need to
+ * invalidate the whole thing.
+ */
+ if (GEN_GEN >= 8 && GEN_GEN <= 9) {
+ primary->state.pending_pipe_bits |=
+ ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ }
+
/* The secondary may have selected a different pipeline (3D or compute) and
* may have changed the current L3$ configuration. Reset our tracking
* variables to invalid values to ensure that we re-emit these in the case
{
enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits;
+ if (cmd_buffer->device->instance->physicalDevice.always_flush_cache)
+ bits |= ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS;
+
/* Flushes are pipelined while invalidations are handled immediately.
* Therefore, if we're flushing anything then we need to schedule a stall
* before any invalidations can happen.
bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
}
+ if ((GEN_GEN >= 8 && GEN_GEN <= 9) &&
+ (bits & ANV_PIPE_CS_STALL_BIT) &&
+ (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) {
+ /* If we are doing a VF cache invalidate AND a CS stall (it must be
+ * both) then we can reset our vertex cache tracking.
+ */
+ memset(cmd_buffer->state.gfx.vb_dirty_ranges, 0,
+ sizeof(cmd_buffer->state.gfx.vb_dirty_ranges));
+ memset(&cmd_buffer->state.gfx.ib_dirty_range, 0,
+ sizeof(cmd_buffer->state.gfx.ib_dirty_range));
+ }
+
if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
#if GEN_GEN >= 12
pipe.RenderTargetCacheFlushEnable =
bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+ /* GEN:BUG:1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
+ * be set with any PIPE_CONTROL with Depth Flush Enable bit set."
+ */
+#if GEN_GEN >= 12
+ pipe.DepthStallEnable =
+ pipe.DepthCacheFlushEnable || (bits & ANV_PIPE_DEPTH_STALL_BIT);
+#else
pipe.DepthStallEnable = bits & ANV_PIPE_DEPTH_STALL_BIT;
+#endif
+
pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT;
pipe.StallAtPixelScoreboard = bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
return VK_SUCCESS;
}
- struct anv_shader_bin *bin = pipeline->shaders[stage];
- struct anv_pipeline_bind_map *map = &bin->bind_map;
+ struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
if (map->surface_count == 0) {
*bt_state = (struct anv_state) { 0, };
return VK_SUCCESS;
struct anv_state surface_state;
- if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) {
+ switch (binding->set) {
+ case ANV_DESCRIPTOR_SET_NULL:
+ bt_map[s] = 0;
+ break;
+
+ case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
/* Color attachment binding */
assert(stage == MESA_SHADER_FRAGMENT);
if (binding->index < subpass->color_count) {
}
bt_map[s] = surface_state.offset + state_offset;
- continue;
- } else if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) {
+ break;
+
+ case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS: {
struct anv_state surface_state =
anv_cmd_buffer_alloc_surface_state(cmd_buffer);
bt_map[s] = surface_state.offset + state_offset;
add_surface_reloc(cmd_buffer, surface_state, constant_data);
- continue;
- } else if (binding->set == ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS) {
+ break;
+ }
+
+ case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: {
/* This is always the first binding for compute shaders */
assert(stage == MESA_SHADER_COMPUTE && s == 0);
- if (!get_cs_prog_data(pipeline)->uses_num_work_groups)
- continue;
struct anv_state surface_state =
anv_cmd_buffer_alloc_surface_state(cmd_buffer);
add_surface_reloc(cmd_buffer, surface_state,
cmd_buffer->state.compute.num_workgroups);
}
- continue;
- } else if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS) {
+ break;
+ }
+
+ case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
/* This is a descriptor set buffer so the set index is actually
* given by binding->binding. (Yes, that's confusing.)
*/
bt_map[s] = set->desc_surface_state.offset + state_offset;
add_surface_reloc(cmd_buffer, set->desc_surface_state,
anv_descriptor_set_address(cmd_buffer, set));
- continue;
+ break;
}
- const struct anv_descriptor *desc =
- &pipe_state->descriptors[binding->set]->descriptors[binding->index];
+ default: {
+ assert(binding->set < MAX_SETS);
+ const struct anv_descriptor *desc =
+ &pipe_state->descriptors[binding->set]->descriptors[binding->index];
- switch (desc->type) {
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- /* Nothing for us to do here */
- continue;
+ switch (desc->type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ /* Nothing for us to do here */
+ continue;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
- struct anv_surface_state sstate =
- (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
- desc->image_view->planes[binding->plane].general_sampler_surface_state :
- desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
- surface_state = sstate.state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs)
- add_surface_state_relocs(cmd_buffer, sstate);
- break;
- }
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- assert(stage == MESA_SHADER_FRAGMENT);
- if ((desc->image_view->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) {
- /* For depth and stencil input attachments, we treat it like any
- * old texture that a user may have bound.
- */
- assert(desc->image_view->n_planes == 1);
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
struct anv_surface_state sstate =
(desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
- desc->image_view->planes[0].general_sampler_surface_state :
- desc->image_view->planes[0].optimal_sampler_surface_state;
+ desc->image_view->planes[binding->plane].general_sampler_surface_state :
+ desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
surface_state = sstate.state;
assert(surface_state.alloc_size);
if (need_client_mem_relocs)
add_surface_state_relocs(cmd_buffer, sstate);
- } else {
- /* For color input attachments, we create the surface state at
- * vkBeginRenderPass time so that we can include aux and clear
- * color information.
- */
- assert(binding->input_attachment_index < subpass->input_count);
- const unsigned subpass_att = binding->input_attachment_index;
- const unsigned att = subpass->input_attachments[subpass_att].attachment;
- surface_state = cmd_buffer->state.attachments[att].input.state;
+ break;
}
- break;
-
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
- struct anv_surface_state sstate = (binding->write_only)
- ? desc->image_view->planes[binding->plane].writeonly_storage_surface_state
- : desc->image_view->planes[binding->plane].storage_surface_state;
- surface_state = sstate.state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs)
- add_surface_state_relocs(cmd_buffer, sstate);
- break;
- }
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ assert(stage == MESA_SHADER_FRAGMENT);
+ if ((desc->image_view->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) {
+ /* For depth and stencil input attachments, we treat it like any
+ * old texture that a user may have bound.
+ */
+ assert(desc->image_view->n_planes == 1);
+ struct anv_surface_state sstate =
+ (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
+ desc->image_view->planes[0].general_sampler_surface_state :
+ desc->image_view->planes[0].optimal_sampler_surface_state;
+ surface_state = sstate.state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs)
+ add_surface_state_relocs(cmd_buffer, sstate);
+ } else {
+ /* For color input attachments, we create the surface state at
+ * vkBeginRenderPass time so that we can include aux and clear
+ * color information.
+ */
+ assert(binding->input_attachment_index < subpass->input_count);
+ const unsigned subpass_att = binding->input_attachment_index;
+ const unsigned att = subpass->input_attachments[subpass_att].attachment;
+ surface_state = cmd_buffer->state.attachments[att].input.state;
+ }
+ break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- surface_state = desc->buffer_view->surface_state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs) {
- add_surface_reloc(cmd_buffer, surface_state,
- desc->buffer_view->address);
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
+ struct anv_surface_state sstate = (binding->write_only)
+ ? desc->image_view->planes[binding->plane].writeonly_storage_surface_state
+ : desc->image_view->planes[binding->plane].storage_surface_state;
+ surface_state = sstate.state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs)
+ add_surface_state_relocs(cmd_buffer, sstate);
+ break;
}
- break;
-
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- /* If the shader never does any UBO pulls (this is a fairly common
- * case) then we don't need to fill out those binding table entries.
- * The real cost savings here is that we don't have to build the
- * surface state for them which is surprisingly expensive when it's
- * on the hot-path.
- */
- if (!bin->prog_data->has_ubo_pull)
- continue;
- /* Fall through */
-
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- /* Compute the offset within the buffer */
- struct anv_push_constants *push =
- &cmd_buffer->state.push_constants[stage];
-
- uint32_t dynamic_offset =
- push->dynamic_offsets[binding->dynamic_offset_index];
- uint64_t offset = desc->offset + dynamic_offset;
- /* Clamp to the buffer size */
- offset = MIN2(offset, desc->buffer->size);
- /* Clamp the range to the buffer size */
- uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
-
- struct anv_address address =
- anv_address_add(desc->buffer->address, offset);
- surface_state =
- anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
- enum isl_format format =
- anv_isl_format_for_descriptor_type(desc->type);
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ surface_state = desc->buffer_view->surface_state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs) {
+ add_surface_reloc(cmd_buffer, surface_state,
+ desc->buffer_view->address);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ /* Compute the offset within the buffer */
+ struct anv_push_constants *push =
+ &cmd_buffer->state.push_constants[stage];
+
+ uint32_t dynamic_offset =
+ push->dynamic_offsets[binding->dynamic_offset_index];
+ uint64_t offset = desc->offset + dynamic_offset;
+ /* Clamp to the buffer size */
+ offset = MIN2(offset, desc->buffer->size);
+ /* Clamp the range to the buffer size */
+ uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
+
+ struct anv_address address =
+ anv_address_add(desc->buffer->address, offset);
+
+ surface_state =
+ anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
+ enum isl_format format =
+ anv_isl_format_for_descriptor_type(desc->type);
+
+ anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
+ format, address, range, 1);
+ if (need_client_mem_relocs)
+ add_surface_reloc(cmd_buffer, surface_state, address);
+ break;
+ }
- anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
- format, address, range, 1);
- if (need_client_mem_relocs)
- add_surface_reloc(cmd_buffer, surface_state, address);
- break;
- }
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ surface_state = (binding->write_only)
+ ? desc->buffer_view->writeonly_storage_surface_state
+ : desc->buffer_view->storage_surface_state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs) {
+ add_surface_reloc(cmd_buffer, surface_state,
+ desc->buffer_view->address);
+ }
+ break;
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- surface_state = (binding->write_only)
- ? desc->buffer_view->writeonly_storage_surface_state
- : desc->buffer_view->storage_surface_state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs) {
- add_surface_reloc(cmd_buffer, surface_state,
- desc->buffer_view->address);
+ default:
+ assert(!"Invalid descriptor type");
+ continue;
}
+ bt_map[s] = surface_state.offset + state_offset;
break;
-
- default:
- assert(!"Invalid descriptor type");
- continue;
}
-
- bt_map[s] = surface_state.offset + state_offset;
+ }
}
return VK_SUCCESS;
}
static uint32_t
-flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
+flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_pipeline *pipeline)
{
- struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
-
VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
pipeline->active_stages;
}
}
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+static struct anv_address
+get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
+ gl_shader_stage stage,
+ const struct anv_push_range *range)
+{
+ const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
+ switch (range->set) {
+ case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
+ /* This is a descriptor set buffer so the set index is
+ * actually given by range->index. (Yes, that's
+ * confusing.)
+ */
+ struct anv_descriptor_set *set =
+ gfx_state->base.descriptors[range->index];
+ return anv_descriptor_set_address(cmd_buffer, set);
+ break;
+ }
+
+ case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: {
+ struct anv_state state =
+ anv_cmd_buffer_push_constants(cmd_buffer, stage);
+ return (struct anv_address) {
+ .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
+ .offset = state.offset,
+ };
+ break;
+ }
+
+ default: {
+ assert(range->set < MAX_SETS);
+ struct anv_descriptor_set *set =
+ gfx_state->base.descriptors[range->set];
+ const struct anv_descriptor *desc =
+ &set->descriptors[range->index];
+
+ if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+ return desc->buffer_view->address;
+ } else {
+ assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
+ struct anv_push_constants *push =
+ &cmd_buffer->state.push_constants[stage];
+ uint32_t dynamic_offset =
+ push->dynamic_offsets[range->dynamic_offset_index];
+ return anv_address_add(desc->buffer->address,
+ desc->offset + dynamic_offset);
+ }
+ }
+ }
+}
+#endif
+
static void
-cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
- VkShaderStageFlags dirty_stages)
+cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
+ gl_shader_stage stage, unsigned buffer_count)
{
const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
const struct anv_pipeline *pipeline = gfx_state->base.pipeline;
[MESA_SHADER_COMPUTE] = 0,
};
- VkShaderStageFlags flushed = 0;
+ assert(stage < ARRAY_SIZE(push_constant_opcodes));
+ assert(push_constant_opcodes[stage] > 0);
- anv_foreach_stage(stage, dirty_stages) {
- assert(stage < ARRAY_SIZE(push_constant_opcodes));
- assert(push_constant_opcodes[stage] > 0);
-
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c) {
- c._3DCommandSubOpcode = push_constant_opcodes[stage];
-
- if (anv_pipeline_has_stage(pipeline, stage)) {
- const struct anv_pipeline_bind_map *bind_map =
- &pipeline->shaders[stage]->bind_map;
-
- for (unsigned i = 0; i < 4; i++) {
- const struct anv_push_range *range = &bind_map->push_ranges[i];
- if (range->length == 0)
- continue;
-
- struct anv_address addr;
- switch (range->set) {
- case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
- /* This is a descriptor set buffer so the set index is
- * actually given by binding->binding. (Yes, that's
- * confusing.)
- */
- struct anv_descriptor_set *set =
- gfx_state->base.descriptors[range->index];
- addr = anv_descriptor_set_address(cmd_buffer, set);
- break;
- }
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c) {
+ c._3DCommandSubOpcode = push_constant_opcodes[stage];
- case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: {
- struct anv_state state =
- anv_cmd_buffer_push_constants(cmd_buffer, stage);
- addr = (struct anv_address) {
- .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
- .offset = state.offset,
- };
- break;
- }
+ if (anv_pipeline_has_stage(pipeline, stage)) {
+ const struct anv_pipeline_bind_map *bind_map =
+ &pipeline->shaders[stage]->bind_map;
- default: {
- assert(range->set < MAX_SETS);
- struct anv_descriptor_set *set =
- gfx_state->base.descriptors[range->set];
- const struct anv_descriptor *desc =
- &set->descriptors[range->index];
-
- if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
- addr = desc->buffer_view->address;
- } else {
- assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
- struct anv_push_constants *push =
- &cmd_buffer->state.push_constants[stage];
- uint32_t dynamic_offset =
- push->dynamic_offsets[range->dynamic_offset_index];
- addr = anv_address_add(desc->buffer->address,
- desc->offset + dynamic_offset);
- }
- }
- }
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+ /* The Skylake PRM contains the following restriction:
+ *
+ * "The driver must ensure The following case does not occur
+ * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+ * buffer 3 read length equal to zero committed followed by a
+ * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+ * zero committed."
+ *
+ * To avoid this, we program the buffers in the highest slots.
+ * This way, slot 0 is only used if slot 3 is also used.
+ */
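+
+ /* As an illustrative sketch of the slot assignment below (not part of
+ * the quoted PRM text): with buffer_count == 2 the shift works out to
+ * 4 - 2 = 2, so the two non-empty ranges land in slots 2 and 3, roughly
+ *
+ *    c.ConstantBody.ReadLength[2] = bind_map->push_ranges[0].length;
+ *    c.ConstantBody.ReadLength[3] = bind_map->push_ranges[1].length;
+ *
+ * leaving slots 0 and 1 untouched, so a non-zero buffer 0 read length is
+ * never committed while the buffer 3 read length is zero.
+ */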
+ assert(buffer_count <= 4);
+ const unsigned shift = 4 - buffer_count;
+ for (unsigned i = 0; i < buffer_count; i++) {
+ const struct anv_push_range *range = &bind_map->push_ranges[i];
- c.ConstantBody.ReadLength[i] = range->length;
- c.ConstantBody.Buffer[i] =
- anv_address_add(addr, range->start * 32);
- }
+ /* At this point we only have non-empty ranges */
+ assert(range->length > 0);
+
+ /* For Ivy Bridge, make sure we only set the first range (actual
+ * push constants)
+ */
+ assert((GEN_GEN >= 8 || GEN_IS_HASWELL) || i == 0);
+
+ const struct anv_address addr =
+ get_push_range_address(cmd_buffer, stage, range);
+ c.ConstantBody.ReadLength[i + shift] = range->length;
+ c.ConstantBody.Buffer[i + shift] =
+ anv_address_add(addr, range->start * 32);
+ }
+#else
+ /* For Ivy Bridge, push constants are relative to dynamic state
+ * base address and we only ever push actual push constants.
+ */
+ if (bind_map->push_ranges[0].length > 0) {
+ assert(bind_map->push_ranges[0].set ==
+ ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
+ struct anv_state state =
+ anv_cmd_buffer_push_constants(cmd_buffer, stage);
+ c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length;
+ c.ConstantBody.Buffer[0].bo = NULL;
+ c.ConstantBody.Buffer[0].offset = state.offset;
}
+ assert(bind_map->push_ranges[1].length == 0);
+ assert(bind_map->push_ranges[2].length == 0);
+ assert(bind_map->push_ranges[3].length == 0);
+#endif
}
+ }
+}
+#if GEN_GEN >= 12
+static void
+cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
+ uint32_t shader_mask, uint32_t count)
+{
+ if (count == 0) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
+ c.ShaderUpdateEnable = shader_mask;
+ }
+ return;
+ }
+
+ const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
+ const struct anv_pipeline *pipeline = gfx_state->base.pipeline;
+
+ static const uint32_t push_constant_opcodes[] = {
+ [MESA_SHADER_VERTEX] = 21,
+ [MESA_SHADER_TESS_CTRL] = 25, /* HS */
+ [MESA_SHADER_TESS_EVAL] = 26, /* DS */
+ [MESA_SHADER_GEOMETRY] = 22,
+ [MESA_SHADER_FRAGMENT] = 23,
+ [MESA_SHADER_COMPUTE] = 0,
+ };
+
+ gl_shader_stage stage = vk_to_mesa_shader_stage(shader_mask);
+ assert(stage < ARRAY_SIZE(push_constant_opcodes));
+ assert(push_constant_opcodes[stage] > 0);
+
+ const struct anv_pipeline_bind_map *bind_map =
+ &pipeline->shaders[stage]->bind_map;
+
+ uint32_t *dw;
+ const uint32_t buffers = (1 << count) - 1;
+ const uint32_t num_dwords = 2 + 2 * count;
+
+ dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
+ GENX(3DSTATE_CONSTANT_ALL),
+ .ShaderUpdateEnable = shader_mask,
+ .PointerBufferMask = buffers);
+
+ for (int i = 0; i < count; i++) {
+ const struct anv_push_range *range = &bind_map->push_ranges[i];
+ const struct anv_address addr =
+ get_push_range_address(cmd_buffer, stage, range);
+
+ GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(
+ &cmd_buffer->batch, dw + 2 + i * 2,
+ &(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) {
+ .PointerToConstantBuffer = anv_address_add(addr, range->start * 32),
+ .ConstantBufferReadLength = range->length,
+ });
+ }
+}
+#endif
+
+static void
+cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
+ VkShaderStageFlags dirty_stages)
+{
+ VkShaderStageFlags flushed = 0;
+ const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
+ const struct anv_pipeline *pipeline = gfx_state->base.pipeline;
+
+#if GEN_GEN >= 12
+ uint32_t nobuffer_stages = 0;
+#endif
+
+ anv_foreach_stage(stage, dirty_stages) {
+ unsigned buffer_count = 0;
flushed |= mesa_to_vk_shader_stage(stage);
+ uint32_t max_push_range = 0;
+
+ if (anv_pipeline_has_stage(pipeline, stage)) {
+ const struct anv_pipeline_bind_map *bind_map =
+ &pipeline->shaders[stage]->bind_map;
+
+ for (unsigned i = 0; i < 4; i++) {
+ const struct anv_push_range *range = &bind_map->push_ranges[i];
+ if (range->length > 0) {
+ buffer_count++;
+ if (GEN_GEN >= 12 && range->length > max_push_range)
+ max_push_range = range->length;
+ }
+ }
+ }
+
+#if GEN_GEN >= 12
+ /* If this stage doesn't have any push constants, emit it later in a
+ * single CONSTANT_ALL packet.
+ */
+ if (buffer_count == 0) {
+ nobuffer_stages |= 1 << stage;
+ continue;
+ }
+
+ /* The Constant Buffer Read Length field from 3DSTATE_CONSTANT_ALL
+ * contains only 5 bits, so we can only use it for buffers smaller than
+ * 32.
+ */
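+ /* Illustrative arithmetic, assuming the lengths here are in the same
+ * 32-byte units used for the range->start * 32 address math above: 5 bits
+ * encode read lengths 0..31, i.e. at most 31 * 32 = 992 bytes per range,
+ * so stages with a larger range fall through to the per-stage
+ * 3DSTATE_CONSTANT_* path below.
+ */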
+ if (max_push_range < 32) {
+ cmd_buffer_emit_push_constant_all(cmd_buffer, 1 << stage,
+ buffer_count);
+ continue;
+ }
+#endif
+
+ cmd_buffer_emit_push_constant(cmd_buffer, stage, buffer_count);
}
+#if GEN_GEN >= 12
+ if (nobuffer_stages)
+ cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, 0);
+#endif
+
cmd_buffer->state.push_constants_dirty &= ~flushed;
}
#endif
};
+#if GEN_GEN >= 8 && GEN_GEN <= 9
+ genX(cmd_buffer_set_binding_for_gen8_vb_flush)(cmd_buffer, vb,
+ state.BufferStartingAddress,
+ state.BufferSize);
+#endif
+
GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state);
i++;
}
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
- /* The exact descriptor layout is pulled from the pipeline, so we need
- * to re-emit binding tables on every pipeline change.
- */
- cmd_buffer->state.descriptors_dirty |= pipeline->active_stages;
-
/* If the pipeline changed, we may need to re-allocate push constant
* space in the URB.
*/
*/
uint32_t dirty = 0;
if (cmd_buffer->state.descriptors_dirty)
- dirty = flush_descriptor_sets(cmd_buffer);
+ dirty = flush_descriptor_sets(cmd_buffer, pipeline);
if (dirty || cmd_buffer->state.push_constants_dirty) {
/* Because we're pushing UBOs, we have to push whenever either
gen7_cmd_buffer_emit_scissor(cmd_buffer);
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
-
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
static void
.VertexBufferIndex = index,
.AddressModifyEnable = true,
.BufferPitch = 0,
- .MOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
+ .MOCS = addr.bo ? anv_mocs_for_bo(cmd_buffer->device, addr.bo) : 0,
+ .NullVertexBuffer = size == 0,
#if (GEN_GEN >= 8)
.BufferStartingAddress = addr,
.BufferSize = size
.EndAddress = anv_address_add(addr, size),
#endif
});
+
+ genX(cmd_buffer_set_binding_for_gen8_vb_flush)(cmd_buffer,
+ index, addr, size);
}
static void
emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer,
struct anv_address addr)
{
- emit_vertex_bo(cmd_buffer, addr, 8, ANV_SVGS_VB_INDEX);
+ emit_vertex_bo(cmd_buffer, addr, addr.bo ? 8 : 0, ANV_SVGS_VB_INDEX);
}
static void
emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer,
uint32_t base_vertex, uint32_t base_instance)
{
- struct anv_state id_state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4);
+ if (base_vertex == 0 && base_instance == 0) {
+ emit_base_vertex_instance_bo(cmd_buffer, ANV_NULL_ADDRESS);
+ } else {
+ struct anv_state id_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4);
- ((uint32_t *)id_state.map)[0] = base_vertex;
- ((uint32_t *)id_state.map)[1] = base_instance;
+ ((uint32_t *)id_state.map)[0] = base_vertex;
+ ((uint32_t *)id_state.map)[1] = base_instance;
- struct anv_address addr = {
- .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
- .offset = id_state.offset,
- };
+ struct anv_address addr = {
+ .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
+ .offset = id_state.offset,
+ };
- emit_base_vertex_instance_bo(cmd_buffer, addr);
+ emit_base_vertex_instance_bo(cmd_buffer, addr);
+ }
}
static void
emit_vertex_bo(cmd_buffer, addr, 4, ANV_DRAWID_VB_INDEX);
}
+static void
+update_dirty_vbs_for_gen8_vb_flush(struct anv_cmd_buffer *cmd_buffer,
+ uint32_t access_type)
+{
+ struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+ const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+
+ uint64_t vb_used = pipeline->vb_used;
+ if (vs_prog_data->uses_firstvertex ||
+ vs_prog_data->uses_baseinstance)
+ vb_used |= 1ull << ANV_SVGS_VB_INDEX;
+ if (vs_prog_data->uses_drawid)
+ vb_used |= 1ull << ANV_DRAWID_VB_INDEX;
+
+ genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)(cmd_buffer,
+ access_type == RANDOM,
+ vb_used);
+}
+
void genX(CmdDraw)(
VkCommandBuffer commandBuffer,
uint32_t vertexCount,
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, 0);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views. We need to multiply instanceCount by the view count.
*/
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = 0;
}
+
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
}
void genX(CmdDrawIndexed)(
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, 0);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views. We need to multiply instanceCount by the view count.
*/
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = vertexOffset;
}
+
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, RANDOM);
}
/* Auto-Draw / Indirect Registers */
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, 0);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views. We need to multiply instanceCount by the view count.
*/
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = pipeline->topology;
}
+
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
#endif /* GEN_IS_HASWELL || GEN_GEN >= 8 */
}
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, i);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
load_indirect_parameters(cmd_buffer, draw, false);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PrimitiveTopologyType = pipeline->topology;
}
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
+
offset += stride;
}
}
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, i);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
load_indirect_parameters(cmd_buffer, draw, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PrimitiveTopologyType = pipeline->topology;
}
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, RANDOM);
+
offset += stride;
}
}
}
#endif
-void genX(CmdDrawIndirectCountKHR)(
+void genX(CmdDrawIndirectCount)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, i);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
load_indirect_parameters(cmd_buffer, draw, false);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PrimitiveTopologyType = pipeline->topology;
}
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
+
offset += stride;
}
}
-void genX(CmdDrawIndexedIndirectCountKHR)(
+void genX(CmdDrawIndexedIndirectCount)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, i);
+ /* Emitting draw index or vertex index BOs may result in needing
+ * additional VF cache flushes.
+ */
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
load_indirect_parameters(cmd_buffer, draw, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PrimitiveTopologyType = pipeline->topology;
}
+ update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, RANDOM);
+
offset += stride;
}
}
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
-static VkResult
-flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
-{
- struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
- struct anv_state surfaces = { 0, }, samplers = { 0, };
- VkResult result;
-
- result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
- if (result != VK_SUCCESS) {
- assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
-
- result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
- if (result != VK_SUCCESS)
- return result;
-
- /* Re-emit state base addresses so we get the new surface state base
- * address before we start emitting binding tables etc.
- */
- genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
-
- result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
- if (result != VK_SUCCESS) {
- anv_batch_set_error(&cmd_buffer->batch, result);
- return result;
- }
- }
-
- result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
- if (result != VK_SUCCESS) {
- anv_batch_set_error(&cmd_buffer->batch, result);
- return result;
- }
-
- uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
- struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
- .BindingTablePointer = surfaces.offset,
- .SamplerStatePointer = samplers.offset,
- };
- GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, iface_desc_data_dw, &desc);
-
- struct anv_state state =
- anv_cmd_buffer_merge_dynamic(cmd_buffer, iface_desc_data_dw,
- pipeline->interface_descriptor_data,
- GENX(INTERFACE_DESCRIPTOR_DATA_length),
- 64);
-
- uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
- anv_batch_emit(&cmd_buffer->batch,
- GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), mid) {
- mid.InterfaceDescriptorTotalLength = size;
- mid.InterfaceDescriptorDataStartAddress = state.offset;
- }
-
- return VK_SUCCESS;
-}
-
void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
- VkResult result;
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+ /* The workgroup size of the pipeline affects our push constant layout
+ * so flag push constants as dirty if we change the pipeline.
+ */
+ cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
cmd_buffer->state.compute.pipeline_dirty) {
- /* FIXME: figure out descriptors for gen7 */
- result = flush_compute_descriptor_set(cmd_buffer);
- if (result != VK_SUCCESS)
- return;
+ flush_descriptor_sets(cmd_buffer, pipeline);
+
+ uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
+ struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
+ .BindingTablePointer =
+ cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
+ .SamplerStatePointer =
+ cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset,
+ };
+ GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, iface_desc_data_dw, &desc);
- cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
+ struct anv_state state =
+ anv_cmd_buffer_merge_dynamic(cmd_buffer, iface_desc_data_dw,
+ pipeline->interface_descriptor_data,
+ GENX(INTERFACE_DESCRIPTOR_DATA_length),
+ 64);
+
+ uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
+ anv_batch_emit(&cmd_buffer->batch,
+ GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), mid) {
+ mid.InterfaceDescriptorTotalLength = size;
+ mid.InterfaceDescriptorDataStartAddress = state.offset;
+ }
}
if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = state.offset,
};
+
+ /* The num_workgroups buffer goes in the binding table */
+ cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
return;
#endif
- if (prog_data->uses_num_work_groups)
+ if (prog_data->uses_num_work_groups) {
cmd_buffer->state.compute.num_workgroups = addr;
+ /* The num_workgroups buffer goes in the binding table */
+ cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
struct gen_mi_builder b;
vfe.NumberofURBEntries = 2;
vfe.URBEntryAllocationSize = 2;
}
+
+ /* We just emitted a dummy MEDIA_VFE_STATE so now that packet is
+ * invalid. Set the compute pipeline to dirty to force a re-emit of the
+ * pipeline in case we get back-to-back dispatch calls with the same
+ * pipeline and a PIPELINE_SELECT in between.
+ */
+ cmd_buffer->state.compute.pipeline_dirty = true;
}
#endif
pc.CommandStreamerStallEnable = true;
#if GEN_GEN >= 12
pc.TileCacheFlushEnable = true;
+
+ /* GEN:BUG:1409600907: "PIPE_CONTROL with Depth Stall Enable bit must be
+ * set with any PIPE_CONTROL with Depth Flush Enable bit set."
+ */
+ pc.DepthStallEnable = true;
#endif
}
}
}
+/* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS:
+ *
+ * "The VF cache needs to be invalidated before binding and then using
+ * Vertex Buffers that overlap with any previously bound Vertex Buffer
+ * (at a 64B granularity) since the last invalidation. A VF cache
+ * invalidate is performed by setting the "VF Cache Invalidation Enable"
+ * bit in PIPE_CONTROL."
+ *
+ * This is implemented by carefully tracking all vertex and index buffer
+ * bindings and flushing whenever the cache would end up containing a range
+ * larger than 4 GiB. The tracking is done in three parts:
+ *
+ * 1. genX(cmd_buffer_set_binding_for_gen8_vb_flush)() which must be called
+ * every time a 3DSTATE_VERTEX_BUFFER packet is emitted and informs the
+ * tracking code of the new binding. If this new binding would cause
+ * the cache to have a too-large range on the next draw call, a pipeline
+ * stall and VF cache invalidate are added to pending_pipe_bits.
+ *
+ * 2. genX(cmd_buffer_apply_pipe_flushes)() resets the cache tracking to
+ * empty whenever we emit a VF invalidate.
+ *
+ * 3. genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)() must be called
+ * after every 3DPRIMITIVE and copies the bound range into the dirty
+ * range for each used buffer. This has to be a separate step because
+ * we don't always re-bind all buffers and so 1. can't know which
+ * buffers are actually bound.
+ */
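+
+/* As a rough sketch of how the three parts fit together for a single draw
+ * (the real call sites are the 3DSTATE_VERTEX_BUFFERS emission and the
+ * genX(CmdDraw*) functions earlier in this file; argument names here are
+ * only illustrative):
+ *
+ *    genX(cmd_buffer_set_binding_for_gen8_vb_flush)(cmd_buffer, vb_index,
+ *                                                   addr, size);
+ *    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+ *    ... emit 3DPRIMITIVE ...
+ *    genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)(cmd_buffer,
+ *                                                        access_type, vb_used);
+ */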
+void
+genX(cmd_buffer_set_binding_for_gen8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
+ int vb_index,
+ struct anv_address vb_address,
+ uint32_t vb_size)
+{
+ if (GEN_GEN < 8 || GEN_GEN > 9 ||
+ !cmd_buffer->device->instance->physicalDevice.use_softpin)
+ return;
+
+ struct anv_vb_cache_range *bound, *dirty;
+ if (vb_index == -1) {
+ bound = &cmd_buffer->state.gfx.ib_bound_range;
+ dirty = &cmd_buffer->state.gfx.ib_dirty_range;
+ } else {
+ assert(vb_index >= 0);
+ assert(vb_index < ARRAY_SIZE(cmd_buffer->state.gfx.vb_bound_ranges));
+ assert(vb_index < ARRAY_SIZE(cmd_buffer->state.gfx.vb_dirty_ranges));
+ bound = &cmd_buffer->state.gfx.vb_bound_ranges[vb_index];
+ dirty = &cmd_buffer->state.gfx.vb_dirty_ranges[vb_index];
+ }
+
+ if (vb_size == 0) {
+ bound->start = 0;
+ bound->end = 0;
+ return;
+ }
+
+ assert(vb_address.bo && (vb_address.bo->flags & EXEC_OBJECT_PINNED));
+ bound->start = gen_48b_address(anv_address_physical(vb_address));
+ bound->end = bound->start + vb_size;
+ assert(bound->end > bound->start); /* No overflow */
+
+ /* Align everything to a cache line */
+ bound->start &= ~(64ull - 1ull);
+ bound->end = align_u64(bound->end, 64);
+
+ /* Compute the dirty range */
+ dirty->start = MIN2(dirty->start, bound->start);
+ dirty->end = MAX2(dirty->end, bound->end);
+
+ /* If our range is larger than 32 bits, we have to flush */
+ assert(bound->end - bound->start <= (1ull << 32));
+ if (dirty->end - dirty->start > (1ull << 32)) {
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ }
+}
+
+void
+genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
+ uint32_t access_type,
+ uint64_t vb_used)
+{
+ if (GEN_GEN < 8 || GEN_GEN > 9 ||
+ !cmd_buffer->device->instance->physicalDevice.use_softpin)
+ return;
+
+ if (access_type == RANDOM) {
+ /* We have an index buffer */
+ struct anv_vb_cache_range *bound = &cmd_buffer->state.gfx.ib_bound_range;
+ struct anv_vb_cache_range *dirty = &cmd_buffer->state.gfx.ib_dirty_range;
+
+ if (bound->end > bound->start) {
+ dirty->start = MIN2(dirty->start, bound->start);
+ dirty->end = MAX2(dirty->end, bound->end);
+ }
+ }
+
+ uint64_t mask = vb_used;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ assert(i >= 0);
+ assert(i < ARRAY_SIZE(cmd_buffer->state.gfx.vb_bound_ranges));
+ assert(i < ARRAY_SIZE(cmd_buffer->state.gfx.vb_dirty_ranges));
+
+ struct anv_vb_cache_range *bound, *dirty;
+ bound = &cmd_buffer->state.gfx.vb_bound_ranges[i];
+ dirty = &cmd_buffer->state.gfx.vb_dirty_ranges[i];
+
+ if (bound->end > bound->start) {
+ dirty->start = MIN2(dirty->start, bound->start);
+ dirty->end = MAX2(dirty->end, bound->end);
+ }
+ }
+}
+
/**
* Update the pixel hashing modes that determine the balancing of PS threads
* across subslices and slices.
isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
+ if (GEN_GEN >= 12) {
+ /* GEN:BUG:1408224581
+ *
+ * Workaround (Gen12LP A-step only): an additional PIPE_CONTROL with a
+ * post-sync store-dword operation is required. The workaround is to emit
+ * an additional PIPE_CONTROL after the depth/stencil state whenever the
+ * surface state bits of this state change.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.PostSyncOperation = WriteImmediateData;
+ pc.Address =
+ (struct anv_address) { cmd_buffer->device->workaround_bo, 0 };
+ }
+ }
cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ;
}
cmd_buffer_begin_subpass(cmd_buffer, 0);
}
-void genX(CmdBeginRenderPass2KHR)(
+void genX(CmdBeginRenderPass2)(
VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo* pRenderPassBeginInfo,
const VkSubpassBeginInfoKHR* pSubpassBeginInfo)
cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
}
-void genX(CmdNextSubpass2KHR)(
+void genX(CmdNextSubpass2)(
VkCommandBuffer commandBuffer,
const VkSubpassBeginInfoKHR* pSubpassBeginInfo,
const VkSubpassEndInfoKHR* pSubpassEndInfo)
cmd_buffer->state.subpass = NULL;
}
-void genX(CmdEndRenderPass2KHR)(
+void genX(CmdEndRenderPass2)(
VkCommandBuffer commandBuffer,
const VkSubpassEndInfoKHR* pSubpassEndInfo)
{