{
enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits;
+ if (cmd_buffer->device->instance->physicalDevice.always_flush_cache)
+ bits |= ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS;
+
/* Flushes are pipelined while invalidations are handled immediately.
* Therefore, if we're flushing anything then we need to schedule a stall
* before any invalidations can happen.
return VK_SUCCESS;
}
- struct anv_shader_bin *bin = pipeline->shaders[stage];
- struct anv_pipeline_bind_map *map = &bin->bind_map;
+ struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
if (map->surface_count == 0) {
*bt_state = (struct anv_state) { 0, };
return VK_SUCCESS;
struct anv_state surface_state;
- if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) {
+ switch (binding->set) {
+ case ANV_DESCRIPTOR_SET_NULL:
+ bt_map[s] = 0;
+ break;
+
+ case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
/* Color attachment binding */
assert(stage == MESA_SHADER_FRAGMENT);
if (binding->index < subpass->color_count) {
}
bt_map[s] = surface_state.offset + state_offset;
- continue;
- } else if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) {
+ break;
+
+ case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS: {
struct anv_state surface_state =
anv_cmd_buffer_alloc_surface_state(cmd_buffer);
bt_map[s] = surface_state.offset + state_offset;
add_surface_reloc(cmd_buffer, surface_state, constant_data);
- continue;
- } else if (binding->set == ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS) {
+ break;
+ }
+
+ case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: {
/* This is always the first binding for compute shaders */
assert(stage == MESA_SHADER_COMPUTE && s == 0);
- if (!get_cs_prog_data(pipeline)->uses_num_work_groups)
- continue;
struct anv_state surface_state =
anv_cmd_buffer_alloc_surface_state(cmd_buffer);
add_surface_reloc(cmd_buffer, surface_state,
cmd_buffer->state.compute.num_workgroups);
}
- continue;
- } else if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS) {
+ break;
+ }
+
+ case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
/* This is a descriptor set buffer so the set index is actually
* given by binding->binding. (Yes, that's confusing.)
*/
bt_map[s] = set->desc_surface_state.offset + state_offset;
add_surface_reloc(cmd_buffer, set->desc_surface_state,
anv_descriptor_set_address(cmd_buffer, set));
- continue;
+ break;
}
- const struct anv_descriptor *desc =
- &pipe_state->descriptors[binding->set]->descriptors[binding->index];
+ default: {
+ assert(binding->set < MAX_SETS);
+ const struct anv_descriptor *desc =
+ &pipe_state->descriptors[binding->set]->descriptors[binding->index];
- switch (desc->type) {
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- /* Nothing for us to do here */
- continue;
+ switch (desc->type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ /* Nothing for us to do here */
+ continue;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
- struct anv_surface_state sstate =
- (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
- desc->image_view->planes[binding->plane].general_sampler_surface_state :
- desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
- surface_state = sstate.state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs)
- add_surface_state_relocs(cmd_buffer, sstate);
- break;
- }
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- assert(stage == MESA_SHADER_FRAGMENT);
- if ((desc->image_view->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) {
- /* For depth and stencil input attachments, we treat it like any
- * old texture that a user may have bound.
- */
- assert(desc->image_view->n_planes == 1);
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
struct anv_surface_state sstate =
(desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
- desc->image_view->planes[0].general_sampler_surface_state :
- desc->image_view->planes[0].optimal_sampler_surface_state;
+ desc->image_view->planes[binding->plane].general_sampler_surface_state :
+ desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
surface_state = sstate.state;
assert(surface_state.alloc_size);
if (need_client_mem_relocs)
add_surface_state_relocs(cmd_buffer, sstate);
- } else {
- /* For color input attachments, we create the surface state at
- * vkBeginRenderPass time so that we can include aux and clear
- * color information.
- */
- assert(binding->input_attachment_index < subpass->input_count);
- const unsigned subpass_att = binding->input_attachment_index;
- const unsigned att = subpass->input_attachments[subpass_att].attachment;
- surface_state = cmd_buffer->state.attachments[att].input.state;
+ break;
}
- break;
-
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
- struct anv_surface_state sstate = (binding->write_only)
- ? desc->image_view->planes[binding->plane].writeonly_storage_surface_state
- : desc->image_view->planes[binding->plane].storage_surface_state;
- surface_state = sstate.state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs)
- add_surface_state_relocs(cmd_buffer, sstate);
- break;
- }
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ assert(stage == MESA_SHADER_FRAGMENT);
+ if ((desc->image_view->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) {
+ /* For depth and stencil input attachments, we treat it like any
+ * old texture that a user may have bound.
+ */
+ assert(desc->image_view->n_planes == 1);
+ struct anv_surface_state sstate =
+ (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
+ desc->image_view->planes[0].general_sampler_surface_state :
+ desc->image_view->planes[0].optimal_sampler_surface_state;
+ surface_state = sstate.state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs)
+ add_surface_state_relocs(cmd_buffer, sstate);
+ } else {
+ /* For color input attachments, we create the surface state at
+ * vkBeginRenderPass time so that we can include aux and clear
+ * color information.
+ */
+ assert(binding->input_attachment_index < subpass->input_count);
+ const unsigned subpass_att = binding->input_attachment_index;
+ const unsigned att = subpass->input_attachments[subpass_att].attachment;
+ surface_state = cmd_buffer->state.attachments[att].input.state;
+ }
+ break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- surface_state = desc->buffer_view->surface_state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs) {
- add_surface_reloc(cmd_buffer, surface_state,
- desc->buffer_view->address);
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
+ struct anv_surface_state sstate = (binding->write_only)
+ ? desc->image_view->planes[binding->plane].writeonly_storage_surface_state
+ : desc->image_view->planes[binding->plane].storage_surface_state;
+ surface_state = sstate.state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs)
+ add_surface_state_relocs(cmd_buffer, sstate);
+ break;
}
- break;
-
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- /* If the shader never does any UBO pulls (this is a fairly common
- * case) then we don't need to fill out those binding table entries.
- * The real cost savings here is that we don't have to build the
- * surface state for them which is surprisingly expensive when it's
- * on the hot-path.
- */
- if (!bin->prog_data->has_ubo_pull)
- continue;
- /* Fall through */
-
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- /* Compute the offset within the buffer */
- struct anv_push_constants *push =
- &cmd_buffer->state.push_constants[stage];
-
- uint32_t dynamic_offset =
- push->dynamic_offsets[binding->dynamic_offset_index];
- uint64_t offset = desc->offset + dynamic_offset;
- /* Clamp to the buffer size */
- offset = MIN2(offset, desc->buffer->size);
- /* Clamp the range to the buffer size */
- uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
-
- struct anv_address address =
- anv_address_add(desc->buffer->address, offset);
- surface_state =
- anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
- enum isl_format format =
- anv_isl_format_for_descriptor_type(desc->type);
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ surface_state = desc->buffer_view->surface_state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs) {
+ add_surface_reloc(cmd_buffer, surface_state,
+ desc->buffer_view->address);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ /* Compute the offset within the buffer */
+ struct anv_push_constants *push =
+ &cmd_buffer->state.push_constants[stage];
+
+ uint32_t dynamic_offset =
+ push->dynamic_offsets[binding->dynamic_offset_index];
+ uint64_t offset = desc->offset + dynamic_offset;
+ /* Clamp to the buffer size */
+ offset = MIN2(offset, desc->buffer->size);
+ /* Clamp the range to the buffer size */
+ uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
+
+ struct anv_address address =
+ anv_address_add(desc->buffer->address, offset);
+
+ surface_state =
+ anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
+ enum isl_format format =
+ anv_isl_format_for_descriptor_type(desc->type);
+
+ anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
+ format, address, range, 1);
+ if (need_client_mem_relocs)
+ add_surface_reloc(cmd_buffer, surface_state, address);
+ break;
+ }
- anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
- format, address, range, 1);
- if (need_client_mem_relocs)
- add_surface_reloc(cmd_buffer, surface_state, address);
- break;
- }
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ surface_state = (binding->write_only)
+ ? desc->buffer_view->writeonly_storage_surface_state
+ : desc->buffer_view->storage_surface_state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs) {
+ add_surface_reloc(cmd_buffer, surface_state,
+ desc->buffer_view->address);
+ }
+ break;
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- surface_state = (binding->write_only)
- ? desc->buffer_view->writeonly_storage_surface_state
- : desc->buffer_view->storage_surface_state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs) {
- add_surface_reloc(cmd_buffer, surface_state,
- desc->buffer_view->address);
+ default:
+ assert(!"Invalid descriptor type");
+ continue;
}
+ bt_map[s] = surface_state.offset + state_offset;
break;
-
- default:
- assert(!"Invalid descriptor type");
- continue;
}
-
- bt_map[s] = surface_state.offset + state_offset;
+ }
}
return VK_SUCCESS;
const struct anv_pipeline_bind_map *bind_map =
&pipeline->shaders[stage]->bind_map;
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+ unsigned buffer_count = 0;
for (unsigned i = 0; i < 4; i++) {
const struct anv_push_range *range = &bind_map->push_ranges[i];
- if (range->length == 0)
- continue;
+ if (range->length > 0)
+ buffer_count++;
+ }
+
+ /* The Skylake PRM contains the following restriction:
+ *
+ * "The driver must ensure The following case does not occur
+ * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+ * buffer 3 read length equal to zero committed followed by a
+ * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+ * zero committed."
+ *
+ * To avoid this, we program the buffers in the highest slots.
+ * This way, slot 0 is only used if slot 3 is also used.
+ */
+ assert(buffer_count <= 4);
+ const unsigned shift = 4 - buffer_count;
+ for (unsigned i = 0; i < buffer_count; i++) {
+ const struct anv_push_range *range = &bind_map->push_ranges[i];
+
+ /* At this point we only have non-empty ranges */
+ assert(range->length > 0);
struct anv_address addr;
switch (range->set) {
}
}
- c.ConstantBody.ReadLength[i] = range->length;
- c.ConstantBody.Buffer[i] =
+ c.ConstantBody.ReadLength[i + shift] = range->length;
+ c.ConstantBody.Buffer[i + shift] =
anv_address_add(addr, range->start * 32);
}
+#else
+ /* For Ivy Bridge, push constants are relative to dynamic state
+ * base address and we only ever push actual push constants.
+ */
+ if (bind_map->push_ranges[0].length > 0) {
+ assert(bind_map->push_ranges[0].set ==
+ ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
+ struct anv_state state =
+ anv_cmd_buffer_push_constants(cmd_buffer, stage);
+ c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length;
+ c.ConstantBody.Buffer[0].offset = state.offset;
+ }
+ assert(bind_map->push_ranges[1].length == 0);
+ assert(bind_map->push_ranges[2].length == 0);
+ assert(bind_map->push_ranges[3].length == 0);
+#endif
}
}
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
- /* The exact descriptor layout is pulled from the pipeline, so we need
- * to re-emit binding tables on every pipeline change.
- */
- cmd_buffer->state.descriptors_dirty |= pipeline->active_stages;
-
/* If the pipeline changed, we may need to re-allocate push constant
* space in the URB.
*/
.VertexBufferIndex = index,
.AddressModifyEnable = true,
.BufferPitch = 0,
- .MOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
+ .MOCS = addr.bo ? anv_mocs_for_bo(cmd_buffer->device, addr.bo) : 0,
+ .NullVertexBuffer = size == 0,
#if (GEN_GEN >= 8)
.BufferStartingAddress = addr,
.BufferSize = size
emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer,
struct anv_address addr)
{
- emit_vertex_bo(cmd_buffer, addr, 8, ANV_SVGS_VB_INDEX);
+ emit_vertex_bo(cmd_buffer, addr, addr.bo ? 8 : 0, ANV_SVGS_VB_INDEX);
}
static void
emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer,
uint32_t base_vertex, uint32_t base_instance)
{
- struct anv_state id_state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4);
+ if (base_vertex == 0 && base_instance == 0) {
+ emit_base_vertex_instance_bo(cmd_buffer, ANV_NULL_ADDRESS);
+ } else {
+ struct anv_state id_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4);
- ((uint32_t *)id_state.map)[0] = base_vertex;
- ((uint32_t *)id_state.map)[1] = base_instance;
+ ((uint32_t *)id_state.map)[0] = base_vertex;
+ ((uint32_t *)id_state.map)[1] = base_instance;
- struct anv_address addr = {
- .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
- .offset = id_state.offset,
- };
+ struct anv_address addr = {
+ .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
+ .offset = id_state.offset,
+ };
- emit_base_vertex_instance_bo(cmd_buffer, addr);
+ emit_base_vertex_instance_bo(cmd_buffer, addr);
+ }
}
static void
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+ /* The workgroup size of the pipeline affects our push constant layout
+ * so flag push constants as dirty if we change the pipeline.
+ */
+ cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
+ if (GEN_GEN >= 12) {
+ /* GEN:BUG:1408224581
+ *
+    * Workaround (Gen12LP A-step only): an additional PIPE_CONTROL with a
+    * post-sync store-dword operation is required. That is, emit an
+    * additional pipe control after the stencil state whenever the
+    * surface state bits of this state change.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.PostSyncOperation = WriteImmediateData;
+ pc.Address =
+ (struct anv_address) { cmd_buffer->device->workaround_bo, 0 };
+ }
+ }
cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ;
}