#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
-/* We reserve GPR 14 and 15 for conditional rendering */
+/* We reserve:
+ * - GPR 14 for secondary command buffer returns
+ * - GPR 15 for conditional rendering
+ */
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 14
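+/* i.e. the MI builder's allocator may hand out GPR0..GPR13 for its own use,
+ * leaving GPR14 and GPR15 for the purposes listed above.
+ */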
#define __gen_get_batch_dwords anv_batch_emit_dwords
#define __gen_address_offset anv_address_add
return memcmp(surf_pack, view_pack, sizeof(surf_pack)) != 0;
}
-static void
-color_attachment_compute_aux_usage(struct anv_device * device,
- struct anv_cmd_state * cmd_state,
- uint32_t att, VkRect2D render_area)
-{
- struct anv_attachment_state *att_state = &cmd_state->attachments[att];
- struct anv_image_view *iview = cmd_state->attachments[att].image_view;
-
- assert(iview->n_planes == 1);
-
- if (iview->planes[0].isl.base_array_layer >=
- anv_image_aux_layers(iview->image, VK_IMAGE_ASPECT_COLOR_BIT,
- iview->planes[0].isl.base_level)) {
- /* There is no aux buffer which corresponds to the level and layer(s)
- * being accessed.
- */
- att_state->aux_usage = ISL_AUX_USAGE_NONE;
- att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
- return;
- }
-
- att_state->aux_usage =
- anv_layout_to_aux_usage(&device->info, iview->image,
- VK_IMAGE_ASPECT_COLOR_BIT,
- VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
- VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
-
- /* If we don't have aux, then we should have returned early in the layer
- * check above. If we got here, we must have something.
- */
- assert(att_state->aux_usage != ISL_AUX_USAGE_NONE);
-
- if (att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
- att_state->aux_usage == ISL_AUX_USAGE_MCS) {
- att_state->input_aux_usage = att_state->aux_usage;
- } else {
- /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
- *
- * "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
- * setting is only allowed if Surface Format supported for Fast
- * Clear. In addition, if the surface is bound to the sampling
- * engine, Surface Format must be supported for Render Target
- * Compression for surfaces bound to the sampling engine."
- *
- * In other words, we can only sample from a fast-cleared image if it
- * also supports color compression.
- */
- if (isl_format_supports_ccs_e(&device->info, iview->planes[0].isl.format) &&
- isl_format_supports_ccs_d(&device->info, iview->planes[0].isl.format)) {
- att_state->input_aux_usage = ISL_AUX_USAGE_CCS_D;
-
- /* While fast-clear resolves and partial resolves are fairly cheap in the
- * case where you render to most of the pixels, full resolves are not
- * because they potentially involve reading and writing the entire
- * framebuffer. If we can't texture with CCS_E, we should leave it off and
- * limit ourselves to fast clears.
- */
- if (cmd_state->pass->attachments[att].first_subpass_layout ==
- VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
- anv_perf_warn(device, iview->image,
- "Not temporarily enabling CCS_E.");
- }
- } else {
- att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
- }
- }
-
- assert(iview->image->planes[0].aux_surface.isl.usage &
- (ISL_SURF_USAGE_CCS_BIT | ISL_SURF_USAGE_MCS_BIT));
-
- union isl_color_value clear_color = {};
- anv_clear_color_from_att_state(&clear_color, att_state, iview);
-
- att_state->clear_color_is_zero =
- isl_color_value_is_zero(clear_color, iview->planes[0].isl.format);
-}
-
static bool
anv_can_fast_clear_color_view(struct anv_device * device,
struct anv_image_view *iview,
const uint32_t num_layers = iview->planes[0].isl.array_len;
att_state->pending_clear_views = (1 << num_layers) - 1;
- if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
- anv_assert(iview->n_planes == 1);
- assert(att_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- color_attachment_compute_aux_usage(cmd_buffer->device,
- state, i, begin->renderArea);
-
- if (clear_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
- assert(clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- att_state->fast_clear =
- anv_can_fast_clear_color_view(cmd_buffer->device, iview,
- pass_att->first_subpass_layout,
- vk_to_isl_color(att_state->clear_value.color),
- framebuffer->layers,
- begin->renderArea);
- }
- } else {
- /* These will be initialized after the first subpass transition. */
- att_state->aux_usage = ISL_AUX_USAGE_NONE;
- att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ /* This will be initialized after the first subpass transition. */
+ att_state->aux_usage = ISL_AUX_USAGE_NONE;
+
+ att_state->fast_clear = false;
+ if (clear_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
+ assert(clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+ att_state->fast_clear =
+ anv_can_fast_clear_color_view(cmd_buffer->device, iview,
+ pass_att->first_subpass_layout,
+ vk_to_isl_color(att_state->clear_value.color),
+ framebuffer->layers,
+ begin->renderArea);
+ } else if (clear_aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT)) {
att_state->fast_clear =
anv_can_hiz_clear_ds_view(cmd_buffer->device, iview,
pass_att->first_subpass_layout,
* ensured that we have the table even if this command buffer doesn't
* initialize any images.
*/
- cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT;
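+   /* Only platforms with an aux-map table (info.has_aux_map, i.e. Gen12+)
+    * need this invalidation.
+    */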
+ if (cmd_buffer->device->info.has_aux_map)
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT;
/* We send an "Indirect State Pointers Disable" packet at
 * EndCommandBuffer, so all push constant packets are ignored during a
}
anv_cmd_buffer_add_secondary(primary, secondary);
+
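+   /* A primary command buffer is associated with at most one performance
+    * query pool, so every secondary executed into it must use that same
+    * pool (or none at all).
+    */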
+ assert(secondary->perf_query_pool == NULL || primary->perf_query_pool == NULL ||
+ secondary->perf_query_pool == primary->perf_query_pool);
+ if (secondary->perf_query_pool)
+ primary->perf_query_pool = secondary->perf_query_pool;
}
/* The secondary isn't counted in our VF cache tracking so we need to
genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
const struct gen_l3_config *cfg)
{
- assert(cfg);
+ assert(cfg || GEN_GEN >= 12);
if (cfg == cmd_buffer->state.current_l3_config)
return;
if (bits & ANV_PIPE_END_OF_PIPE_SYNC_BIT) {
pipe.CommandStreamerStallEnable = true;
pipe.PostSyncOperation = WriteImmediateData;
- pipe.Address = (struct anv_address) {
- .bo = cmd_buffer->device->workaround_bo,
- .offset = 0
- };
+ pipe.Address = cmd_buffer->device->workaround_address;
}
/*
*/
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
lrm.RegisterAddress = 0x243C; /* GEN7_3DPRIM_START_INSTANCE */
- lrm.MemoryAddress = (struct anv_address) {
- .bo = cmd_buffer->device->workaround_bo,
- .offset = 0
- };
+ lrm.MemoryAddress = cmd_buffer->device->workaround_address;
}
}
*/
if (GEN_GEN == 9 && pipe.VFCacheInvalidationEnable) {
pipe.PostSyncOperation = WriteImmediateData;
- pipe.Address =
- (struct anv_address) { cmd_buffer->device->workaround_bo, 0 };
+ pipe.Address = cmd_buffer->device->workaround_address;
}
}
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
- struct anv_surface_state sstate =
- (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
- desc->image_view->planes[binding->plane].general_sampler_surface_state :
- desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
- surface_state = sstate.state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs)
- add_surface_state_relocs(cmd_buffer, sstate);
+ if (desc->image_view) {
+ struct anv_surface_state sstate =
+ (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
+ desc->image_view->planes[binding->plane].general_sampler_surface_state :
+ desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
+ surface_state = sstate.state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs)
+ add_surface_state_relocs(cmd_buffer, sstate);
+ } else {
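+            /* A descriptor written with a null view (VK_EXT_robustness2
+             * nullDescriptor) falls back to the device's null surface
+             * state, which reads as zero.
+             */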
+ surface_state = cmd_buffer->device->null_surface_state;
+ }
break;
}
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
assert(shader->stage == MESA_SHADER_FRAGMENT);
+ assert(desc->image_view != NULL);
if ((desc->image_view->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) {
/* For depth and stencil input attachments, we treat it like any
* old texture that a user may have bound.
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
- struct anv_surface_state sstate = (binding->write_only)
- ? desc->image_view->planes[binding->plane].writeonly_storage_surface_state
- : desc->image_view->planes[binding->plane].storage_surface_state;
- surface_state = sstate.state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs)
- add_surface_state_relocs(cmd_buffer, sstate);
+ if (desc->image_view) {
+ struct anv_surface_state sstate = (binding->write_only)
+ ? desc->image_view->planes[binding->plane].writeonly_storage_surface_state
+ : desc->image_view->planes[binding->plane].storage_surface_state;
+ surface_state = sstate.state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs)
+ add_surface_state_relocs(cmd_buffer, sstate);
+ } else {
+ surface_state = cmd_buffer->device->null_surface_state;
+ }
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- surface_state = desc->buffer_view->surface_state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs) {
- add_surface_reloc(cmd_buffer, surface_state,
- desc->buffer_view->address);
+ if (desc->buffer_view) {
+ surface_state = desc->buffer_view->surface_state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs) {
+ add_surface_reloc(cmd_buffer, surface_state,
+ desc->buffer_view->address);
+ }
+ } else {
+ surface_state = cmd_buffer->device->null_surface_state;
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- /* Compute the offset within the buffer */
- struct anv_push_constants *push =
- &cmd_buffer->state.push_constants[shader->stage];
-
- uint32_t dynamic_offset =
- push->dynamic_offsets[binding->dynamic_offset_index];
- uint64_t offset = desc->offset + dynamic_offset;
- /* Clamp to the buffer size */
- offset = MIN2(offset, desc->buffer->size);
- /* Clamp the range to the buffer size */
- uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
-
- /* Align the range for consistency */
- if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
- range = align_u32(range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT);
-
- struct anv_address address =
- anv_address_add(desc->buffer->address, offset);
-
- surface_state =
- anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
- enum isl_format format =
- anv_isl_format_for_descriptor_type(desc->type);
-
- anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
- format, address, range, 1);
- if (need_client_mem_relocs)
- add_surface_reloc(cmd_buffer, surface_state, address);
+ if (desc->buffer) {
+ /* Compute the offset within the buffer */
+ struct anv_push_constants *push =
+ &cmd_buffer->state.push_constants[shader->stage];
+
+ uint32_t dynamic_offset =
+ push->dynamic_offsets[binding->dynamic_offset_index];
+ uint64_t offset = desc->offset + dynamic_offset;
+ /* Clamp to the buffer size */
+ offset = MIN2(offset, desc->buffer->size);
+ /* Clamp the range to the buffer size */
+ uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
+
+ /* Align the range for consistency */
+ if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
+ range = align_u32(range, ANV_UBO_ALIGNMENT);
+
+ struct anv_address address =
+ anv_address_add(desc->buffer->address, offset);
+
+ surface_state =
+ anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
+ enum isl_format format =
+ anv_isl_format_for_descriptor_type(desc->type);
+
+ anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
+ format, address, range, 1);
+ if (need_client_mem_relocs)
+ add_surface_reloc(cmd_buffer, surface_state, address);
+ } else {
+ surface_state = cmd_buffer->device->null_surface_state;
+ }
break;
}
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- surface_state = (binding->write_only)
- ? desc->buffer_view->writeonly_storage_surface_state
- : desc->buffer_view->storage_surface_state;
- assert(surface_state.alloc_size);
- if (need_client_mem_relocs) {
- add_surface_reloc(cmd_buffer, surface_state,
- desc->buffer_view->address);
+ if (desc->buffer_view) {
+ surface_state = (binding->write_only)
+ ? desc->buffer_view->writeonly_storage_surface_state
+ : desc->buffer_view->storage_surface_state;
+ assert(surface_state.alloc_size);
+ if (need_client_mem_relocs) {
+ add_surface_reloc(cmd_buffer, surface_state,
+ desc->buffer_view->address);
+ }
+ } else {
+ surface_state = cmd_buffer->device->null_surface_state;
}
break;
&set->descriptors[range->index];
if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
- return desc->buffer_view->address;
+ if (desc->buffer_view)
+ return desc->buffer_view->address;
} else {
assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
- struct anv_push_constants *push =
- &cmd_buffer->state.push_constants[stage];
- uint32_t dynamic_offset =
- push->dynamic_offsets[range->dynamic_offset_index];
- return anv_address_add(desc->buffer->address,
- desc->offset + dynamic_offset);
+ if (desc->buffer) {
+ struct anv_push_constants *push =
+ &cmd_buffer->state.push_constants[stage];
+ uint32_t dynamic_offset =
+ push->dynamic_offsets[range->dynamic_offset_index];
+ return anv_address_add(desc->buffer->address,
+ desc->offset + dynamic_offset);
+ }
}
+
+   /* For NULL UBOs, we just return an address in the workaround BO.
+    * Workaround writes only ever touch the bottom of that BO, so the bytes
+    * at offset 1024 and above should always read as zero.
+    */
+ assert(range->length * 32 <= 2048);
+ return (struct anv_address) {
+ .bo = cmd_buffer->device->workaround_bo,
+ .offset = 1024,
+ };
}
}
}
&set->descriptors[range->index];
if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+ if (!desc->buffer_view)
+ return 0;
+
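+         /* Push constant ranges are in units of 32-byte registers; if the
+          * range starts past the end of the buffer view there is nothing
+          * left to push.
+          */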
+ if (range->start * 32 > desc->buffer_view->range)
+ return 0;
+
return desc->buffer_view->range;
} else {
+ if (!desc->buffer)
+ return 0;
+
assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
/* Compute the offset within the buffer */
struct anv_push_constants *push =
uint32_t bound_range = MIN2(desc->range, desc->buffer->size - offset);
/* Align the range for consistency */
- bound_range = align_u32(bound_range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT);
+ bound_range = align_u32(bound_range, ANV_UBO_ALIGNMENT);
return bound_range;
}
const struct anv_pipeline_bind_map *bind_map =
&pipeline->shaders[stage]->bind_map;
-#if GEN_GEN >= 12
+#if GEN_GEN >= 9
+ /* This field exists since Gen8. However, the Broadwell PRM says:
+ *
+ * "Constant Buffer Object Control State must be always programmed
+ * to zero."
+ *
+ * This restriction does not exist on any newer platforms.
+ *
+ * We only have one MOCS field for the whole packet, not one per
+ * buffer. We could go out of our way here to walk over all of the
+ * buffers and see if any of them are used externally and use the
+ * external MOCS. However, the notion that someone would use the
+ * same bit of memory for both scanout and a UBO is nuts. Let's not
+ * bother and assume it's all internal.
+ */
c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
#endif
cmd_buffer->state.push_constants_dirty &= ~flushed;
}
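+/* Re-emit 3DSTATE_CLIP whenever dynamic state it depends on changes: the
+ * viewport count on all gens, plus front face and cull mode on Gen7 and
+ * earlier, where 3DSTATE_CLIP carries FrontWinding and CullMode.
+ */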
+static void
+cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
+{
+ const uint32_t clip_states =
+#if GEN_GEN <= 7
+ ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
+ ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |
+#endif
+ ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |
+ ANV_CMD_DIRTY_PIPELINE;
+
+ if ((cmd_buffer->state.gfx.dirty & clip_states) == 0)
+ return;
+
+#if GEN_GEN <= 7
+ const struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;
+#endif
+ struct GENX(3DSTATE_CLIP) clip = {
+ GENX(3DSTATE_CLIP_header),
+#if GEN_GEN <= 7
+ .FrontWinding = genX(vk_to_gen_front_face)[d->front_face],
+ .CullMode = genX(vk_to_gen_cullmode)[d->cull_mode],
+#endif
+ };
+ uint32_t dwords[GENX(3DSTATE_CLIP_length)];
+
+ struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ const struct brw_vue_prog_data *last =
+ anv_pipeline_get_last_vue_prog_data(pipeline);
+ if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
+ clip.MaximumVPIndex =
+ cmd_buffer->state.gfx.dynamic.viewport.count > 0 ?
+ cmd_buffer->state.gfx.dynamic.viewport.count - 1 : 0;
+ }
+
+ GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
+ anv_batch_emit_merge(&cmd_buffer->batch, dwords,
+ pipeline->gen7.clip);
+}
+
void
genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;
- struct GENX(VERTEX_BUFFER_STATE) state = {
- .VertexBufferIndex = vb,
+ /* If dynamic, use stride/size from vertex binding, otherwise use
+       * stride/size that was set up in the pipeline object.
+ */
+ bool dynamic_stride = cmd_buffer->state.gfx.dynamic.dyn_vbo_stride;
+ bool dynamic_size = cmd_buffer->state.gfx.dynamic.dyn_vbo_size;
+
+ uint32_t stride = dynamic_stride ?
+ cmd_buffer->state.vertex_bindings[vb].stride : pipeline->vb[vb].stride;
+      /* The buffer may be NULL (null vertex binding); guard the size read. */
+      uint32_t size = dynamic_size ?
+         cmd_buffer->state.vertex_bindings[vb].size :
+         (buffer ? buffer->size : 0);
+
+ struct GENX(VERTEX_BUFFER_STATE) state;
+ if (buffer) {
+ state = (struct GENX(VERTEX_BUFFER_STATE)) {
+ .VertexBufferIndex = vb,
- .MOCS = anv_mocs_for_bo(cmd_buffer->device, buffer->address.bo),
+ .MOCS = anv_mocs_for_bo(cmd_buffer->device, buffer->address.bo),
#if GEN_GEN <= 7
- .BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
- .InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
+ .BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
+ .InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
#endif
-
- .AddressModifyEnable = true,
- .BufferPitch = pipeline->vb[vb].stride,
- .BufferStartingAddress = anv_address_add(buffer->address, offset),
+ .AddressModifyEnable = true,
+ .BufferPitch = stride,
+ .BufferStartingAddress = anv_address_add(buffer->address, offset),
+ .NullVertexBuffer = offset >= buffer->size,
#if GEN_GEN >= 8
- .BufferSize = buffer->size - offset
+ .BufferSize = size - offset
#else
- .EndAddress = anv_address_add(buffer->address, buffer->size - 1),
+ .EndAddress = anv_address_add(buffer->address, size - 1),
#endif
- };
+ };
+ } else {
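+         /* A null vertex binding (VK_EXT_robustness2) is programmed as a
+          * null vertex buffer so vertex fetches read as zero.
+          */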
+ state = (struct GENX(VERTEX_BUFFER_STATE)) {
+ .VertexBufferIndex = vb,
+ .NullVertexBuffer = true,
+ };
+ }
#if GEN_GEN >= 8 && GEN_GEN <= 9
genX(cmd_buffer_set_binding_for_gen8_vb_flush)(cmd_buffer, vb,
cmd_buffer_alloc_push_constants(cmd_buffer);
}
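+   /* Binding a pipeline resets the dynamic primitive topology to the
+    * pipeline's topology; dynamic primitive-topology state
+    * (vkCmdSetPrimitiveTopologyEXT) may override it afterwards.
+    */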
+ if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE)
+ cmd_buffer->state.gfx.primitive_topology = pipeline->topology;
+
#if GEN_GEN <= 7
if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT ||
cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DepthStallEnable = true;
pc.PostSyncOperation = WriteImmediateData;
- pc.Address =
- (struct anv_address) { cmd_buffer->device->workaround_bo, 0 };
+ pc.Address = cmd_buffer->device->workaround_address;
}
}
#endif
if (dirty)
cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
+ cmd_buffer_emit_clip(cmd_buffer);
+
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
gen8_cmd_buffer_emit_viewport(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
prim.VertexAccessType = SEQUENTIAL;
- prim.PrimitiveTopologyType = pipeline->topology;
+ prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology;
prim.VertexCountPerInstance = vertexCount;
prim.StartVertexLocation = firstVertex;
prim.InstanceCount = instanceCount;
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
prim.VertexAccessType = RANDOM;
- prim.PrimitiveTopologyType = pipeline->topology;
+ prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology;
prim.VertexCountPerInstance = indexCount;
prim.StartVertexLocation = firstIndex;
prim.InstanceCount = instanceCount;
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.IndirectParameterEnable = true;
prim.VertexAccessType = SEQUENTIAL;
- prim.PrimitiveTopologyType = pipeline->topology;
+ prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology;
}
update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
prim.IndirectParameterEnable = true;
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
prim.VertexAccessType = SEQUENTIAL;
- prim.PrimitiveTopologyType = pipeline->topology;
+ prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology;
}
update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
prim.IndirectParameterEnable = true;
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
prim.VertexAccessType = RANDOM;
- prim.PrimitiveTopologyType = pipeline->topology;
+ prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology;
}
update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, RANDOM);
}
}
-#define TMP_DRAW_COUNT_REG 0x2670 /* MI_ALU_REG14 */
-
-static void
+static struct gen_mi_value
prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
+ struct gen_mi_builder *b,
struct anv_address count_address,
const bool conditional_render_enabled)
{
- struct gen_mi_builder b;
- gen_mi_builder_init(&b, &cmd_buffer->batch);
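+   /* With conditional rendering enabled, the draw count is loaded into a
+    * freshly allocated GPR and returned so each draw index can be compared
+    * against it; the caller must gen_mi_value_unref() the returned value.
+    */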
+ struct gen_mi_value ret = gen_mi_imm(0);
if (conditional_render_enabled) {
#if GEN_GEN >= 8 || GEN_IS_HASWELL
- gen_mi_store(&b, gen_mi_reg64(TMP_DRAW_COUNT_REG),
- gen_mi_mem32(count_address));
+ ret = gen_mi_new_gpr(b);
+ gen_mi_store(b, gen_mi_value_ref(b, ret), gen_mi_mem32(count_address));
#endif
} else {
/* Upload the current draw count from the draw parameters buffer to
* MI_PREDICATE_SRC0.
*/
- gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC0),
- gen_mi_mem32(count_address));
+ gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC0),
+ gen_mi_mem32(count_address));
- gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_SRC1 + 4), gen_mi_imm(0));
+ gen_mi_store(b, gen_mi_reg32(MI_PREDICATE_SRC1 + 4), gen_mi_imm(0));
}
+
+ return ret;
}
static void
emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
+ struct gen_mi_builder *b,
uint32_t draw_index)
{
- struct gen_mi_builder b;
- gen_mi_builder_init(&b, &cmd_buffer->batch);
-
/* Upload the index of the current primitive to MI_PREDICATE_SRC1. */
- gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_SRC1), gen_mi_imm(draw_index));
+ gen_mi_store(b, gen_mi_reg32(MI_PREDICATE_SRC1), gen_mi_imm(draw_index));
if (draw_index == 0) {
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
static void
emit_draw_count_predicate_with_conditional_render(
struct anv_cmd_buffer *cmd_buffer,
- uint32_t draw_index)
+ struct gen_mi_builder *b,
+ uint32_t draw_index,
+ struct gen_mi_value max)
{
- struct gen_mi_builder b;
- gen_mi_builder_init(&b, &cmd_buffer->batch);
-
- struct gen_mi_value pred = gen_mi_ult(&b, gen_mi_imm(draw_index),
- gen_mi_reg64(TMP_DRAW_COUNT_REG));
- pred = gen_mi_iand(&b, pred, gen_mi_reg64(ANV_PREDICATE_RESULT_REG));
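+   /* Predicate the draw on (draw_index < draw count) ANDed with the
+    * conditional rendering result; `max` holds the draw count loaded by
+    * prepare_for_draw_count_predicate().
+    */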
+ struct gen_mi_value pred = gen_mi_ult(b, gen_mi_imm(draw_index), max);
+ pred = gen_mi_iand(b, pred, gen_mi_reg64(ANV_PREDICATE_RESULT_REG));
#if GEN_GEN >= 8
- gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_RESULT), pred);
+ gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_RESULT), pred);
#else
/* MI_PREDICATE_RESULT is not whitelisted in i915 command parser
* so we emit MI_PREDICATE to set it.
*/
- gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC0), pred);
- gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(0));
+ gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC0), pred);
+ gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(0));
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
genX(cmd_buffer_flush_state)(cmd_buffer);
+ struct gen_mi_builder b;
+ gen_mi_builder_init(&b, &cmd_buffer->batch);
struct anv_address count_address =
anv_address_add(count_buffer->address, countBufferOffset);
-
- prepare_for_draw_count_predicate(cmd_buffer, count_address,
- cmd_state->conditional_render_enabled);
+ struct gen_mi_value max =
+ prepare_for_draw_count_predicate(cmd_buffer, &b, count_address,
+ cmd_state->conditional_render_enabled);
for (uint32_t i = 0; i < maxDrawCount; i++) {
struct anv_address draw = anv_address_add(buffer->address, offset);
#if GEN_GEN >= 8 || GEN_IS_HASWELL
if (cmd_state->conditional_render_enabled) {
- emit_draw_count_predicate_with_conditional_render(cmd_buffer, i);
+ emit_draw_count_predicate_with_conditional_render(
+ cmd_buffer, &b, i, gen_mi_value_ref(&b, max));
} else {
- emit_draw_count_predicate(cmd_buffer, i);
+ emit_draw_count_predicate(cmd_buffer, &b, i);
}
#else
- emit_draw_count_predicate(cmd_buffer, i);
+ emit_draw_count_predicate(cmd_buffer, &b, i);
#endif
if (vs_prog_data->uses_firstvertex ||
prim.IndirectParameterEnable = true;
prim.PredicateEnable = true;
prim.VertexAccessType = SEQUENTIAL;
- prim.PrimitiveTopologyType = pipeline->topology;
+ prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology;
}
update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, SEQUENTIAL);
offset += stride;
}
+
+ gen_mi_value_unref(&b, max);
}
void genX(CmdDrawIndexedIndirectCount)(
genX(cmd_buffer_flush_state)(cmd_buffer);
+ struct gen_mi_builder b;
+ gen_mi_builder_init(&b, &cmd_buffer->batch);
struct anv_address count_address =
anv_address_add(count_buffer->address, countBufferOffset);
-
- prepare_for_draw_count_predicate(cmd_buffer, count_address,
- cmd_state->conditional_render_enabled);
+ struct gen_mi_value max =
+ prepare_for_draw_count_predicate(cmd_buffer, &b, count_address,
+ cmd_state->conditional_render_enabled);
for (uint32_t i = 0; i < maxDrawCount; i++) {
struct anv_address draw = anv_address_add(buffer->address, offset);
#if GEN_GEN >= 8 || GEN_IS_HASWELL
if (cmd_state->conditional_render_enabled) {
- emit_draw_count_predicate_with_conditional_render(cmd_buffer, i);
+ emit_draw_count_predicate_with_conditional_render(
+ cmd_buffer, &b, i, gen_mi_value_ref(&b, max));
} else {
- emit_draw_count_predicate(cmd_buffer, i);
+ emit_draw_count_predicate(cmd_buffer, &b, i);
}
#else
- emit_draw_count_predicate(cmd_buffer, i);
+ emit_draw_count_predicate(cmd_buffer, &b, i);
#endif
/* TODO: We need to stomp base vertex to 0 somehow */
prim.IndirectParameterEnable = true;
prim.PredicateEnable = true;
prim.VertexAccessType = RANDOM;
- prim.PrimitiveTopologyType = pipeline->topology;
+ prim.PrimitiveTopologyType = cmd_buffer->state.gfx.primitive_topology;
}
update_dirty_vbs_for_gen8_vb_flush(cmd_buffer, RANDOM);
offset += stride;
}
+
+ gen_mi_value_unref(&b, max);
}
void genX(CmdBeginTransformFeedbackEXT)(
genX(CmdDispatchBase)(commandBuffer, 0, 0, 0, x, y, z);
}
+static inline void
+emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_compute_pipeline *pipeline, bool indirect,
+ const struct brw_cs_prog_data *prog_data,
+ uint32_t groupCountX, uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
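+   /* On Gen7, indirect dispatches are always predicated so that a dispatch
+    * with a zero group count in any dimension can be skipped.
+    */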
+ bool predicate = (GEN_GEN <= 7 && indirect) ||
+ cmd_buffer->state.conditional_render_enabled;
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
+ ggw.IndirectParameterEnable = indirect;
+ ggw.PredicateEnable = predicate;
+ ggw.SIMDSize = cs_params.simd_size / 16;
+ ggw.ThreadDepthCounterMaximum = 0;
+ ggw.ThreadHeightCounterMaximum = 0;
+ ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
+ ggw.ThreadGroupIDXDimension = groupCountX;
+ ggw.ThreadGroupIDYDimension = groupCountY;
+ ggw.ThreadGroupIDZDimension = groupCountZ;
+ ggw.RightExecutionMask = pipeline->cs_right_mask;
+ ggw.BottomExecutionMask = 0xffffffff;
+ }
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH), msf);
+}
+
void genX(CmdDispatchBase)(
VkCommandBuffer commandBuffer,
uint32_t baseGroupX,
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
- anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
- ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
- ggw.SIMDSize = prog_data->simd_size / 16;
- ggw.ThreadDepthCounterMaximum = 0;
- ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
- ggw.ThreadGroupIDXDimension = groupCountX;
- ggw.ThreadGroupIDYDimension = groupCountY;
- ggw.ThreadGroupIDZDimension = groupCountZ;
- ggw.RightExecutionMask = pipeline->cs_right_mask;
- ggw.BottomExecutionMask = 0xffffffff;
- }
-
- anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH), msf);
+ emit_gpgpu_walker(cmd_buffer, pipeline, false, prog_data, groupCountX,
+ groupCountY, groupCountZ);
}
#define GPGPU_DISPATCHDIMX 0x2500
struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline);
struct anv_address addr = anv_address_add(buffer->address, offset);
- struct anv_batch *batch = &cmd_buffer->batch;
+ UNUSED struct anv_batch *batch = &cmd_buffer->batch;
anv_cmd_buffer_push_base_group_id(cmd_buffer, 0, 0, 0);
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
#endif
- anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
- ggw.IndirectParameterEnable = true;
- ggw.PredicateEnable = GEN_GEN <= 7 ||
- cmd_buffer->state.conditional_render_enabled;
- ggw.SIMDSize = prog_data->simd_size / 16;
- ggw.ThreadDepthCounterMaximum = 0;
- ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
- ggw.RightExecutionMask = pipeline->cs_right_mask;
- ggw.BottomExecutionMask = 0xffffffff;
- }
-
- anv_batch_emit(batch, GENX(MEDIA_STATE_FLUSH), msf);
+ emit_gpgpu_walker(cmd_buffer, pipeline, true, prog_data, 0, 0, 0);
}
static void
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.PostSyncOperation = WriteImmediateData;
- pc.Address =
- (struct anv_address) { cmd_buffer->device->workaround_bo, 0 };
+ pc.Address = cmd_buffer->device->workaround_address;
}
}
cmd_buffer->state.hiz_enabled = isl_aux_usage_has_hiz(info.hiz_usage);
struct anv_image_view *iview = cmd_state->attachments[a].image_view;
const struct anv_image *image = iview->image;
- /* A resolve is necessary before use as an input attachment if the clear
- * color or auxiliary buffer usage isn't supported by the sampler.
- */
- const bool input_needs_resolve =
- (att_state->fast_clear && !att_state->clear_color_is_zero) ||
- att_state->input_aux_usage != att_state->aux_usage;
-
- VkImageLayout target_layout;
- if (iview->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV &&
- !input_needs_resolve) {
- /* Layout transitions before the final only help to enable sampling
- * as an input attachment. If the input attachment supports sampling
- * using the auxiliary surface, we can skip such transitions by
- * making the target layout one that is CCS-aware.
- */
- target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- } else {
- target_layout = subpass->attachments[i].layout;
- }
-
+ VkImageLayout target_layout = subpass->attachments[i].layout;
VkImageLayout target_stencil_layout =
subpass->attachments[i].stencil_layout;
iview->planes[0].isl.base_level, 1,
base_layer, layer_count,
att_state->current_layout, target_layout);
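+      /* The attachment's aux usage now follows directly from the layout it
+       * was just transitioned to, rather than being precomputed when the
+       * render pass begins.
+       */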
+ att_state->aux_usage =
+ anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ target_layout);
}
if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
if (is_multiview)
att_state->pending_clear_views &= ~1;
- if (att_state->clear_color_is_zero) {
+ if (isl_color_value_is_zero(clear_color,
+ iview->planes[0].isl.format)) {
/* This image has the auxiliary buffer enabled. We can mark the
* subresource as not needing a resolve because the clear color
* will match what's in every RENDER_SURFACE_STATE object when
} else if (att_usage == VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
surface_state = &att_state->input;
isl_surf_usage = ISL_SURF_USAGE_TEXTURE_BIT;
- isl_aux_usage = att_state->input_aux_usage;
+ isl_aux_usage =
+ anv_layout_to_aux_usage(&cmd_buffer->device->info, iview->image,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
+ att_state->current_layout);
} else {
continue;
}