X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2FgenX_cmd_buffer.c;h=efc05889f79a8d3ee4b0517924c5778e2f3a4ef7;hb=ffc84eac0d5a0c30e445fcdb2f0cfd0c5bf5321d;hp=22d4f79d28dc24ed422c3751648814efe909c19d;hpb=cd40110420b48b3005c9d1d4ea30e2cbcc9a3d40;p=mesa.git diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 22d4f79d28d..efc05889f79 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -802,6 +802,7 @@ static void anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, enum isl_format format, + struct isl_swizzle swizzle, VkImageAspectFlagBits aspect, uint32_t level, uint32_t array_layer, enum isl_aux_op resolve_op, @@ -826,14 +827,15 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_D) resolve_op = ISL_AUX_OP_FULL_RESOLVE; - anv_image_ccs_op(cmd_buffer, image, format, aspect, level, - array_layer, 1, resolve_op, NULL, true); + anv_image_ccs_op(cmd_buffer, image, format, swizzle, aspect, + level, array_layer, 1, resolve_op, NULL, true); } static void anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, enum isl_format format, + struct isl_swizzle swizzle, VkImageAspectFlagBits aspect, uint32_t array_layer, enum isl_aux_op resolve_op, @@ -847,7 +849,7 @@ anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer, aspect, 0, array_layer, resolve_op, fast_clear_supported); - anv_image_mcs_op(cmd_buffer, image, format, aspect, + anv_image_mcs_op(cmd_buffer, image, format, swizzle, aspect, array_layer, 1, resolve_op, NULL, true); #else unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail"); @@ -1021,7 +1023,7 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer, * with not having this stall in some cases if we were really careful but * it's better to play it safe. Full stall the GPU. */ - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); struct gen_mi_builder b; @@ -1233,6 +1235,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, anv_image_ccs_op(cmd_buffer, image, image->planes[plane].surface.isl.format, + ISL_SWIZZLE_IDENTITY, aspect, level, base_layer, level_layer_count, ISL_AUX_OP_AMBIGUATE, NULL, false); @@ -1252,6 +1255,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, assert(base_level == 0 && level_count == 1); anv_image_mcs_op(cmd_buffer, image, image->planes[plane].surface.isl.format, + ISL_SWIZZLE_IDENTITY, aspect, base_layer, layer_count, ISL_AUX_OP_FAST_CLEAR, NULL, false); } @@ -1331,6 +1335,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, if (image->samples == 1) { anv_cmd_predicated_ccs_resolve(cmd_buffer, image, image->planes[plane].surface.isl.format, + ISL_SWIZZLE_IDENTITY, aspect, level, array_layer, resolve_op, final_fast_clear); } else { @@ -1344,6 +1349,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, anv_cmd_predicated_mcs_resolve(cmd_buffer, image, image->planes[plane].surface.isl.format, + ISL_SWIZZLE_IDENTITY, aspect, array_layer, resolve_op, final_fast_clear); } @@ -2036,7 +2042,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) * add extra flushes in the case it knows that the engine is already * IDLE." 
*/ - if (GEN_GEN == 12 && ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) + if (GEN_GEN == 12 && (bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)) bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT; /* If we're going to do an invalidate and we have a pending end-of-pipe @@ -2379,7 +2385,7 @@ static void cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer) { VkShaderStageFlags stages = - cmd_buffer->state.gfx.base.pipeline->active_stages; + cmd_buffer->state.gfx.pipeline->active_stages; /* In order to avoid thrash, we assume that vertex and fragment stages * always exist. In the rare case where one is missing *and* the other @@ -2467,44 +2473,16 @@ anv_descriptor_set_address(struct anv_cmd_buffer *cmd_buffer, } } -static struct anv_cmd_pipeline_state * -pipe_state_for_stage(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage) -{ - switch (stage) { - case MESA_SHADER_COMPUTE: - return &cmd_buffer->state.compute.base; - - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - case MESA_SHADER_FRAGMENT: - return &cmd_buffer->state.gfx.base; - - default: - unreachable("invalid stage"); - } -} - static VkResult emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, + struct anv_cmd_pipeline_state *pipe_state, + struct anv_shader_bin *shader, struct anv_state *bt_state) { struct anv_subpass *subpass = cmd_buffer->state.subpass; uint32_t state_offset; - struct anv_cmd_pipeline_state *pipe_state = - pipe_state_for_stage(cmd_buffer, stage); - struct anv_pipeline *pipeline = pipe_state->pipeline; - - if (!anv_pipeline_has_stage(pipeline, stage)) { - *bt_state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map; + struct anv_pipeline_bind_map *map = &shader->bind_map; if (map->surface_count == 0) { *bt_state = (struct anv_state) { 0, }; return VK_SUCCESS; @@ -2536,7 +2514,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS: /* Color attachment binding */ - assert(stage == MESA_SHADER_FRAGMENT); + assert(shader->stage == MESA_SHADER_FRAGMENT); if (binding->index < subpass->color_count) { const unsigned att = subpass->color_attachments[binding->index].attachment; @@ -2564,11 +2542,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_alloc_surface_state(cmd_buffer); struct anv_address constant_data = { - .bo = pipeline->device->dynamic_state_pool.block_pool.bo, - .offset = pipeline->shaders[stage]->constant_data.offset, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .offset = shader->constant_data.offset, }; - unsigned constant_data_size = - pipeline->shaders[stage]->constant_data_size; + unsigned constant_data_size = shader->constant_data_size; const enum isl_format format = anv_isl_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); @@ -2583,7 +2560,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: { /* This is always the first binding for compute shaders */ - assert(stage == MESA_SHADER_COMPUTE && s == 0); + assert(shader->stage == MESA_SHADER_COMPUTE && s == 0); struct anv_state surface_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer); @@ -2639,7 +2616,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, break; } case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - assert(stage == MESA_SHADER_FRAGMENT); + assert(shader->stage == MESA_SHADER_FRAGMENT); if ((desc->image_view->aspect_mask & 
VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) { /* For depth and stencil input attachments, we treat it like any * old texture that a user may have bound. @@ -2691,7 +2668,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { /* Compute the offset within the buffer */ struct anv_push_constants *push = - &cmd_buffer->state.push_constants[stage]; + &cmd_buffer->state.push_constants[shader->stage]; uint32_t dynamic_offset = push->dynamic_offsets[binding->dynamic_offset_index]; @@ -2701,6 +2678,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, /* Clamp the range to the buffer size */ uint32_t range = MIN2(desc->range, desc->buffer->size - offset); + /* Align the range for consistency */ + if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) + range = align_u32(range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT); + struct anv_address address = anv_address_add(desc->buffer->address, offset); @@ -2742,19 +2723,11 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, static VkResult emit_samplers(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, + struct anv_cmd_pipeline_state *pipe_state, + struct anv_shader_bin *shader, struct anv_state *state) { - struct anv_cmd_pipeline_state *pipe_state = - pipe_state_for_stage(cmd_buffer, stage); - struct anv_pipeline *pipeline = pipe_state->pipeline; - - if (!anv_pipeline_has_stage(pipeline, stage)) { - *state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map; + struct anv_pipeline_bind_map *map = &shader->bind_map; if (map->sampler_count == 0) { *state = (struct anv_state) { 0, }; return VK_SUCCESS; @@ -2792,20 +2765,33 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer, static uint32_t flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer, - struct anv_pipeline *pipeline) + struct anv_cmd_pipeline_state *pipe_state, + struct anv_shader_bin **shaders, + uint32_t num_shaders) { - VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & - pipeline->active_stages; + const VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty; + VkShaderStageFlags flushed = 0; VkResult result = VK_SUCCESS; - anv_foreach_stage(s, dirty) { - result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]); + for (uint32_t i = 0; i < num_shaders; i++) { + if (!shaders[i]) + continue; + + gl_shader_stage stage = shaders[i]->stage; + VkShaderStageFlags vk_stage = mesa_to_vk_shader_stage(stage); + if ((vk_stage & dirty) == 0) + continue; + + result = emit_samplers(cmd_buffer, pipe_state, shaders[i], + &cmd_buffer->state.samplers[stage]); if (result != VK_SUCCESS) break; - result = emit_binding_table(cmd_buffer, s, - &cmd_buffer->state.binding_tables[s]); + result = emit_binding_table(cmd_buffer, pipe_state, shaders[i], + &cmd_buffer->state.binding_tables[stage]); if (result != VK_SUCCESS) break; + + flushed |= vk_stage; } if (result != VK_SUCCESS) { @@ -2821,25 +2807,34 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_emit_state_base_address)(cmd_buffer); /* Re-emit all active binding tables */ - dirty |= pipeline->active_stages; - anv_foreach_stage(s, dirty) { - result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]); + flushed = 0; + + for (uint32_t i = 0; i < num_shaders; i++) { + if (!shaders[i]) + continue; + + gl_shader_stage stage = shaders[i]->stage; + + result = emit_samplers(cmd_buffer, pipe_state, shaders[i], + &cmd_buffer->state.samplers[stage]); if (result != 
VK_SUCCESS) { anv_batch_set_error(&cmd_buffer->batch, result); return 0; } - result = emit_binding_table(cmd_buffer, s, - &cmd_buffer->state.binding_tables[s]); + result = emit_binding_table(cmd_buffer, pipe_state, shaders[i], + &cmd_buffer->state.binding_tables[stage]); if (result != VK_SUCCESS) { anv_batch_set_error(&cmd_buffer->batch, result); return 0; } + + flushed |= mesa_to_vk_shader_stage(stage); } } - cmd_buffer->state.descriptors_dirty &= ~dirty; + cmd_buffer->state.descriptors_dirty &= ~flushed; - return dirty; + return flushed; } static void @@ -2886,7 +2881,6 @@ cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, } } -#if GEN_GEN >= 8 || GEN_IS_HASWELL static struct anv_address get_push_range_address(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, @@ -2902,7 +2896,6 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer, struct anv_descriptor_set *set = gfx_state->base.descriptors[range->index]; return anv_descriptor_set_address(cmd_buffer, set); - break; } case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: { @@ -2912,7 +2905,6 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer, .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = state.offset, }; - break; } default: { @@ -2936,14 +2928,73 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer, } } } -#endif + + +/** Returns the size in bytes of the bound buffer + * + * The range is relative to the start of the buffer, not the start of the + * range. The returned range may be smaller than + * + * (range->start + range->length) * 32; + */ +static uint32_t +get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + const struct anv_push_range *range) +{ + assert(stage != MESA_SHADER_COMPUTE); + const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; + switch (range->set) { + case ANV_DESCRIPTOR_SET_DESCRIPTORS: { + struct anv_descriptor_set *set = + gfx_state->base.descriptors[range->index]; + assert(range->start * 32 < set->desc_mem.alloc_size); + assert((range->start + range->length) * 32 <= set->desc_mem.alloc_size); + return set->desc_mem.alloc_size; + } + + case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: + return (range->start + range->length) * 32; + + default: { + assert(range->set < MAX_SETS); + struct anv_descriptor_set *set = + gfx_state->base.descriptors[range->set]; + const struct anv_descriptor *desc = + &set->descriptors[range->index]; + + if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + return desc->buffer_view->range; + } else { + assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC); + /* Compute the offset within the buffer */ + struct anv_push_constants *push = + &cmd_buffer->state.push_constants[stage]; + uint32_t dynamic_offset = + push->dynamic_offsets[range->dynamic_offset_index]; + uint64_t offset = desc->offset + dynamic_offset; + /* Clamp to the buffer size */ + offset = MIN2(offset, desc->buffer->size); + /* Clamp the range to the buffer size */ + uint32_t bound_range = MIN2(desc->range, desc->buffer->size - offset); + + /* Align the range for consistency */ + bound_range = align_u32(bound_range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT); + + return bound_range; + } + } + } +} static void cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, unsigned buffer_count) + gl_shader_stage stage, + struct anv_address *buffers, + unsigned buffer_count) { const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; - const struct anv_pipeline *pipeline = 
gfx_state->base.pipeline; + const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; static const uint32_t push_constant_opcodes[] = { [MESA_SHADER_VERTEX] = 21, @@ -2993,24 +3044,23 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, */ assert((GEN_GEN >= 8 || GEN_IS_HASWELL) || i == 0); - const struct anv_address addr = - get_push_range_address(cmd_buffer, stage, range); c.ConstantBody.ReadLength[i + shift] = range->length; c.ConstantBody.Buffer[i + shift] = - anv_address_add(addr, range->start * 32); + anv_address_add(buffers[i], range->start * 32); } #else /* For Ivy Bridge, push constants are relative to dynamic state * base address and we only ever push actual push constants. */ if (bind_map->push_ranges[0].length > 0) { + assert(buffer_count == 1); assert(bind_map->push_ranges[0].set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS); - struct anv_state state = - anv_cmd_buffer_push_constants(cmd_buffer, stage); + assert(buffers[0].bo == + cmd_buffer->device->dynamic_state_pool.block_pool.bo); c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length; c.ConstantBody.Buffer[0].bo = NULL; - c.ConstantBody.Buffer[0].offset = state.offset; + c.ConstantBody.Buffer[0].offset = buffers[0].offset; } assert(bind_map->push_ranges[1].length == 0); assert(bind_map->push_ranges[2].length == 0); @@ -3023,9 +3073,11 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, #if GEN_GEN >= 12 static void cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, - uint32_t shader_mask, uint32_t count) + uint32_t shader_mask, + struct anv_address *buffers, + uint32_t buffer_count) { - if (count == 0) { + if (buffer_count == 0) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) { c.ShaderUpdateEnable = shader_mask; c.MOCS = cmd_buffer->device->isl_dev.mocs.internal; @@ -3034,7 +3086,7 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, } const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; - const struct anv_pipeline *pipeline = gfx_state->base.pipeline; + const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; static const uint32_t push_constant_opcodes[] = { [MESA_SHADER_VERTEX] = 21, @@ -3053,24 +3105,22 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, &pipeline->shaders[stage]->bind_map; uint32_t *dw; - const uint32_t buffers = (1 << count) - 1; - const uint32_t num_dwords = 2 + 2 * count; + const uint32_t buffer_mask = (1 << buffer_count) - 1; + const uint32_t num_dwords = 2 + 2 * buffer_count; dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords, GENX(3DSTATE_CONSTANT_ALL), .ShaderUpdateEnable = shader_mask, - .PointerBufferMask = buffers, + .PointerBufferMask = buffer_mask, .MOCS = cmd_buffer->device->isl_dev.mocs.internal); - for (int i = 0; i < count; i++) { + for (int i = 0; i < buffer_count; i++) { const struct anv_push_range *range = &bind_map->push_ranges[i]; - const struct anv_address addr = - get_push_range_address(cmd_buffer, stage, range); - GENX(3DSTATE_CONSTANT_ALL_DATA_pack)( &cmd_buffer->batch, dw + 2 + i * 2, &(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) { - .PointerToConstantBuffer = anv_address_add(addr, range->start * 32), + .PointerToConstantBuffer = + anv_address_add(buffers[i], range->start * 32), .ConstantBufferReadLength = range->length, }); } @@ -3083,7 +3133,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, { VkShaderStageFlags flushed = 0; const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; - const 
struct anv_pipeline *pipeline = gfx_state->base.pipeline; + const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; #if GEN_GEN >= 12 uint32_t nobuffer_stages = 0; @@ -3092,20 +3142,64 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, anv_foreach_stage(stage, dirty_stages) { unsigned buffer_count = 0; flushed |= mesa_to_vk_shader_stage(stage); - uint32_t max_push_range = 0; + UNUSED uint32_t max_push_range = 0; + struct anv_address buffers[4] = {}; if (anv_pipeline_has_stage(pipeline, stage)) { const struct anv_pipeline_bind_map *bind_map = &pipeline->shaders[stage]->bind_map; + struct anv_push_constants *push = + &cmd_buffer->state.push_constants[stage]; + + if (cmd_buffer->device->robust_buffer_access) { + push->push_reg_mask = 0; + /* Start of the current range in the shader, relative to the start + * of push constants in the shader. + */ + unsigned range_start_reg = 0; + for (unsigned i = 0; i < 4; i++) { + const struct anv_push_range *range = &bind_map->push_ranges[i]; + if (range->length == 0) + continue; + + unsigned bound_size = + get_push_range_bound_size(cmd_buffer, stage, range); + if (bound_size >= range->start * 32) { + unsigned bound_regs = + MIN2(DIV_ROUND_UP(bound_size, 32) - range->start, + range->length); + assert(range_start_reg + bound_regs <= 64); + push->push_reg_mask |= BITFIELD64_RANGE(range_start_reg, + bound_regs); + } + + cmd_buffer->state.push_constants_dirty |= + mesa_to_vk_shader_stage(stage); + range_start_reg += range->length; + } + } + + /* We have to gather buffer addresses as a second step because the + * loop above puts data into the push constant area and the call to + * get_push_range_address is what locks our push constants and copies + * them into the actual GPU buffer. If we did the two loops at the + * same time, we'd risk only having some of the sizes in the push + * constant buffer when we did the copy. 
+ */ for (unsigned i = 0; i < 4; i++) { const struct anv_push_range *range = &bind_map->push_ranges[i]; - if (range->length > 0) { - buffer_count++; - if (GEN_GEN >= 12 && range->length > max_push_range) - max_push_range = range->length; - } + if (range->length == 0) + break; + + buffers[i] = get_push_range_address(cmd_buffer, stage, range); + max_push_range = MAX2(max_push_range, range->length); + buffer_count++; } + + /* We have at most 4 buffers but they should be tightly packed */ + for (unsigned i = buffer_count; i < 4; i++) + assert(bind_map->push_ranges[i].length == 0); } #if GEN_GEN >= 12 @@ -3123,17 +3217,17 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, */ if (max_push_range < 32) { cmd_buffer_emit_push_constant_all(cmd_buffer, 1 << stage, - buffer_count); + buffers, buffer_count); continue; } #endif - cmd_buffer_emit_push_constant(cmd_buffer, stage, buffer_count); + cmd_buffer_emit_push_constant(cmd_buffer, stage, buffers, buffer_count); } #if GEN_GEN >= 12 if (nobuffer_stages) - cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, 0); + cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, NULL, 0); #endif cmd_buffer->state.push_constants_dirty &= ~flushed; @@ -3142,21 +3236,21 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) { - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; uint32_t *p; - uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used; - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) - vb_emit |= pipeline->vb_used; - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->l3_config); + genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1); genX(flush_pipeline_select_3d)(cmd_buffer); + uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used; + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) + vb_emit |= pipeline->vb_used; + if (vb_emit) { const uint32_t num_buffers = __builtin_popcount(vb_emit); const uint32_t num_dwords = 1 + num_buffers * 4; @@ -3235,7 +3329,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) #endif if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) { - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch); /* If the pipeline changed, we may need to re-allocate push constant * space in the URB. @@ -3277,8 +3371,12 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. 
*/ uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) - dirty = flush_descriptor_sets(cmd_buffer, pipeline); + if (cmd_buffer->state.descriptors_dirty) { + dirty = flush_descriptor_sets(cmd_buffer, + &cmd_buffer->state.gfx.base, + pipeline->shaders, + ARRAY_SIZE(pipeline->shaders)); + } if (dirty || cmd_buffer->state.push_constants_dirty) { /* Because we're pushing UBOs, we have to push whenever either @@ -3385,7 +3483,7 @@ static void update_dirty_vbs_for_gen8_vb_flush(struct anv_cmd_buffer *cmd_buffer, uint32_t access_type) { - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); uint64_t vb_used = pipeline->vb_used; @@ -3408,7 +3506,7 @@ void genX(CmdDraw)( uint32_t firstInstance) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3433,7 +3531,8 @@ void genX(CmdDraw)( /* Our implementation of VK_KHR_multiview uses instancing to draw the * different views. We need to multiply instanceCount by the view count. */ - instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); + if (!pipeline->use_primitive_replication) + instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; @@ -3458,7 +3557,7 @@ void genX(CmdDrawIndexed)( uint32_t firstInstance) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3483,7 +3582,8 @@ void genX(CmdDrawIndexed)( /* Our implementation of VK_KHR_multiview uses instancing to draw the * different views. We need to multiply instanceCount by the view count. */ - instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); + if (!pipeline->use_primitive_replication) + instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; @@ -3519,7 +3619,7 @@ void genX(CmdDrawIndirectByteCountEXT)( #if GEN_IS_HASWELL || GEN_GEN >= 8 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, counter_buffer, counterBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); /* firstVertex is always zero for this draw function */ @@ -3544,7 +3644,8 @@ void genX(CmdDrawIndirectByteCountEXT)( /* Our implementation of VK_KHR_multiview uses instancing to draw the * different views. We need to multiply instanceCount by the view count. 
*/ - instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); + if (!pipeline->use_primitive_replication) + instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); struct gen_mi_builder b; gen_mi_builder_init(&b, &cmd_buffer->batch); @@ -3621,7 +3722,7 @@ void genX(CmdDrawIndirect)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3670,7 +3771,7 @@ void genX(CmdDrawIndexedIndirect)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3814,7 +3915,7 @@ void genX(CmdDrawIndirectCount)( ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); struct anv_cmd_state *cmd_state = &cmd_buffer->state; - struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_state->gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3880,7 +3981,7 @@ void genX(CmdDrawIndexedIndirectCount)( ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); struct anv_cmd_state *cmd_state = &cmd_buffer->state; - struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_state->gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -4038,11 +4139,11 @@ void genX(CmdEndTransformFeedbackEXT)( void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { - struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; + struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + assert(pipeline->cs); - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->l3_config); + genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); genX(flush_pipeline_select_gpgpu)(cmd_buffer); @@ -4058,7 +4159,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch); /* The workgroup size of the pipeline affects our push constant layout * so flag push constants as dirty if we change the pipeline. 
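
The genX(CmdDraw*) hunks above all apply the same new multiview rule; a minimal sketch of that rule (the helper name and standalone form are mine, not from the diff):

#include <stdint.h>
#include <stdbool.h>

/* When the pipeline was compiled with primitive replication, the shader
 * already broadcasts each primitive to every view, so the draw must not
 * also multiply the instance count; otherwise multiview still falls back
 * to instancing.
 */
static uint32_t
effective_instance_count(uint32_t instance_count,
                         bool use_primitive_replication,
                         uint32_t view_count)
{
   return use_primitive_replication ? instance_count
                                    : instance_count * view_count;
}
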
@@ -4068,7 +4169,9 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || cmd_buffer->state.compute.pipeline_dirty) { - flush_descriptor_sets(cmd_buffer, pipeline); + flush_descriptor_sets(cmd_buffer, + &cmd_buffer->state.compute.base, + &pipeline->cs, 1); uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)]; struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { @@ -4172,7 +4275,7 @@ void genX(CmdDispatchBase)( uint32_t groupCountZ) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; + struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); anv_cmd_buffer_push_base_group_id(cmd_buffer, baseGroupX, @@ -4207,7 +4310,7 @@ void genX(CmdDispatchBase)( ggw.SIMDSize = prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = prog_data->threads - 1; + ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1; ggw.ThreadGroupIDXDimension = groupCountX; ggw.ThreadGroupIDYDimension = groupCountY; ggw.ThreadGroupIDZDimension = groupCountZ; @@ -4229,7 +4332,7 @@ void genX(CmdDispatchIndirect)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; + struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); struct anv_address addr = anv_address_add(buffer->address, offset); struct anv_batch *batch = &cmd_buffer->batch; @@ -4323,7 +4426,7 @@ void genX(CmdDispatchIndirect)( ggw.SIMDSize = prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = prog_data->threads - 1; + ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1; ggw.RightExecutionMask = pipeline->cs_right_mask; ggw.BottomExecutionMask = 0xffffffff; } @@ -4959,6 +5062,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, if (iview->image->samples == 1) { anv_image_ccs_op(cmd_buffer, image, iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, &clear_color, @@ -4966,6 +5070,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, } else { anv_image_mcs_op(cmd_buffer, image, iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, ISL_AUX_OP_FAST_CLEAR, &clear_color, @@ -5487,6 +5592,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) if (image->samples == 1) { anv_cmd_predicated_ccs_resolve(cmd_buffer, image, iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, VK_IMAGE_ASPECT_COLOR_BIT, iview->planes[0].isl.base_level, array_layer, @@ -5495,6 +5601,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) } else { anv_cmd_predicated_mcs_resolve(cmd_buffer, image, iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, VK_IMAGE_ASPECT_COLOR_BIT, base_layer, ISL_AUX_OP_PARTIAL_RESOLVE,
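
Most of the remaining hunks are mechanical fallout of splitting the old unified anv_pipeline into anv_graphics_pipeline and anv_compute_pipeline; a sketch of the call-site pattern they convert to (field names are taken from the hunks, the surrounding declarations are illustrative only):

/* Graphics and compute pipelines now hang off their own command-buffer
 * state, and fields shared by both kinds (batch, l3_config, ...) are
 * reached through an embedded base.
 */
struct anv_graphics_pipeline *gfx =
   cmd_buffer->state.gfx.pipeline;        /* was state.gfx.base.pipeline */
struct anv_compute_pipeline *cs =
   cmd_buffer->state.compute.pipeline;    /* was state.compute.base.pipeline */

genX(cmd_buffer_config_l3)(cmd_buffer, gfx->base.l3_config);
anv_batch_emit_batch(&cmd_buffer->batch, &cs->base.batch);
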
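
The robust_buffer_access path added in cmd_buffer_flush_push_constants computes push->push_reg_mask from get_push_range_bound_size(); a worked example of that arithmetic, with numbers chosen purely for illustration:

/* Example: a push range with range->start = 2 and range->length = 8
 * (both in 32-byte registers), backed by a buffer that is bound for
 * 100 bytes:
 *
 *   bound_size = 100
 *   DIV_ROUND_UP(bound_size, 32)           = 4 registers of valid data
 *   bound_regs = MIN2(4 - range->start, range->length)
 *              = MIN2(4 - 2, 8)            = 2
 *
 * so only the first 2 of the 8 registers in this range are set in
 * push_reg_mask via BITFIELD64_RANGE(range_start_reg, bound_regs), and
 * reads from the remaining 6 can be treated as out of bounds.
 */
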