X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_cmd_buffer.c;h=4aa5df69674b6a623dd681583c26e0873d9586ca;hb=8e03250fcf4fc5de31e92ca4919959d932888a69;hp=c62d275fd95d4d59dfcd3b668a0228b3f430cd6c;hpb=2ab2be092d15ddb449b4a215609705bae68089a0;p=mesa.git

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c62d275fd95..4aa5df69674 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -38,9 +38,9 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageLayout src_layout,
 					 VkImageLayout dst_layout,
-					 int src_family,
-					 int dst_family,
-					 VkImageSubresourceRange range,
+					 uint32_t src_family,
+					 uint32_t dst_family,
+					 const VkImageSubresourceRange *range,
 					 VkImageAspectFlags pending_clears);
 
 const struct radv_dynamic_state default_dynamic_state = {
@@ -331,13 +331,15 @@ static unsigned radv_pack_float_12p4(float x)
 }
 
 static uint32_t
-shader_stage_to_user_data_0(gl_shader_stage stage)
+shader_stage_to_user_data_0(gl_shader_stage stage, bool has_gs)
 {
 	switch (stage) {
 	case MESA_SHADER_FRAGMENT:
 		return R_00B030_SPI_SHADER_USER_DATA_PS_0;
 	case MESA_SHADER_VERTEX:
-		return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+		return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
+	case MESA_SHADER_GEOMETRY:
+		return R_00B230_SPI_SHADER_USER_DATA_GS_0;
 	case MESA_SHADER_COMPUTE:
 		return R_00B900_COMPUTE_USER_DATA_0;
 	default:
@@ -360,7 +362,7 @@ radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer,
 			   int idx, uint64_t va)
 {
 	struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
-	uint32_t base_reg = shader_stage_to_user_data_0(stage);
+	uint32_t base_reg = shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline));
 	if (loc->sgpr_idx == -1)
 		return;
 	assert(loc->num_sgprs == 2);
@@ -415,6 +417,8 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 	case 16:
 		src = cmd_buffer->device->sample_locations_16x;
 		break;
+	default:
+		unreachable("unknown number of samples");
 	}
 	memcpy(samples_ptr, src, num_samples * 4 * 2);
 
@@ -451,47 +455,38 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
 }
 
 static void
-radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
-			struct radv_pipeline *pipeline)
+radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
+		struct radv_pipeline *pipeline,
+		struct radv_shader_variant *shader)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	struct radv_shader_variant *vs;
-	uint64_t va;
+	uint64_t va = ws->buffer_get_va(shader->bo);
 	unsigned export_count;
-	unsigned clip_dist_mask, cull_dist_mask, total_mask;
 
-	assert (pipeline->shaders[MESA_SHADER_VERTEX]);
+	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
 
-	vs = pipeline->shaders[MESA_SHADER_VERTEX];
-	va = ws->buffer_get_va(vs->bo);
-	ws->cs_add_buffer(cmd_buffer->cs, vs->bo, 8);
-
-	clip_dist_mask = vs->info.vs.clip_dist_mask;
-	cull_dist_mask = vs->info.vs.cull_dist_mask;
-	total_mask = clip_dist_mask | cull_dist_mask;
-	radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 0);
-	radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
-
-	export_count = MAX2(1, vs->info.vs.param_exports);
+	export_count = MAX2(1, shader->info.vs.param_exports);
 	radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
 			       S_0286C4_VS_EXPORT_COUNT(export_count - 1));
+
 	radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT,
 			       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-			       S_02870C_POS1_EXPORT_FORMAT(vs->info.vs.pos_exports > 1 ?
+			       S_02870C_POS1_EXPORT_FORMAT(shader->info.vs.pos_exports > 1 ?
							   V_02870C_SPI_SHADER_4COMP :
							   V_02870C_SPI_SHADER_NONE) |
-			       S_02870C_POS2_EXPORT_FORMAT(vs->info.vs.pos_exports > 2 ?
+			       S_02870C_POS2_EXPORT_FORMAT(shader->info.vs.pos_exports > 2 ?
							   V_02870C_SPI_SHADER_4COMP :
							   V_02870C_SPI_SHADER_NONE) |
-			       S_02870C_POS3_EXPORT_FORMAT(vs->info.vs.pos_exports > 3 ?
+			       S_02870C_POS3_EXPORT_FORMAT(shader->info.vs.pos_exports > 3 ?
							   V_02870C_SPI_SHADER_4COMP :
							   V_02870C_SPI_SHADER_NONE));
+
 	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
 	radeon_emit(cmd_buffer->cs, va >> 8);
 	radeon_emit(cmd_buffer->cs, va >> 40);
-	radeon_emit(cmd_buffer->cs, vs->rsrc1);
-	radeon_emit(cmd_buffer->cs, vs->rsrc2);
+	radeon_emit(cmd_buffer->cs, shader->rsrc1);
+	radeon_emit(cmd_buffer->cs, shader->rsrc2);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028818_PA_CL_VTE_CNTL,
 			       S_028818_VTX_W0_FMT(1) |
@@ -499,13 +494,18 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
 			       S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
 			       S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
 
+	unsigned clip_dist_mask, cull_dist_mask, total_mask;
+	clip_dist_mask = shader->info.vs.clip_dist_mask;
+	cull_dist_mask = shader->info.vs.cull_dist_mask;
+	total_mask = clip_dist_mask | cull_dist_mask;
+
 	radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
-			       S_02881C_USE_VTX_POINT_SIZE(vs->info.vs.writes_pointsize) |
-			       S_02881C_USE_VTX_RENDER_TARGET_INDX(vs->info.vs.writes_layer) |
-			       S_02881C_USE_VTX_VIEWPORT_INDX(vs->info.vs.writes_viewport_index) |
-			       S_02881C_VS_OUT_MISC_VEC_ENA(vs->info.vs.writes_pointsize ||
-							    vs->info.vs.writes_layer ||
-							    vs->info.vs.writes_viewport_index) |
+			       S_02881C_USE_VTX_POINT_SIZE(shader->info.vs.writes_pointsize) |
+			       S_02881C_USE_VTX_RENDER_TARGET_INDX(shader->info.vs.writes_layer) |
+			       S_02881C_USE_VTX_VIEWPORT_INDX(shader->info.vs.writes_viewport_index) |
+			       S_02881C_VS_OUT_MISC_VEC_ENA(shader->info.vs.writes_pointsize ||
+							    shader->info.vs.writes_layer ||
+							    shader->info.vs.writes_viewport_index) |
 			       S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
 			       S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
 			       pipeline->graphics.raster.pa_cl_vs_out_cntl |
@@ -513,10 +513,132 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
 			       clip_dist_mask);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
-			       S_028AB4_REUSE_OFF(vs->info.vs.writes_viewport_index));
+			       S_028AB4_REUSE_OFF(shader->info.vs.writes_viewport_index));
+}
+
+static void
+radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
+		struct radv_shader_variant *shader)
+{
+	struct radeon_winsys *ws = cmd_buffer->device->ws;
+	uint64_t va = ws->buffer_get_va(shader->bo);
+
+	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
+
+	radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+			       shader->info.vs.esgs_itemsize / 4);
+	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
+	radeon_emit(cmd_buffer->cs, va >> 8);
+	radeon_emit(cmd_buffer->cs, va >> 40);
+	radeon_emit(cmd_buffer->cs, shader->rsrc1);
+	radeon_emit(cmd_buffer->cs, shader->rsrc2);
+}
+
+static void
+radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
+			struct radv_pipeline *pipeline)
+{
+	struct radv_shader_variant *vs;
+
+	assert (pipeline->shaders[MESA_SHADER_VERTEX]);
+
+	vs = pipeline->shaders[MESA_SHADER_VERTEX];
+
+	if (vs->info.vs.as_es)
+		radv_emit_hw_es(cmd_buffer, vs);
+	else
+		radv_emit_hw_vs(cmd_buffer, pipeline, vs);
+
+	radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
 }
 
+static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
+{
+	unsigned gs_max_vert_out = gs->info.gs.vertices_out;
+	unsigned cut_mode;
+
+	if (gs_max_vert_out <= 128) {
+		cut_mode = V_028A40_GS_CUT_128;
+	} else if (gs_max_vert_out <= 256) {
+		cut_mode = V_028A40_GS_CUT_256;
+	} else if (gs_max_vert_out <= 512) {
+		cut_mode = V_028A40_GS_CUT_512;
+	} else {
+		assert(gs_max_vert_out <= 1024);
+		cut_mode = V_028A40_GS_CUT_1024;
+	}
+
+	return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
+		S_028A40_CUT_MODE(cut_mode)|
+		S_028A40_ES_WRITE_OPTIMIZE(1) |
+		S_028A40_GS_WRITE_OPTIMIZE(1);
+}
+
+static void
+radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
+			  struct radv_pipeline *pipeline)
+{
+	struct radeon_winsys *ws = cmd_buffer->device->ws;
+	struct radv_shader_variant *gs;
+	uint64_t va;
+
+	gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+	if (!gs) {
+		radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 0);
+		return;
+	}
+
+	radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(gs));
+
+	uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2;
+
+	radeon_set_context_reg_seq(cmd_buffer->cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
+	radeon_emit(cmd_buffer->cs, gsvs_itemsize);
+	radeon_emit(cmd_buffer->cs, gsvs_itemsize);
+	radeon_emit(cmd_buffer->cs, gsvs_itemsize);
+
+	radeon_set_context_reg(cmd_buffer->cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
+
+	radeon_set_context_reg(cmd_buffer->cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
+
+	uint32_t gs_vert_itemsize = gs->info.gs.gsvs_vertex_size;
+	radeon_set_context_reg_seq(cmd_buffer->cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
+	radeon_emit(cmd_buffer->cs, gs_vert_itemsize >> 2);
+	radeon_emit(cmd_buffer->cs, 0);
+	radeon_emit(cmd_buffer->cs, 0);
+	radeon_emit(cmd_buffer->cs, 0);
+
+	uint32_t gs_num_invocations = gs->info.gs.invocations;
+	radeon_set_context_reg(cmd_buffer->cs, R_028B90_VGT_GS_INSTANCE_CNT,
+			       S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+			       S_028B90_ENABLE(gs_num_invocations > 0));
+
+	va = ws->buffer_get_va(gs->bo);
+	ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8);
+	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
+	radeon_emit(cmd_buffer->cs, va >> 8);
+	radeon_emit(cmd_buffer->cs, va >> 40);
+	radeon_emit(cmd_buffer->cs, gs->rsrc1);
+	radeon_emit(cmd_buffer->cs, gs->rsrc2);
+
+	radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader);
+
+	struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
							     AC_UD_GS_VS_RING_STRIDE_ENTRIES);
+	if (loc->sgpr_idx != -1) {
+		uint32_t stride = gs->info.gs.max_gsvs_emit_size;
+		uint32_t num_entries = 64;
+		bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
+
+		if (is_vi)
+			num_entries *= stride;
+
+		stride = S_008F04_STRIDE(stride);
+		radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + loc->sgpr_idx * 4, 2);
+		radeon_emit(cmd_buffer->cs, stride);
+		radeon_emit(cmd_buffer->cs, num_entries);
+	}
+}
 
 static void
 radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
@@ -532,7 +654,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 	assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
 
 	ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-	vs = pipeline->shaders[MESA_SHADER_VERTEX];
+	vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX];
 	va = ws->buffer_get_va(ps->bo);
 	ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
 
@@ -583,7 +705,27 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 		unsigned val;
 		val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
 		radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
-		ps_offset = 1;
+		ps_offset++;
+	}
+
+	if (ps->info.fs.prim_id_input && (vs->info.vs.prim_id_output != 0xffffffff)) {
+		unsigned vs_offset, flat_shade;
+		unsigned val;
+		vs_offset = vs->info.vs.prim_id_output;
+		flat_shade = true;
+		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
+		radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
+		++ps_offset;
+	}
+
+	if (ps->info.fs.layer_input && (vs->info.vs.layer_output != 0xffffffff)) {
+		unsigned vs_offset, flat_shade;
+		unsigned val;
+		vs_offset = vs->info.vs.layer_output;
+		flat_shade = true;
+		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
+		radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
+		++ps_offset;
 	}
 
 	for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
@@ -602,6 +744,14 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 		}
 
 		vs_offset = util_bitcount(vs->info.vs.export_mask & ((1u << i) - 1));
+		if (vs->info.vs.prim_id_output != 0xffffffff) {
+			if (vs_offset >= vs->info.vs.prim_id_output)
+				vs_offset++;
+		}
+		if (vs->info.vs.layer_output != 0xffffffff) {
+			if (vs_offset >= vs->info.vs.layer_output)
+				vs_offset++;
+		}
 		flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
 
 		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
@@ -622,11 +772,19 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer,
 	radv_emit_graphics_raster_state(cmd_buffer, pipeline);
 	radv_update_multisample_state(cmd_buffer, pipeline);
 	radv_emit_vertex_shader(cmd_buffer, pipeline);
+	radv_emit_geometry_shader(cmd_buffer, pipeline);
 	radv_emit_fragment_shader(cmd_buffer, pipeline);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
			       pipeline->graphics.prim_restart_enable);
 
+	cmd_buffer->scratch_size_needed =
+		MAX2(cmd_buffer->scratch_size_needed,
+		     pipeline->max_waves * pipeline->scratch_bytes_per_wave);
+
+	radeon_set_context_reg(cmd_buffer->cs, R_0286E8_SPI_TMPRING_SIZE,
+			       S_0286E8_WAVES(pipeline->max_waves) |
+			       S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
 	cmd_buffer->state.emitted_pipeline = pipeline;
 }
 
@@ -1002,7 +1160,7 @@ emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
 				   gl_shader_stage stage)
 {
 	struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx];
-	uint32_t base_reg = shader_stage_to_user_data_0(stage);
+	uint32_t base_reg = shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline));
 
 	if (desc_set_loc->sgpr_idx == -1)
 		return;
@@ -1032,6 +1190,11 @@ radv_emit_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
						   idx, set->va,
						   MESA_SHADER_VERTEX);
 
+	if ((stages & VK_SHADER_STAGE_GEOMETRY_BIT) && radv_pipeline_has_gs(pipeline))
+		emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
+						   idx, set->va,
+						   MESA_SHADER_GEOMETRY);
+
 	if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
		emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
						   idx, set->va,
@@ -1092,6 +1255,10 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
		radv_emit_userdata_address(cmd_buffer, pipeline,
					   MESA_SHADER_FRAGMENT, AC_UD_PUSH_CONSTANTS, va);
 
+	if ((stages & VK_SHADER_STAGE_GEOMETRY_BIT) && radv_pipeline_has_gs(pipeline))
+		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_GEOMETRY,
+					   AC_UD_PUSH_CONSTANTS, va);
+
 	if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_COMPUTE,
					   AC_UD_PUSH_CONSTANTS, va);
@@ -1100,7 +1267,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 }
 
 static void
-radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
+radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, bool instanced_or_indirect_draw,
+			    uint32_t draw_vertex_count)
 {
 	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
 	struct radv_device *device = cmd_buffer->device;
@@ -1164,17 +1332,30 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
 	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR))
		radv_emit_scissor(cmd_buffer);
 
+	ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_or_indirect_draw, draw_vertex_count);
+	if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
+		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
+			radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
+		else
+			radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+		cmd_buffer->state.last_ia_multi_vgt_param = ia_multi_vgt_param;
+	}
+
 	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) {
-		radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, 0);
-		ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer);
+		uint32_t stages = 0;
+
+		if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
+			stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
+				S_028B54_GS_EN(1) |
+				S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+
+		radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
 
 		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
-			radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
			radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
			radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim);
 		} else {
			radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, cmd_buffer->state.pipeline->graphics.prim);
-			radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
			radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
 		}
 		radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, cmd_buffer->state.pipeline->graphics.gs_out);
@@ -1222,11 +1403,67 @@ static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
 	}
 }
 
+static void radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
+				  VkAccessFlags src_flags)
+{
+	enum radv_cmd_flush_bits flush_bits = 0;
+	uint32_t b;
+	for_each_bit(b, src_flags) {
+		switch ((VkAccessFlagBits)(1 << b)) {
+		case VK_ACCESS_SHADER_WRITE_BIT:
+			flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+			break;
+		case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+			break;
+		case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+			break;
+		case VK_ACCESS_TRANSFER_WRITE_BIT:
+			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+			break;
+		default:
+			break;
+		}
+	}
+	cmd_buffer->state.flush_bits |= flush_bits;
+}
+
+static void radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
+				  VkAccessFlags dst_flags)
+{
+	enum radv_cmd_flush_bits flush_bits = 0;
+	uint32_t b;
+	for_each_bit(b, dst_flags) {
+		switch ((VkAccessFlagBits)(1 << b)) {
+		case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+		case VK_ACCESS_INDEX_READ_BIT:
+		case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
+			break;
+		case VK_ACCESS_UNIFORM_READ_BIT:
+			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
+			break;
+		case VK_ACCESS_SHADER_READ_BIT:
+			flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+			break;
+		case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
+		case VK_ACCESS_TRANSFER_READ_BIT:
+		case VK_ACCESS_TRANSFER_WRITE_BIT:
+		case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
+			flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
+		default:
+			break;
+		}
+	}
+	cmd_buffer->state.flush_bits |= flush_bits;
+}
+
 static void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
				 const struct radv_subpass_barrier *barrier)
 {
+	radv_src_access_flush(cmd_buffer, barrier->src_access_mask);
 	radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
-
-	/* TODO: actual cache flushes */
+	radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask);
 }
 
 static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
@@ -1244,7 +1481,7 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buf
 	radv_handle_image_transition(cmd_buffer, view->image,
				     cmd_buffer->state.attachments[idx].current_layout,
-				     att.layout, 0, 0, range,
+				     att.layout, 0, 0, &range,
				     cmd_buffer->state.attachments[idx].pending_clear_aspects);
 
 	cmd_buffer->state.attachments[idx].current_layout = att.layout;
@@ -1344,6 +1581,9 @@ VkResult radv_AllocateCommandBuffers(
 	VkResult result = VK_SUCCESS;
 	uint32_t i;
 
+	memset(pCommandBuffers, 0,
+	       sizeof(*pCommandBuffers)*pAllocateInfo->commandBufferCount);
+
 	for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
		result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level,
						&pCommandBuffers[i]);
@@ -1402,12 +1642,19 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
		free(up);
 	}
 
+	cmd_buffer->scratch_size_needed = 0;
+	cmd_buffer->compute_scratch_size_needed = 0;
+	cmd_buffer->esgs_ring_size_needed = 0;
+	cmd_buffer->gsvs_ring_size_needed = 0;
+
 	if (cmd_buffer->upload.upload_bo)
		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
						      cmd_buffer->upload.upload_bo, 8);
 	cmd_buffer->upload.offset = 0;
 
 	cmd_buffer->record_fail = false;
+
+	cmd_buffer->ring_offsets_idx = -1;
 }
 
 VkResult radv_ResetCommandBuffer(
@@ -1419,6 +1666,20 @@ VkResult radv_ResetCommandBuffer(
 	return VK_SUCCESS;
 }
 
+static void emit_gfx_buffer_state(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_device *device = cmd_buffer->device;
+	if (device->gfx_init) {
+		uint64_t va = device->ws->buffer_get_va(device->gfx_init);
+		device->ws->cs_add_buffer(cmd_buffer->cs, device->gfx_init, 8);
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+		radeon_emit(cmd_buffer->cs, va);
+		radeon_emit(cmd_buffer->cs, (va >> 32) & 0xffff);
+		radeon_emit(cmd_buffer->cs, device->gfx_init_size_dw & 0xffff);
+	} else
+		si_init_config(cmd_buffer);
+}
+
 VkResult radv_BeginCommandBuffer(
 	VkCommandBuffer commandBuffer,
 	const VkCommandBufferBeginInfo *pBeginInfo)
@@ -1432,26 +1693,11 @@ VkResult radv_BeginCommandBuffer(
 	if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
		switch (cmd_buffer->queue_family_index) {
		case RADV_QUEUE_GENERAL:
-			/* Flush read caches at the beginning of CS not flushed by the kernel. */
-			cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_ICACHE |
-				RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
-				RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-				RADV_CMD_FLAG_INV_VMEM_L1 |
-				RADV_CMD_FLAG_INV_SMEM_L1 |
-				RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER |
-				RADV_CMD_FLAG_INV_GLOBAL_L2;
-			si_init_config(cmd_buffer->device->physical_device, cmd_buffer);
+			emit_gfx_buffer_state(cmd_buffer);
			radv_set_db_count_control(cmd_buffer);
-			si_emit_cache_flush(cmd_buffer);
			break;
		case RADV_QUEUE_COMPUTE:
-			cmd_buffer->state.flush_bits = RADV_CMD_FLAG_INV_ICACHE |
-				RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-				RADV_CMD_FLAG_INV_VMEM_L1 |
-				RADV_CMD_FLAG_INV_SMEM_L1 |
-				RADV_CMD_FLAG_INV_GLOBAL_L2;
-			si_init_compute(cmd_buffer->device->physical_device, cmd_buffer);
-			si_emit_cache_flush(cmd_buffer);
+			si_init_compute(cmd_buffer);
			break;
		case RADV_QUEUE_TRANSFER:
		default:
@@ -1594,6 +1840,7 @@ VkResult radv_EndCommandBuffer(
 	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
		si_emit_cache_flush(cmd_buffer);
 
+
 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
	    cmd_buffer->record_fail)
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -1629,9 +1876,15 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
 	radeon_emit(cmd_buffer->cs, compute_shader->rsrc1);
 	radeon_emit(cmd_buffer->cs, compute_shader->rsrc2);
 
+
+	cmd_buffer->compute_scratch_size_needed =
+		MAX2(cmd_buffer->compute_scratch_size_needed,
+		     pipeline->max_waves * pipeline->scratch_bytes_per_wave);
+
 	/* change these once we have scratch support */
 	radeon_set_sh_reg(cmd_buffer->cs, R_00B860_COMPUTE_TMPRING_SIZE,
-			  S_00B860_WAVES(32) | S_00B860_WAVESIZE(0));
+			  S_00B860_WAVES(pipeline->max_waves) |
+			  S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
 
 	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
 	radeon_emit(cmd_buffer->cs,
@@ -1674,6 +1927,20 @@ void radv_CmdBindPipeline(
		radv_dynamic_state_copy(&cmd_buffer->state.dynamic,
					&pipeline->dynamic_state,
					pipeline->dynamic_state_mask);
+
+		if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
+			cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
+		if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
+			cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
+
+		if (radv_pipeline_has_gs(pipeline)) {
+			struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
									     AC_UD_SCRATCH_RING_OFFSETS);
+			if (cmd_buffer->ring_offsets_idx == -1)
+				cmd_buffer->ring_offsets_idx = loc->sgpr_idx;
+			else if (loc->sgpr_idx != -1)
+				assert(loc->sgpr_idx == cmd_buffer->ring_offsets_idx);
+		}
		break;
	default:
		assert(!"invalid bind point");
@@ -1821,6 +2088,22 @@ void radv_CmdExecuteCommands(
 	for (uint32_t i = 0; i < commandBufferCount; i++) {
		RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
 
+		primary->scratch_size_needed = MAX2(primary->scratch_size_needed,
						    secondary->scratch_size_needed);
+		primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed,
							    secondary->compute_scratch_size_needed);
+
+		if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
+			primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
+		if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
+			primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
+
+		if (secondary->ring_offsets_idx != -1) {
+			if (primary->ring_offsets_idx == -1)
+				primary->ring_offsets_idx = secondary->ring_offsets_idx;
+			else
+				assert(secondary->ring_offsets_idx == primary->ring_offsets_idx);
+		}
		primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
 	}
 
@@ -1895,6 +2178,13 @@ VkResult radv_ResetCommandPool(
 	return VK_SUCCESS;
 }
 
+void radv_TrimCommandPoolKHR(
+    VkDevice                                    device,
+    VkCommandPool                               commandPool,
+    VkCommandPoolTrimFlagsKHR                   flags)
+{
+}
+
 void radv_CmdBeginRenderPass(
 	VkCommandBuffer commandBuffer,
 	const VkRenderPassBeginInfo* pRenderPassBegin,
@@ -1944,16 +2234,19 @@ void radv_CmdDraw(
 	uint32_t firstInstance)
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-	radv_cmd_buffer_flush_state(cmd_buffer);
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
+	radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), vertexCount);
+
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
 
 	struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
							     AC_UD_VS_BASE_VERTEX_START_INSTANCE);
 	if (loc->sgpr_idx != -1) {
-		radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4, 2);
+		uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline));
+		radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3);
		radeon_emit(cmd_buffer->cs, firstVertex);
		radeon_emit(cmd_buffer->cs, firstInstance);
+		radeon_emit(cmd_buffer->cs, 0);
 	}
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
 	radeon_emit(cmd_buffer->cs, instanceCount);
@@ -1993,10 +2286,10 @@ void radv_CmdDrawIndexed(
 	uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
 	uint64_t index_va;
 
-	radv_cmd_buffer_flush_state(cmd_buffer);
+	radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), indexCount);
 	radv_emit_primitive_reset_index(cmd_buffer);
 
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14);
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
 
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
 	radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type);
@@ -2004,9 +2297,11 @@ void radv_CmdDrawIndexed(
 	struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
							     AC_UD_VS_BASE_VERTEX_START_INSTANCE);
 	if (loc->sgpr_idx != -1) {
-		radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4, 2);
+		uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline));
+		radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3);
		radeon_emit(cmd_buffer->cs, vertexOffset);
		radeon_emit(cmd_buffer->cs, firstInstance);
+		radeon_emit(cmd_buffer->cs, 0);
 	}
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
 	radeon_emit(cmd_buffer->cs, instanceCount);
@@ -2055,6 +2350,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
 	struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
							     AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+	uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline));
 	assert(loc->sgpr_idx != -1);
 	radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
 	radeon_emit(cs, 1);
@@ -2065,9 +2361,11 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
				       PKT3_DRAW_INDIRECT_MULTI, 8, false));
 	radeon_emit(cs, 0);
-	radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2);
-	radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + (loc->sgpr_idx + 1) * 4) - SI_SH_REG_OFFSET) >> 2);
-	radeon_emit(cs, S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); /* draw_index and count_indirect enable */
+	radeon_emit(cs, ((base_reg + loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2);
+	radeon_emit(cs, ((base_reg + (loc->sgpr_idx + 1) * 4) - SI_SH_REG_OFFSET) >> 2);
+	radeon_emit(cs, (((base_reg + (loc->sgpr_idx + 2) * 4) - SI_SH_REG_OFFSET) >> 2) |
+			S_2C3_DRAW_INDEX_ENABLE(1) |
+			S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
 	radeon_emit(cs, draw_count); /* count */
 	radeon_emit(cs, count_va); /* count_addr */
 	radeon_emit(cs, count_va >> 32);
@@ -2086,7 +2384,7 @@ radv_cmd_draw_indirect_count(VkCommandBuffer command
                             uint32_t stride)
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-	radv_cmd_buffer_flush_state(cmd_buffer);
+	radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
 
 	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
							   cmd_buffer->cs, 14);
@@ -2111,7 +2409,7 @@ radv_cmd_draw_indexed_indirect_count(
 	int index_size = cmd_buffer->state.index_type ? 4 : 2;
 	uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
 	uint64_t index_va;
-	radv_cmd_buffer_flush_state(cmd_buffer);
+	radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
 	radv_emit_primitive_reset_index(cmd_buffer);
 
 	index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo);
@@ -2381,7 +2679,7 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffe
					       struct radv_image *image,
					       VkImageLayout src_layout,
					       VkImageLayout dst_layout,
-					       VkImageSubresourceRange range,
+					       const VkImageSubresourceRange *range,
					       VkImageAspectFlags pending_clears)
 {
 	if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
@@ -2402,12 +2700,12 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffe
		    !radv_layout_has_htile(image, dst_layout)) ||
		   (radv_layout_is_htile_compressed(image, src_layout) &&
		    !radv_layout_is_htile_compressed(image, dst_layout))) {
+		VkImageSubresourceRange local_range = *range;
+		local_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+		local_range.baseMipLevel = 0;
+		local_range.levelCount = 1;
-		range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
-		range.baseMipLevel = 0;
-		range.levelCount = 1;
-
-		radv_decompress_depth_image_inplace(cmd_buffer, image, &range);
+		radv_decompress_depth_image_inplace(cmd_buffer, image, &local_range);
 	}
 }
 
@@ -2432,7 +2730,7 @@ static void radv_handle_cmask_image_transition(struct radv_cmd_buffe
					       VkImageLayout dst_layout,
					       unsigned src_queue_mask,
					       unsigned dst_queue_mask,
-					       VkImageSubresourceRange range,
+					       const VkImageSubresourceRange *range,
					       VkImageAspectFlags pending_clears)
 {
 	if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
@@ -2442,7 +2740,7 @@ static void radv_handle_cmask_image_transition(struct radv_cmd_buffe
		radv_initialise_cmask(cmd_buffer, image, 0xffffffffu);
 	} else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
		   !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
-		radv_fast_clear_flush_image_inplace(cmd_buffer, image);
+		radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
 	}
 }
 
@@ -2469,14 +2767,14 @@ static void radv_handle_dcc_image_transition(struct radv_cmd_buffer
					     VkImageLayout dst_layout,
					     unsigned src_queue_mask,
					     unsigned dst_queue_mask,
-					     VkImageSubresourceRange range,
+					     const VkImageSubresourceRange *range,
					     VkImageAspectFlags pending_clears)
 {
 	if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
		radv_initialize_dcc(cmd_buffer, image, 0x20202020u);
 	} else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
		   !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
-		radv_fast_clear_flush_image_inplace(cmd_buffer, image);
+		radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
 	}
 }
 
@@ -2484,9 +2782,9 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cm
					 struct radv_image *image,
					 VkImageLayout src_layout,
					 VkImageLayout dst_layout,
-					 int src_family,
-					 int dst_family,
-					 VkImageSubresourceRange range,
+					 uint32_t src_family,
+					 uint32_t dst_family,
+					 const VkImageSubresourceRange *range,
					 VkImageAspectFlags pending_clears)
 {
 	if (image->exclusive && src_family != dst_family) {
@@ -2506,8 +2804,8 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cm
			return;
 	}
 
-	unsigned src_queue_mask = radv_image_queue_family_mask(image, src_family);
-	unsigned dst_queue_mask = radv_image_queue_family_mask(image, dst_family);
+	unsigned src_queue_mask = radv_image_queue_family_mask(image, src_family, cmd_buffer->queue_family_index);
+	unsigned dst_queue_mask = radv_image_queue_family_mask(image, dst_family, cmd_buffer->queue_family_index);
 
 	if (image->htile.size)
		radv_handle_depth_image_transition(cmd_buffer, image, src_layout,
@@ -2541,7 +2839,7 @@ void radv_CmdPipelineBarrier(
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	VkAccessFlags src_flags = 0;
 	VkAccessFlags dst_flags = 0;
-	uint32_t b;
+
 	for (uint32_t i = 0; i < memoryBarrierCount; i++) {
		src_flags |= pMemoryBarriers[i].srcAccessMask;
		dst_flags |= pMemoryBarriers[i].dstAccessMask;
@@ -2557,26 +2855,7 @@ void radv_CmdPipelineBarrier(
		dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
 	}
 
-	enum radv_cmd_flush_bits flush_bits = 0;
-	for_each_bit(b, src_flags) {
-		switch ((VkAccessFlagBits)(1 << b)) {
-		case VK_ACCESS_SHADER_WRITE_BIT:
-			flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
-			break;
-		case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
-			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
-			break;
-		case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
-			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
-			break;
-		case VK_ACCESS_TRANSFER_WRITE_BIT:
-			flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
-			break;
-		default:
-			break;
-		}
-	}
-	cmd_buffer->state.flush_bits |= flush_bits;
+	radv_src_access_flush(cmd_buffer, src_flags);
 
 	for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
		RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
@@ -2585,35 +2864,14 @@ void radv_CmdPipelineBarrier(
					     pImageMemoryBarriers[i].newLayout,
					     pImageMemoryBarriers[i].srcQueueFamilyIndex,
					     pImageMemoryBarriers[i].dstQueueFamilyIndex,
-					     pImageMemoryBarriers[i].subresourceRange,
+					     &pImageMemoryBarriers[i].subresourceRange,
					     0);
 	}
 
-	flush_bits = 0;
-
-	for_each_bit(b, dst_flags) {
-		switch ((VkAccessFlagBits)(1 << b)) {
-		case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
-		case VK_ACCESS_INDEX_READ_BIT:
-		case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
-			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
-			break;
-		case VK_ACCESS_UNIFORM_READ_BIT:
-			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
-			break;
-		case VK_ACCESS_SHADER_READ_BIT:
-			flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
-			break;
-		case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
-		case VK_ACCESS_TRANSFER_READ_BIT:
-		case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
-			flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
-		default:
-			break;
-		}
-	}
+	radv_dst_access_flush(cmd_buffer, dst_flags);
 
-	flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+	/* TODO reduce this */
+	enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
		RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
 
 	cmd_buffer->state.flush_bits |= flush_bits;
@@ -2719,7 +2977,7 @@ void radv_CmdWaitEvents(VkCommandBuffer commandBuffer,
					     pImageMemoryBarriers[i].newLayout,
					     pImageMemoryBarriers[i].srcQueueFamilyIndex,
					     pImageMemoryBarriers[i].dstQueueFamilyIndex,
-					     pImageMemoryBarriers[i].subresourceRange,
+					     &pImageMemoryBarriers[i].subresourceRange,
					     0);
 	}
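
Illustrative sketch (not part of the patch above): the shader_stage_to_user_data_0(stage, has_gs) change means that, once a geometry shader is bound, the hardware vertex stage runs as ES and its user SGPRs move from the VS register block to the ES block; the draw paths then compute the SH register address as base_reg + sgpr_idx * 4. The R_00B* constants below mirror the register offsets used in the diff, while the enum, user_data_0() and main() are hypothetical stand-ins for the driver types, just to show the selection in isolation.

	/* Standalone demo of the user-SGPR base register selection. */
	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define R_00B030_SPI_SHADER_USER_DATA_PS_0 0x00B030
	#define R_00B130_SPI_SHADER_USER_DATA_VS_0 0x00B130
	#define R_00B230_SPI_SHADER_USER_DATA_GS_0 0x00B230
	#define R_00B330_SPI_SHADER_USER_DATA_ES_0 0x00B330

	enum stage { STAGE_VERTEX, STAGE_GEOMETRY, STAGE_FRAGMENT };

	static uint32_t user_data_0(enum stage stage, bool has_gs)
	{
		switch (stage) {
		case STAGE_FRAGMENT:
			return R_00B030_SPI_SHADER_USER_DATA_PS_0;
		case STAGE_VERTEX:
			/* the VS runs on the ES stage when it feeds a GS */
			return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0
				      : R_00B130_SPI_SHADER_USER_DATA_VS_0;
		case STAGE_GEOMETRY:
			return R_00B230_SPI_SHADER_USER_DATA_GS_0;
		}
		assert(!"unknown stage");
		return 0;
	}

	int main(void)
	{
		/* e.g. the base-vertex/start-instance user SGPRs at sgpr_idx 2 */
		int sgpr_idx = 2;
		printf("VS, no GS:   0x%06x\n",
		       (unsigned)(user_data_0(STAGE_VERTEX, false) + sgpr_idx * 4));
		printf("VS, with GS: 0x%06x\n",
		       (unsigned)(user_data_0(STAGE_VERTEX, true) + sgpr_idx * 4));
		return 0;
	}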
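A second small sketch, again an illustration under stated assumptions rather than driver code: the scratch bookkeeping added above keeps a running maximum per command buffer. Each bound pipeline contributes max_waves * scratch_bytes_per_wave, and radv_CmdExecuteCommands folds a secondary command buffer's requirement into the primary with MAX2. The structs below are hypothetical stand-ins for struct radv_pipeline and struct radv_cmd_buffer; the >> 10 comment reflects the shift used in the diff when programming the TMPRING_SIZE wavesize field.

	#include <stdint.h>
	#include <stdio.h>

	#define MAX2(a, b) ((a) > (b) ? (a) : (b))

	struct pipeline {
		uint32_t max_waves;
		uint32_t scratch_bytes_per_wave;
	};

	struct cmd_buffer {
		uint32_t scratch_size_needed;
	};

	/* mirrors the accumulation done when a graphics pipeline is emitted */
	static void bind_pipeline(struct cmd_buffer *cmd, const struct pipeline *p)
	{
		cmd->scratch_size_needed = MAX2(cmd->scratch_size_needed,
						p->max_waves * p->scratch_bytes_per_wave);
	}

	/* mirrors folding a secondary command buffer into the primary */
	static void execute_secondary(struct cmd_buffer *primary,
				      const struct cmd_buffer *secondary)
	{
		primary->scratch_size_needed = MAX2(primary->scratch_size_needed,
						    secondary->scratch_size_needed);
	}

	int main(void)
	{
		struct cmd_buffer primary = {0}, secondary = {0};
		struct pipeline a = { .max_waves = 32, .scratch_bytes_per_wave = 4096 };
		struct pipeline b = { .max_waves = 64, .scratch_bytes_per_wave = 1024 };

		bind_pipeline(&primary, &a);
		bind_pipeline(&secondary, &b);
		execute_secondary(&primary, &secondary);

		/* the diff programs the wavesize field as scratch_bytes_per_wave >> 10 */
		printf("scratch needed: %u bytes (wavesize value for pipeline a: %u)\n",
		       primary.scratch_size_needed, a.scratch_bytes_per_wave >> 10);
		return 0;
	}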