From: Samuel Pitoiset Date: Thu, 27 Aug 2020 12:00:54 +0000 (-0700) Subject: radv: emit {CB,DB}_RMI_L2_CACHE_CONTROL at framebuffer time X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d6bc0f26c918a67ebf85595d06628e8092e271d0;p=mesa.git radv: emit {CB,DB}_RMI_L2_CACHE_CONTROL at framebuffer time The upcoming patch will set BIG_PAGE if needed. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 0e6a310cf8e..f6c053cb0ad 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1736,6 +1736,17 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface); if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + /* Enable HTILE caching in L2 for small chips. */ + unsigned meta_write_policy, meta_read_policy; + /* TODO: investigate whether LRU improves performance on other chips too */ + if (cmd_buffer->device->physical_device->rad_info.num_render_backends <= 4) { + meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */ + meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */ + } else { + meta_write_policy = V_02807C_CACHE_STREAM_WR; /* write combine */ + meta_read_policy = V_02807C_CACHE_NOA_RD; /* don't cache reads */ + } + radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size); @@ -1748,12 +1759,20 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cmd_buffer->cs, ds->db_z_read_base); radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); - radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 6); radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32); + radeon_emit(cmd_buffer->cs, + S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM_WR) | + S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM_WR) | + S_02807C_HTILE_WR_POLICY(meta_write_policy) | + S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM_WR) | + S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA_RD) | + S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA_RD) | + S_02807C_HTILE_RD_POLICY(meta_read_policy)); } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3); radeon_emit(cmd_buffer->cs, ds->db_htile_data_base); @@ -2294,6 +2313,29 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode)); } + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + /* Enable CMASK/FMASK/DCC caching in L2 for small chips. */ + unsigned meta_write_policy, meta_read_policy; + /* TODO: investigate whether LRU improves performance on other chips too */ + if (cmd_buffer->device->physical_device->rad_info.num_render_backends <= 4) { + meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */ + meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */ + } else { + meta_write_policy = V_02807C_CACHE_STREAM_WR; /* write combine */ + meta_read_policy = V_02807C_CACHE_NOA_RD; /* don't cache reads */ + } + + radeon_set_context_reg(cmd_buffer->cs, R_028410_CB_RMI_GL2_CACHE_CONTROL, + S_028410_CMASK_WR_POLICY(meta_write_policy) | + S_028410_FMASK_WR_POLICY(meta_write_policy) | + S_028410_DCC_WR_POLICY(meta_write_policy) | + S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM_WR) | + S_028410_CMASK_RD_POLICY(meta_read_policy) | + S_028410_FMASK_RD_POLICY(meta_read_policy) | + S_028410_DCC_RD_POLICY(meta_read_policy) | + S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_RD)); + } + if (cmd_buffer->device->dfsm_allowed) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 8af0d831c3a..ac5f8e7036e 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -408,37 +408,6 @@ si_emit_graphics(struct radv_device *device, radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512)); radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); - - /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */ - unsigned meta_write_policy, meta_read_policy; - - /* TODO: investigate whether LRU improves performance on other chips too */ - if (physical_device->rad_info.num_render_backends <= 4) { - meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */ - meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */ - } else { - meta_write_policy = V_02807C_CACHE_STREAM_WR; /* write combine */ - meta_read_policy = V_02807C_CACHE_NOA_RD; /* don't cache reads */ - } - - radeon_set_context_reg(cs, R_02807C_DB_RMI_L2_CACHE_CONTROL, - S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM_WR) | - S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM_WR) | - S_02807C_HTILE_WR_POLICY(meta_write_policy) | - S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM_WR) | - S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA_RD) | - S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA_RD) | - S_02807C_HTILE_RD_POLICY(meta_read_policy)); - - radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL, - S_028410_CMASK_WR_POLICY(meta_write_policy) | - S_028410_FMASK_WR_POLICY(meta_write_policy) | - S_028410_DCC_WR_POLICY(meta_write_policy) | - S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM_WR) | - S_028410_CMASK_RD_POLICY(meta_read_policy) | - S_028410_FMASK_RD_POLICY(meta_read_policy) | - S_028410_DCC_RD_POLICY(meta_read_policy) | - S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_RD)); radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0); radeon_set_sh_reg(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);