RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_32BIT,
+ RADEON_FLAG_32BIT |
+ RADEON_FLAG_GTT_WC,
RADV_BO_PRIORITY_UPLOAD_BUFFER);
if (!bo) {
static void
radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline, enum ring_type ring)
+ struct radv_pipeline *pipeline)
{
struct radv_device *device = cmd_buffer->device;
+ enum ring_type ring;
uint32_t data[2];
uint64_t va;
va = radv_buffer_get_va(device->trace_bo);
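+ /* The ring type is no longer passed in by the caller; derive it from
+ * the command buffer's queue family instead.
+ */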
+ ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+
switch (ring) {
case RING_GFX:
va += 8;
pipeline->gs_copy_shader->bo);
if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_pipeline(cmd_buffer, pipeline, RING_GFX);
+ radv_save_pipeline(cmd_buffer, pipeline);
cmd_buffer->state.emitted_pipeline = pipeline;
radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* Enable HTILE caching in L2 for small chips. */
+ unsigned meta_write_policy, meta_read_policy;
+ /* TODO: investigate whether LRU improves performance on other chips too */
+ if (cmd_buffer->device->physical_device->rad_info.num_render_backends <= 4) {
+ meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
+ meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */
+ } else {
+ meta_write_policy = V_02807C_CACHE_STREAM_WR; /* write combine */
+ meta_read_policy = V_02807C_CACHE_NOA_RD; /* don't cache reads */
+ }
+
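+ /* As with the color path below, BIG_PAGE can only be used on GFX10.3+
+ * and only when the depth/stencil surface alignment is a multiple of 64K.
+ */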
+ bool zs_big_page = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10_3 &&
+ (image->alignment % (64 * 1024) == 0);
+
radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size);
radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5);
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 6);
radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32);
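+ /* Sixth dword of the sequence: DB_RMI_L2_CACHE_CONTROL (0x2807C),
+ * which carries the cache policies selected above.
+ */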
+ radeon_emit(cmd_buffer->cs,
+ S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
+ S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
+ S_02807C_HTILE_WR_POLICY(meta_write_policy) |
+ S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
+ S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA_RD) |
+ S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA_RD) |
+ S_02807C_HTILE_RD_POLICY(meta_read_policy) |
+ S_02807C_Z_BIG_PAGE(zs_big_page) |
+ S_02807C_S_BIG_PAGE(zs_big_page));
} else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
radeon_emit(cmd_buffer->cs, ds->db_htile_data_base);
int i;
struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
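+ /* Start out assuming BIG_PAGE is usable; each color target below can
+ * clear it if that target is not compatible.
+ */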
+ bool color_big_page = true;
/* this may happen for inherited secondary recording */
if (!framebuffer)
radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout, in_render_loop);
radv_load_color_clear_metadata(cmd_buffer, iview, i);
+
+ /* BIG_PAGE is an optimization that can only be enabled if all
+ * color targets are compatible.
+ */
+ color_big_page &= cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10_3 &&
+ (iview->image->alignment % (64 * 1024) == 0);
}
if (subpass->depth_stencil_attachment) {
S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
}
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* Enable CMASK/FMASK/DCC caching in L2 for small chips. */
+ unsigned meta_write_policy, meta_read_policy;
+ /* TODO: investigate whether LRU improves performance on other chips too */
+ if (cmd_buffer->device->physical_device->rad_info.num_render_backends <= 4) {
+ meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
+ meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */
+ } else {
+ meta_write_policy = V_02807C_CACHE_STREAM_WR; /* write combine */
+ meta_read_policy = V_02807C_CACHE_NOA_RD; /* don't cache reads */
+ }
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028410_CB_RMI_GL2_CACHE_CONTROL,
+ S_028410_CMASK_WR_POLICY(meta_write_policy) |
+ S_028410_FMASK_WR_POLICY(meta_write_policy) |
+ S_028410_DCC_WR_POLICY(meta_write_policy) |
+ S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM_WR) |
+ S_028410_CMASK_RD_POLICY(meta_read_policy) |
+ S_028410_FMASK_RD_POLICY(meta_read_policy) |
+ S_028410_DCC_RD_POLICY(meta_read_policy) |
+ S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_RD) |
+ S_028410_FMASK_BIG_PAGE(color_big_page) |
+ S_028410_COLOR_BIG_PAGE(color_big_page));
+ }
+
if (cmd_buffer->device->dfsm_allowed) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
state->attachments[i].current_layout = att->initial_layout;
+ state->attachments[i].current_in_render_loop = false;
state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
state->attachments[i].sample_location.count = 0;
pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
+ radv_save_pipeline(cmd_buffer, pipeline);
}
static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer,