From de887ba90ce077a0243269aa0c72a1ab0d2d3ff4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 1 Dec 2015 14:56:54 +0100 Subject: [PATCH] radeonsi: implement RB+ for Stoney (v2) v2: fix dual source blending Reviewed-by: Alex Deucher --- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.h | 3 + src/gallium/drivers/radeon/r600_texture.c | 6 + src/gallium/drivers/radeonsi/si_state.c | 159 +++++++++++++++++- src/gallium/drivers/radeonsi/sid.h | 3 + 5 files changed, 170 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 8899ba4d55b..ba541acfd75 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -375,6 +375,7 @@ static const struct debug_named_value common_debug_options[] = { { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, { "nodcc", DBG_NO_DCC, "Disable DCC." }, { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, + { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." }, DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 8c6c0c37e50..dd23ed5be89 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -86,6 +86,7 @@ #define DBG_CHECK_VM (1llu << 42) #define DBG_NO_DCC (1llu << 43) #define DBG_NO_DCC_CLEAR (1llu << 44) +#define DBG_NO_RB_PLUS (1llu << 45) #define R600_MAP_BUFFER_ALIGNMENT 64 @@ -250,6 +251,8 @@ struct r600_surface { unsigned cb_color_fmask_slice; /* EG and later */ unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ unsigned cb_color_mask; /* R600 only */ + unsigned sx_ps_downconvert; /* Stoney only */ + unsigned sx_blend_opt_epsilon; /* Stoney only */ struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */ struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 6515a829b5a..de2d1cb53b3 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1389,6 +1389,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, return; for (i = 0; i < fb->nr_cbufs; i++) { + struct r600_surface *surf; struct r600_texture *tex; unsigned clear_bit = PIPE_CLEAR_COLOR0 << i; @@ -1399,6 +1400,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, if (!(*buffers & clear_bit)) continue; + surf = (struct r600_surface *)fb->cbufs[i]; tex = (struct r600_texture *)fb->cbufs[i]->texture; /* 128-bit formats are unusupported */ @@ -1445,6 +1447,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, if (clear_words_needed) tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; } else { + /* RB+ doesn't work with CMASK fast clear. 
*/ + if (surf->sx_ps_downconvert) + continue; + /* ensure CMASK is enabled */ r600_texture_alloc_cmask_separate(rctx->screen, tex); if (tex->cmask.size == 0) { diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index bbe15497b67..1cc03f75045 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -347,10 +347,54 @@ static uint32_t si_translate_blend_factor(int blend_fact) return 0; } +static uint32_t si_translate_blend_opt_function(int blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return V_028760_OPT_COMB_ADD; + case PIPE_BLEND_SUBTRACT: + return V_028760_OPT_COMB_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return V_028760_OPT_COMB_REVSUBTRACT; + case PIPE_BLEND_MIN: + return V_028760_OPT_COMB_MIN; + case PIPE_BLEND_MAX: + return V_028760_OPT_COMB_MAX; + default: + return V_028760_OPT_COMB_BLEND_DISABLED; + } +} + +static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) +{ + switch (blend_fact) { + case PIPE_BLENDFACTOR_ZERO: + return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; + case PIPE_BLENDFACTOR_ONE: + return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 + : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 + : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return is_alpha ? 
V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE + : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; + default: + return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; + } +} + static void *si_create_blend_state_mode(struct pipe_context *ctx, const struct pipe_blend_state *state, unsigned mode) { + struct si_context *sctx = (struct si_context*)ctx; struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); struct si_pm4_state *pm4 = &blend->pm4; @@ -416,8 +460,47 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, } else { color_control |= S_028808_MODE(V_028808_CB_DISABLE); } - si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); + if (sctx->b.family == CHIP_STONEY) { + uint32_t sx_blend_opt_control = 0; + + for (int i = 0; i < 8; i++) { + const int j = state->independent_blend_enable ? i : 0; + + /* TODO: We can also set this if the surface doesn't contain RGB. */ + if (!state->rt[j].blend_enable || + !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B))) + sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i); + + /* TODO: We can also set this if the surface doesn't contain alpha. 
*/ + if (!state->rt[j].blend_enable || + !(state->rt[j].colormask & PIPE_MASK_A)) + sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i); + + if (!state->rt[j].blend_enable) { + si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, + S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | + S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED)); + continue; + } + + si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, + S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) | + S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) | + S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) | + S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) | + S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) | + S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func))); + } + + si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control); + + /* RB+ doesn't work with dual source blending */ + if (blend->dual_src_blend) + color_control |= S_028808_DISABLE_DUAL_QUAD(1); + } + + si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); return blend; } @@ -1057,6 +1140,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; + if (sctx->b.family == CHIP_STONEY && + sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) + db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); + radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, db_shader_control); } @@ -1970,6 +2057,61 @@ static void si_initialize_color_surface(struct si_context *sctx, surf->export_16bpc = true; } + if (sctx->b.family == CHIP_STONEY && + !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) { + switch (desc->channel[0].size) { + case 
32: + if (desc->nr_channels == 1) { + if (swap == V_0280A0_SWAP_STD) + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R; + else if (swap == V_0280A0_SWAP_ALT_REV) + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A; + } + break; + case 16: + /* For 1-channel formats, use the superset thereof. */ + if (desc->nr_channels <= 2) { + if (swap == V_0280A0_SWAP_STD || + swap == V_0280A0_SWAP_STD_REV) + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR; + else + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR; + } + break; + case 11: + if (desc->nr_channels == 3) { + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11; + surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT; + } + break; + case 10: + if (desc->nr_channels == 4) { + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10; + surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT; + } + break; + case 8: + /* For 1 and 2-channel formats, use the superset thereof. */ + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8; + surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT; + break; + case 5: + if (desc->nr_channels == 3) { + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5; + surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT; + } else if (desc->nr_channels == 4) { + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5; + surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT; + } + break; + case 4: + /* For 1 and 2-channel formats, use the superset thereof. */ + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4; + surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT; + break; + } + } + surf->color_initialized = true; } @@ -2238,6 +2380,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom unsigned i, nr_cbufs = state->nr_cbufs; struct r600_texture *tex = NULL; struct r600_surface *cb = NULL; + uint32_t sx_ps_downconvert = 0; + uint32_t sx_blend_opt_epsilon = 0; /* Colorbuffers. 
*/ for (i = 0; i < nr_cbufs; i++) { @@ -2288,18 +2432,29 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom if (sctx->b.chip_class >= VI) radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */ + + sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i); + sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i); } /* set CB_COLOR1_INFO for possible dual-src blending */ if (i == 1 && state->cbufs[0] && sctx->framebuffer.dirty_cbufs & (1 << 0)) { radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, cb->cb_color_info | tex->cb_color_info); + sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i); + sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i); i++; } for (; i < 8 ; i++) if (sctx->framebuffer.dirty_cbufs & (1 << i)) radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); + if (sctx->b.family == CHIP_STONEY) { + radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2); + radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ + radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ + } + /* ZS buffer. 
*/ if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { struct r600_surface *zb = (struct r600_surface*)state->zsbuf; @@ -3460,7 +3615,7 @@ static void si_init_config(struct si_context *sctx) } if (sctx->b.family == CHIP_STONEY) - si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0); + si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); if (sctx->b.chip_class >= CIK) diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index d2648e93c14..573ab78b482 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -6771,6 +6771,9 @@ #define G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) >> 27) & 0x1) #define C_028804_ENABLE_POSTZ_OVERRASTERIZATION 0xF7FFFFFF #define R_028808_CB_COLOR_CONTROL 0x028808 +#define S_028808_DISABLE_DUAL_QUAD(x) (((x) & 0x1) << 0) +#define G_028808_DISABLE_DUAL_QUAD(x) (((x) >> 0) & 0x1) +#define C_028808_DISABLE_DUAL_QUAD 0xFFFFFFFE #define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) #define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) #define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -- 2.30.2