radeonsi: implement RB+ for Stoney (v2)
authorMarek Olšák <marek.olsak@amd.com>
Tue, 1 Dec 2015 13:56:54 +0000 (14:56 +0100)
committerMarek Olšák <marek.olsak@amd.com>
Fri, 11 Dec 2015 14:25:12 +0000 (15:25 +0100)
v2: fix dual source blending

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/sid.h

index 8899ba4d55b8875488f91e62a8cd4b24327c6a22..ba541acfd757ad465467b40d330318b568e3951a 100644 (file)
@@ -375,6 +375,7 @@ static const struct debug_named_value common_debug_options[] = {
        { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
        { "nodcc", DBG_NO_DCC, "Disable DCC." },
        { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
+       { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
 
        DEBUG_NAMED_VALUE_END /* must be last */
 };
index 8c6c0c37e50b03d6f384093983ec08890d2fec9e..dd23ed5be890ed56c7cf1f0dc4e000647617f4f6 100644 (file)
@@ -86,6 +86,7 @@
 #define DBG_CHECK_VM           (1llu << 42)
 #define DBG_NO_DCC             (1llu << 43)
 #define DBG_NO_DCC_CLEAR       (1llu << 44)
+#define DBG_NO_RB_PLUS         (1llu << 45)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 
@@ -250,6 +251,8 @@ struct r600_surface {
        unsigned cb_color_fmask_slice;  /* EG and later */
        unsigned cb_color_cmask;        /* CB_COLORn_TILE (r600 only) */
        unsigned cb_color_mask;         /* R600 only */
+       unsigned sx_ps_downconvert;     /* Stoney only */
+       unsigned sx_blend_opt_epsilon;  /* Stoney only */
        struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
        struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
 
index 6515a829b5aa3cf84595e7165700c3ae283cf5c3..de2d1cb53b3afeda647b512a6ea5355b099a720c 100644 (file)
@@ -1389,6 +1389,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                return;
 
        for (i = 0; i < fb->nr_cbufs; i++) {
+               struct r600_surface *surf;
                struct r600_texture *tex;
                unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
 
@@ -1399,6 +1400,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                if (!(*buffers & clear_bit))
                        continue;
 
+               surf = (struct r600_surface *)fb->cbufs[i];
                tex = (struct r600_texture *)fb->cbufs[i]->texture;
 
                /* 128-bit formats are unusupported */
@@ -1445,6 +1447,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                        if (clear_words_needed)
                                tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
                } else {
+                       /* RB+ doesn't work with CMASK fast clear. */
+                       if (surf->sx_ps_downconvert)
+                               continue;
+
                        /* ensure CMASK is enabled */
                        r600_texture_alloc_cmask_separate(rctx->screen, tex);
                        if (tex->cmask.size == 0) {
index bbe15497b67ac276351b78bd0b3c60d50ca907de..1cc03f75045e775d05d0abfce49096dae583a590 100644 (file)
@@ -347,10 +347,54 @@ static uint32_t si_translate_blend_factor(int blend_fact)
        return 0;
 }
 
+static uint32_t si_translate_blend_opt_function(int blend_func)
+{
+       switch (blend_func) {
+       case PIPE_BLEND_ADD:
+               return V_028760_OPT_COMB_ADD;
+       case PIPE_BLEND_SUBTRACT:
+               return V_028760_OPT_COMB_SUBTRACT;
+       case PIPE_BLEND_REVERSE_SUBTRACT:
+               return V_028760_OPT_COMB_REVSUBTRACT;
+       case PIPE_BLEND_MIN:
+               return V_028760_OPT_COMB_MIN;
+       case PIPE_BLEND_MAX:
+               return V_028760_OPT_COMB_MAX;
+       default:
+               return V_028760_OPT_COMB_BLEND_DISABLED;
+       }
+}
+
+static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
+{
+       switch (blend_fact) {
+       case PIPE_BLENDFACTOR_ZERO:
+               return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
+       case PIPE_BLENDFACTOR_ONE:
+               return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
+       case PIPE_BLENDFACTOR_SRC_COLOR:
+               return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
+                               : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
+       case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+               return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
+                               : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
+       case PIPE_BLENDFACTOR_SRC_ALPHA:
+               return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
+       case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+               return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
+       case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+               return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
+                               : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+       default:
+               return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+       }
+}
+
 static void *si_create_blend_state_mode(struct pipe_context *ctx,
                                        const struct pipe_blend_state *state,
                                        unsigned mode)
 {
+       struct si_context *sctx = (struct si_context*)ctx;
        struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
        struct si_pm4_state *pm4 = &blend->pm4;
 
@@ -416,8 +460,47 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
        } else {
                color_control |= S_028808_MODE(V_028808_CB_DISABLE);
        }
-       si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
 
+       if (sctx->b.family == CHIP_STONEY) {
+               uint32_t sx_blend_opt_control = 0;
+
+               for (int i = 0; i < 8; i++) {
+                       const int j = state->independent_blend_enable ? i : 0;
+
+                       /* TODO: We can also set this if the surface doesn't contain RGB. */
+                       if (!state->rt[j].blend_enable ||
+                           !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
+                               sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
+
+                       /* TODO: We can also set this if the surface doesn't contain alpha. */
+                       if (!state->rt[j].blend_enable ||
+                           !(state->rt[j].colormask & PIPE_MASK_A))
+                               sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
+
+                       if (!state->rt[j].blend_enable) {
+                               si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
+                                              S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+                                              S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
+                               continue;
+                       }
+
+                       si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
+                               S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
+                               S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
+                               S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
+                               S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
+                               S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
+                               S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
+               }
+
+               si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
+
+               /* RB+ doesn't work with dual source blending */
+               if (blend->dual_src_blend)
+                       color_control |= S_028808_DISABLE_DUAL_QUAD(1);
+       }
+
+       si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
        return blend;
 }
 
@@ -1057,6 +1140,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
        if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
                db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
 
+       if (sctx->b.family == CHIP_STONEY &&
+           sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
+               db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
+
        radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
                               db_shader_control);
 }
@@ -1970,6 +2057,61 @@ static void si_initialize_color_surface(struct si_context *sctx,
                surf->export_16bpc = true;
        }
 
+       if (sctx->b.family == CHIP_STONEY &&
+           !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
+               switch (desc->channel[0].size) {
+               case 32:
+                       if (desc->nr_channels == 1) {
+                               if (swap == V_0280A0_SWAP_STD)
+                                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
+                               else if (swap == V_0280A0_SWAP_ALT_REV)
+                                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
+                       }
+                       break;
+               case 16:
+                       /* For 1-channel formats, use the superset thereof. */
+                       if (desc->nr_channels <= 2) {
+                               if (swap == V_0280A0_SWAP_STD ||
+                                   swap == V_0280A0_SWAP_STD_REV)
+                                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
+                               else
+                                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
+                       }
+                       break;
+               case 11:
+                       if (desc->nr_channels == 3) {
+                               surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
+                               surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
+                       }
+                       break;
+               case 10:
+                       if (desc->nr_channels == 4) {
+                               surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
+                               surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
+                       }
+                       break;
+               case 8:
+                       /* For 1 and 2-channel formats, use the superset thereof. */
+                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
+                       surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
+                       break;
+               case 5:
+                       if (desc->nr_channels == 3) {
+                               surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
+                               surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
+                       } else if (desc->nr_channels == 4) {
+                               surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
+                               surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
+                       }
+                       break;
+               case 4:
+                       /* For 1 nad 2-channel formats, use the superset thereof. */
+                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
+                       surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
+                       break;
+               }
+       }
+
        surf->color_initialized = true;
 }
 
@@ -2238,6 +2380,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
        unsigned i, nr_cbufs = state->nr_cbufs;
        struct r600_texture *tex = NULL;
        struct r600_surface *cb = NULL;
+       uint32_t sx_ps_downconvert = 0;
+       uint32_t sx_blend_opt_epsilon = 0;
 
        /* Colorbuffers. */
        for (i = 0; i < nr_cbufs; i++) {
@@ -2288,18 +2432,29 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
                if (sctx->b.chip_class >= VI)
                        radeon_emit(cs, cb->cb_dcc_base);       /* R_028C94_CB_COLOR0_DCC_BASE */
+
+               sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
+               sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
        }
        /* set CB_COLOR1_INFO for possible dual-src blending */
        if (i == 1 && state->cbufs[0] &&
            sctx->framebuffer.dirty_cbufs & (1 << 0)) {
                radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
                                       cb->cb_color_info | tex->cb_color_info);
+               sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
+               sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
                i++;
        }
        for (; i < 8 ; i++)
                if (sctx->framebuffer.dirty_cbufs & (1 << i))
                        radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 
+       if (sctx->b.family == CHIP_STONEY) {
+               radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
+               radeon_emit(cs, sx_ps_downconvert);     /* R_028754_SX_PS_DOWNCONVERT */
+               radeon_emit(cs, sx_blend_opt_epsilon);  /* R_028758_SX_BLEND_OPT_EPSILON */
+       }
+
        /* ZS buffer. */
        if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
                struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
@@ -3460,7 +3615,7 @@ static void si_init_config(struct si_context *sctx)
        }
 
        if (sctx->b.family == CHIP_STONEY)
-               si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
+               si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
 
        si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
        if (sctx->b.chip_class >= CIK)
index d2648e93c140d978499eb0d6cb9344c81e042f01..573ab78b4827d001487eb67cae8db0aa2ddcc9a2 100644 (file)
 #define   G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)                  (((x) >> 27) & 0x1)
 #define   C_028804_ENABLE_POSTZ_OVERRASTERIZATION                     0xF7FFFFFF
 #define R_028808_CB_COLOR_CONTROL                                       0x028808
+#define   S_028808_DISABLE_DUAL_QUAD(x)                               (((x) & 0x1) << 0)
+#define   G_028808_DISABLE_DUAL_QUAD(x)                               (((x) >> 0) & 0x1)
+#define   C_028808_DISABLE_DUAL_QUAD                                  0xFFFFFFFE
 #define   S_028808_DEGAMMA_ENABLE(x)                                  (((x) & 0x1) << 3)
 #define   G_028808_DEGAMMA_ENABLE(x)                                  (((x) >> 3) & 0x1)
 #define   C_028808_DEGAMMA_ENABLE                                     0xFFFFFFF7