radeonsi: Ensure fmask_format is initialized in release builds.
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
index bdd41b455344a70a1699d9e5cdf8f5e15ec7de96..6410e45a5cd4281d6b70a69b47df89fb0dc6ccf9 100644 (file)
 #include "util/u_helpers.h"
 #include "util/u_math.h"
 #include "util/u_pack_color.h"
+#include "util/u_upload_mgr.h"
+#include "util/u_format_s3tc.h"
 #include "tgsi/tgsi_parse.h"
 #include "radeonsi_pipe.h"
 #include "radeonsi_shader.h"
 #include "si_state.h"
 #include "sid.h"
 
+static uint32_t cik_num_banks(uint32_t nbanks) /* Translate a bank count into its V_02803C_ADDR_SURF_*_BANK register code. */
+{
+       switch (nbanks) {
+       case 2:
+               return V_02803C_ADDR_SURF_2_BANK;
+       case 4:
+               return V_02803C_ADDR_SURF_4_BANK;
+       case 8:
+       default: /* any count not listed in this switch is encoded as 8 banks */
+               return V_02803C_ADDR_SURF_8_BANK;
+       case 16: /* still reachable: "default" is only a label, so a 16 matches here first */
+               return V_02803C_ADDR_SURF_16_BANK;
+       }
+}
+
+
+static unsigned cik_tile_split(unsigned tile_split) /* Translate a tile split size (64..4096) into its V_028040_ADDR_SURF_TILE_SPLIT_* register code. */
+{
+       switch (tile_split) { /* the parameter doubles as the result: each arm overwrites it with the encoded value */
+       case 64:
+               tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B;
+               break;
+       case 128:
+               tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B;
+               break;
+       case 256:
+               tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B;
+               break;
+       case 512:
+               tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B;
+               break;
+       default: /* any size not listed in this switch is encoded as 1KB */
+       case 1024:
+               tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB;
+               break;
+       case 2048:
+               tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB;
+               break;
+       case 4096:
+               tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB;
+               break;
+       }
+       return tile_split; /* always one of the V_028040_ADDR_SURF_TILE_SPLIT_* codes, never the raw input */
+}
+
+static unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect) /* Translate a macro tile aspect (1, 2, 4 or 8) into its V_02803C_ADDR_SURF_MACRO_ASPECT_* register code. */
+{
+       switch (macro_tile_aspect) { /* the parameter doubles as the result: each arm overwrites it with the encoded value */
+       default: /* any value not listed in this switch is encoded as aspect 1 */
+       case 1:
+               macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1;
+               break;
+       case 2:
+               macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2;
+               break;
+       case 4:
+               macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4;
+               break;
+       case 8:
+               macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8;
+               break;
+       }
+       return macro_tile_aspect;
+}
+
+static unsigned cik_bank_wh(unsigned bankwh) /* Translate a bank width or bank height (1, 2, 4 or 8) into a V_02803C_ADDR_SURF_BANK_WIDTH_* register code. */
+{
+       switch (bankwh) { /* NOTE(review): si_db() feeds both bankw and bankh through this table; presumably the BANK_HEIGHT_* encodings match BANK_WIDTH_* - confirm against sid.h */
+       default: /* any value not listed in this switch is encoded as 1 */
+       case 1:
+               bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1;
+               break;
+       case 2:
+               bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2;
+               break;
+       case 4:
+               bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4;
+               break;
+       case 8:
+               bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8;
+               break;
+       }
+       return bankwh;
+}
+
+static unsigned cik_db_pipe_config(unsigned tile_pipes, /* Pick the V_02803C_*ADDR_SURF_P* pipe configuration code for a tile pipe count and render backend count. */
+                                  unsigned num_rbs)
+{
+       unsigned pipe_config; /* assigned on every switch path below, including default */
+
+       switch (tile_pipes) {
+       case 8:
+               pipe_config = V_02803C_X_ADDR_SURF_P8_32X32_16X16;
+               break;
+       case 4:
+       default: /* any pipe count other than 8 or 2 is handled like 4 pipes */
+               if (num_rbs == 4) /* num_rbs is only consulted in this 4-pipe/default arm */
+                       pipe_config = V_02803C_X_ADDR_SURF_P4_16X16;
+               else
+                       pipe_config = V_02803C_X_ADDR_SURF_P4_8X16;
+               break;
+       case 2:
+                       pipe_config = V_02803C_ADDR_SURF_P2;
+               break;
+       }
+       return pipe_config;
+}
+
 /*
  * inferred framebuffer and blender state
  */
@@ -48,7 +158,7 @@ static void si_update_fb_blend_state(struct r600_context *rctx)
        if (blend == NULL)
                return;
 
-       pm4 = CALLOC_STRUCT(si_pm4_state);
+       pm4 = si_pm4_alloc_state(rctx);
        if (pm4 == NULL)
                return;
 
@@ -133,8 +243,9 @@ static uint32_t si_translate_blend_factor(int blend_fact)
        return 0;
 }
 
-static void *si_create_blend_state(struct pipe_context *ctx,
-                                  const struct pipe_blend_state *state)
+static void *si_create_blend_state_mode(struct pipe_context *ctx,
+                                       const struct pipe_blend_state *state,
+                                       unsigned mode)
 {
        struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
        struct si_pm4_state *pm4 = &blend->pm4;
@@ -144,7 +255,9 @@ static void *si_create_blend_state(struct pipe_context *ctx,
        if (blend == NULL)
                return NULL;
 
-       color_control = S_028808_MODE(V_028808_CB_NORMAL);
+       blend->alpha_to_one = state->alpha_to_one;
+
+       color_control = S_028808_MODE(mode);
        if (state->logicop_enable) {
                color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
        } else {
@@ -152,8 +265,12 @@ static void *si_create_blend_state(struct pipe_context *ctx,
        }
        si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
 
-       si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, ~0);
-       si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, ~0);
+       si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
+                      S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
+                      S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
+                      S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
+                      S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
+                      S_028B70_ALPHA_TO_MASK_OFFSET3(2));
 
        blend->cb_target_mask = 0;
        for (int i = 0; i < 8; i++) {
@@ -194,6 +311,12 @@ static void *si_create_blend_state(struct pipe_context *ctx,
        return blend;
 }
 
+static void *si_create_blend_state(struct pipe_context *ctx,
+                                  const struct pipe_blend_state *state) /* Thin wrapper: build a blend state with the V_028808_CB_NORMAL color-control mode. */
+{
+       return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); /* result is passed through unchanged, including NULL on failure */
+}
+
 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
@@ -211,7 +334,7 @@ static void si_set_blend_color(struct pipe_context *ctx,
                               const struct pipe_blend_color *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
 
         if (pm4 == NULL)
                 return;
@@ -232,7 +355,8 @@ static void si_set_clip_state(struct pipe_context *ctx,
                              const struct pipe_clip_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
+       struct pipe_constant_buffer cb;
 
        if (pm4 == NULL)
                return;
@@ -248,14 +372,23 @@ static void si_set_clip_state(struct pipe_context *ctx,
                               fui(state->ucp[i][3]));
         }
 
+       cb.buffer = NULL;
+       cb.user_buffer = state->ucp;
+       cb.buffer_offset = 0;
+       cb.buffer_size = 4*4*8;
+       ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 1, &cb);
+       pipe_resource_reference(&cb.buffer, NULL);
+
        si_pm4_set_state(rctx, clip, pm4);
 }
 
-static void si_set_scissor_state(struct pipe_context *ctx,
-                                const struct pipe_scissor_state *state)
+static void si_set_scissor_states(struct pipe_context *ctx,
+                                  unsigned start_slot,
+                                  unsigned num_scissors,
+                                  const struct pipe_scissor_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
        uint32_t tl, br;
 
        if (pm4 == NULL)
@@ -275,8 +408,10 @@ static void si_set_scissor_state(struct pipe_context *ctx,
        si_pm4_set_state(rctx, scissor, pm4);
 }
 
-static void si_set_viewport_state(struct pipe_context *ctx,
-                                 const struct pipe_viewport_state *state)
+static void si_set_viewport_states(struct pipe_context *ctx,
+                                   unsigned start_slot,
+                                   unsigned num_viewports,
+                                   const struct pipe_viewport_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
@@ -335,7 +470,11 @@ static void si_update_fb_rs_state(struct r600_context *rctx)
                return;
        }
 
-       pm4 = CALLOC_STRUCT(si_pm4_state);
+       pm4 = si_pm4_alloc_state(rctx);
+
+       if (pm4 == NULL)
+               return;
+
        /* FIXME some of those reg can be computed with cso */
        offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
        si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
@@ -383,6 +522,8 @@ static void *si_create_rs_state(struct pipe_context *ctx,
        }
 
        rs->two_side = state->light_twoside;
+       rs->multisample_enable = state->multisample;
+       rs->clip_plane_enable = state->clip_plane_enable;
 
        polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
                                state->fill_back != PIPE_POLYGON_MODE_FILL);
@@ -411,9 +552,6 @@ static void *si_create_rs_state(struct pipe_context *ctx,
                S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
                S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
                S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
-       rs->pa_cl_vs_out_cntl =
-               S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
-               S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);
 
        clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
 
@@ -455,11 +593,12 @@ static void *si_create_rs_state(struct pipe_context *ctx,
        tmp = (unsigned)state->line_width * 8;
        si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
        si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
-                       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable));
+                      S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
+                      S_028A48_MSAA_ENABLE(state->multisample));
 
-       si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, 0x00000400);
        si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
-                       S_028BE4_PIX_CENTER(state->gl_rasterization_rules));
+                      S_028BE4_PIX_CENTER(state->half_pixel_center) |
+                      S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
        si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000);
        si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000);
        si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000);
@@ -483,8 +622,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
        rctx->sprite_coord_enable = rs->sprite_coord_enable;
        rctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
        rctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
-       rctx->pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
-       rctx->pa_cl_vs_out_cntl = rs->pa_cl_vs_out_cntl;
 
        si_pm4_bind_state(rctx, rasterizer, rs);
        si_update_fb_rs_state(rctx);
@@ -501,7 +638,7 @@ static void si_delete_rs_state(struct pipe_context *ctx, void *state)
  */
 static void si_update_dsa_stencil_ref(struct r600_context *rctx)
 {
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
        struct pipe_stencil_ref *ref = &rctx->stencil_ref;
         struct si_state_dsa *dsa = rctx->queued.named.dsa;
 
@@ -627,7 +764,6 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
        si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
        si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
        si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
-       si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
        dsa->db_render_override = db_render_override;
 
        return dsa;
@@ -652,7 +788,7 @@ static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
 }
 
 static void *si_create_db_flush_dsa(struct r600_context *rctx, bool copy_depth,
-                                   bool copy_stencil)
+                                   bool copy_stencil, int sample)
 {
        struct pipe_depth_stencil_alpha_state dsa;
         struct si_state_dsa *state;
@@ -664,7 +800,8 @@ static void *si_create_db_flush_dsa(struct r600_context *rctx, bool copy_depth,
                si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
                               S_028000_DEPTH_COPY(copy_depth) |
                               S_028000_STENCIL_COPY(copy_stencil) |
-                              S_028000_COPY_CENTROID(1));
+                              S_028000_COPY_CENTROID(1) |
+                              S_028000_COPY_SAMPLE(sample));
        } else {
                si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
                               S_028000_DEPTH_COMPRESS_DISABLE(1) |
@@ -1164,6 +1301,8 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
                                       const struct util_format_description *desc,
                                       int first_non_void)
 {
+       struct r600_screen *rscreen = (struct r600_screen*)screen;
+       bool enable_s3tc = rscreen->info.drm_minor >= 31;
        boolean uniform = TRUE;
        int i;
 
@@ -1205,7 +1344,51 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
                break;
        }
 
-       /* TODO compressed formats */
+       if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+               if (!enable_s3tc)
+                       goto out_unknown;
+
+               switch (format) {
+               case PIPE_FORMAT_RGTC1_SNORM:
+               case PIPE_FORMAT_LATC1_SNORM:
+               case PIPE_FORMAT_RGTC1_UNORM:
+               case PIPE_FORMAT_LATC1_UNORM:
+                       return V_008F14_IMG_DATA_FORMAT_BC4;
+               case PIPE_FORMAT_RGTC2_SNORM:
+               case PIPE_FORMAT_LATC2_SNORM:
+               case PIPE_FORMAT_RGTC2_UNORM:
+               case PIPE_FORMAT_LATC2_UNORM:
+                       return V_008F14_IMG_DATA_FORMAT_BC5;
+               default:
+                       goto out_unknown;
+               }
+       }
+
+       if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+
+               if (!enable_s3tc)
+                       goto out_unknown;
+
+               if (!util_format_s3tc_enabled) {
+                       goto out_unknown;
+               }
+
+               switch (format) {
+               case PIPE_FORMAT_DXT1_RGB:
+               case PIPE_FORMAT_DXT1_RGBA:
+               case PIPE_FORMAT_DXT1_SRGB:
+               case PIPE_FORMAT_DXT1_SRGBA:
+                       return V_008F14_IMG_DATA_FORMAT_BC1;
+               case PIPE_FORMAT_DXT3_RGBA:
+               case PIPE_FORMAT_DXT3_SRGBA:
+                       return V_008F14_IMG_DATA_FORMAT_BC2;
+               case PIPE_FORMAT_DXT5_RGBA:
+               case PIPE_FORMAT_DXT5_SRGBA:
+                       return V_008F14_IMG_DATA_FORMAT_BC3;
+               default:
+                       goto out_unknown;
+               }
+       }
 
        if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
                return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
@@ -1373,7 +1556,7 @@ static unsigned si_tex_compare(unsigned compare)
        }
 }
 
-static unsigned si_tex_dim(unsigned dim)
+static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
 {
        switch (dim) {
        default:
@@ -1383,9 +1566,11 @@ static unsigned si_tex_dim(unsigned dim)
                return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
        case PIPE_TEXTURE_2D:
        case PIPE_TEXTURE_RECT:
-               return V_008F1C_SQ_RSRC_IMG_2D;
+               return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
+                                       V_008F1C_SQ_RSRC_IMG_2D;
        case PIPE_TEXTURE_2D_ARRAY:
-               return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
+               return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
+                                       V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
        case PIPE_TEXTURE_3D:
                return V_008F1C_SQ_RSRC_IMG_3D;
        case PIPE_TEXTURE_CUBE:
@@ -1492,6 +1677,7 @@ boolean si_is_format_supported(struct pipe_screen *screen,
                                unsigned sample_count,
                                unsigned usage)
 {
+       struct r600_screen *rscreen = (struct r600_screen *)screen;
        unsigned retval = 0;
 
        if (target >= PIPE_MAX_TEXTURE_TYPES) {
@@ -1502,9 +1688,19 @@ boolean si_is_format_supported(struct pipe_screen *screen,
        if (!util_format_is_supported(format, usage))
                return FALSE;
 
-       /* Multisample */
-       if (sample_count > 1)
-               return FALSE;
+       if (sample_count > 1) {
+               if (HAVE_LLVM < 0x0304 || rscreen->chip_class != SI)
+                       return FALSE;
+
+               switch (sample_count) {
+               case 2:
+               case 4:
+               case 8:
+                       break;
+               default:
+                       return FALSE;
+               }
+       }
 
        if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
            si_is_sampler_format_supported(screen, format)) {
@@ -1541,67 +1737,16 @@ boolean si_is_format_supported(struct pipe_screen *screen,
        return retval == usage;
 }
 
-static unsigned si_tile_mode_index(struct r600_resource_texture *rtex, unsigned level)
-{
-       if (util_format_is_depth_or_stencil(rtex->real_format)) {
-               if (rtex->surface.level[level].mode == RADEON_SURF_MODE_1D) {
-                       return 4;
-               } else if (rtex->surface.level[level].mode == RADEON_SURF_MODE_2D) {
-                       switch (rtex->real_format) {
-                       case PIPE_FORMAT_Z16_UNORM:
-                               return 5;
-                       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-                       case PIPE_FORMAT_X8Z24_UNORM:
-                       case PIPE_FORMAT_Z24X8_UNORM:
-                       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-                       case PIPE_FORMAT_Z32_FLOAT:
-                       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-                               return 6;
-                       default:
-                               return 7;
-                       }
-               }
-       }
+static unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil) /* Return the tiling index stored in rtex->surface for the given mip level; stencil selects the stencil table. */
+{
+       unsigned tile_mode_index = 0; /* initial value is never observed: both branches below assign it */
 
-       switch (rtex->surface.level[level].mode) {
-       default:
-               assert(!"Invalid surface mode");
-               /* Fall through */
-       case RADEON_SURF_MODE_LINEAR_ALIGNED:
-               return 8;
-       case RADEON_SURF_MODE_1D:
-               if (rtex->surface.flags & RADEON_SURF_SCANOUT)
-                       return 9;
-               else
-                       return 13;
-       case RADEON_SURF_MODE_2D:
-               if (rtex->surface.flags & RADEON_SURF_SCANOUT) {
-                       switch (util_format_get_blocksize(rtex->real_format)) {
-                       case 1:
-                               return 10;
-                       case 2:
-                               return 11;
-                       default:
-                               assert(!"Invalid block size");
-                               /* Fall through */
-                       case 4:
-                               return 12;
-                       }
-               } else {
-                       switch (util_format_get_blocksize(rtex->real_format)) {
-                       case 1:
-                               return 14;
-                       case 2:
-                               return 15;
-                       case 4:
-                               return 16;
-                       case 8:
-                               return 17;
-                       default:
-                               return 13;
-                       }
-               }
+       if (stencil) { /* replaces the removed format/mode lookup with per-level indices read from the surface */
+               tile_mode_index = rtex->surface.stencil_tiling_index[level];
+       } else {
+               tile_mode_index = rtex->surface.tiling_index[level]; /* level is used unchecked as the array index */
        }
+       return tile_mode_index;
 }
 
 /*
@@ -1611,7 +1756,7 @@ static unsigned si_tile_mode_index(struct r600_resource_texture *rtex, unsigned
 static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
                  const struct pipe_framebuffer_state *state, int cb)
 {
-       struct r600_resource_texture *rtex;
+       struct r600_texture *rtex;
        struct r600_surface *surf;
        unsigned level = state->cbufs[cb]->u.tex.level;
        unsigned pitch, slice;
@@ -1625,7 +1770,7 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
        unsigned max_comp_size;
 
        surf = (struct r600_surface *)state->cbufs[cb];
-       rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+       rtex = (struct r600_texture*)state->cbufs[cb]->texture;
 
        offset = rtex->surface.level[level].offset;
        if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
@@ -1638,7 +1783,7 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
                slice = slice - 1;
        }
 
-       tile_mode_index = si_tile_mode_index(rtex, level);
+       tile_mode_index = si_tile_mode_index(rtex, level, false);
 
        desc = util_format_description(surf->base.format);
        for (i = 0; i < 4; i++) {
@@ -1706,6 +1851,26 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
        color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
                S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
 
+       if (rtex->resource.b.b.nr_samples > 1) {
+               unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
+
+               color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
+                               S_028C74_NUM_FRAGMENTS(log_samples);
+
+               if (rtex->fmask.size) {
+                       color_info |= S_028C70_COMPRESSION(1);
+                       unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
+
+                       /* due to a bug in the hw, FMASK_BANK_HEIGHT must be set on SI too */
+                       color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index) |
+                                       S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
+               }
+       }
+
+       if (rtex->cmask.size) {
+               color_info |= S_028C70_FAST_CLEAR(1);
+       }
+
        offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
        offset >>= 8;
 
@@ -1725,6 +1890,25 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
        si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
        si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
 
+       if (rtex->cmask.size) {
+               si_pm4_set_reg(pm4, R_028C7C_CB_COLOR0_CMASK + cb * 0x3C,
+                              offset + (rtex->cmask.offset >> 8));
+               si_pm4_set_reg(pm4, R_028C80_CB_COLOR0_CMASK_SLICE + cb * 0x3C,
+                              S_028C80_TILE_MAX(rtex->cmask.slice_tile_max));
+       }
+       if (rtex->fmask.size) {
+               si_pm4_set_reg(pm4, R_028C84_CB_COLOR0_FMASK + cb * 0x3C,
+                              offset + (rtex->fmask.offset >> 8));
+               si_pm4_set_reg(pm4, R_028C88_CB_COLOR0_FMASK_SLICE + cb * 0x3C,
+                              S_028C88_TILE_MAX(rtex->fmask.slice_tile_max));
+       }
+
+       /* set CB_COLOR1_INFO for possible dual-src blending */
+       if (state->nr_cbufs == 1) {
+               assert(cb == 0);
+               si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, color_info);
+       }
+
        /* Determine pixel shader export format */
        max_comp_size = si_colorformat_max_comp_size(format);
        if (ntype == V_028C70_NUMBER_SRGB ||
@@ -1732,16 +1916,21 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
             max_comp_size <= 10) ||
            (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
                rctx->export_16bpc |= 1 << cb;
+               /* set SPI_SHADER_COL_FORMAT for possible dual-src blending */
+               if (state->nr_cbufs == 1)
+                       rctx->export_16bpc |= 1 << 1;
        }
 }
 
 static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
                  const struct pipe_framebuffer_state *state)
 {
-       struct r600_resource_texture *rtex;
+       struct r600_screen *rscreen = rctx->screen;
+       struct r600_texture *rtex;
        struct r600_surface *surf;
-       unsigned level, pitch, slice, format, tile_mode_index;
-       uint32_t z_info, s_info;
+       unsigned level, pitch, slice, format, tile_mode_index, array_mode;
+       unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
+       uint32_t z_info, s_info, db_depth_info;
        uint64_t z_offs, s_offs;
 
        if (state->zsbuf == NULL) {
@@ -1752,12 +1941,12 @@ static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
 
        surf = (struct r600_surface *)state->zsbuf;
        level = surf->base.u.tex.level;
-       rtex = (struct r600_resource_texture*)surf->base.texture;
+       rtex = (struct r600_texture*)surf->base.texture;
 
-       format = si_translate_dbformat(rtex->real_format);
+       format = si_translate_dbformat(rtex->resource.b.b.format);
 
        if (format == V_028040_Z_INVALID) {
-               R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->real_format);
+               R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
        }
        assert(format != V_028040_Z_INVALID);
 
@@ -1774,28 +1963,64 @@ static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
                slice = slice - 1;
        }
 
+       db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+
        z_info = S_028040_FORMAT(format);
+       if (rtex->resource.b.b.nr_samples > 1) {
+               z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
+       }
+
        if (rtex->surface.flags & RADEON_SURF_SBUFFER)
                s_info = S_028044_FORMAT(V_028044_STENCIL_8);
        else
                s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
 
-       tile_mode_index = si_tile_mode_index(rtex, level);
-       if (tile_mode_index < 4 || tile_mode_index > 7) {
-               R600_ERR("Invalid DB tiling mode %d!\n",
-                                rtex->surface.level[level].mode);
-               si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, S_028040_FORMAT(V_028040_Z_INVALID));
-               si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, S_028044_FORMAT(V_028044_STENCIL_INVALID));
-               return;
+       if (rctx->chip_class >= CIK) {
+               switch (rtex->surface.level[level].mode) {
+               case RADEON_SURF_MODE_2D:
+                       array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
+                       break;
+               case RADEON_SURF_MODE_1D:
+               case RADEON_SURF_MODE_LINEAR_ALIGNED:
+               case RADEON_SURF_MODE_LINEAR:
+               default:
+                       array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
+                       break;
+               }
+               tile_split = rtex->surface.tile_split;
+               stile_split = rtex->surface.stencil_tile_split;
+               macro_aspect = rtex->surface.mtilea;
+               bankw = rtex->surface.bankw;
+               bankh = rtex->surface.bankh;
+               tile_split = cik_tile_split(tile_split);
+               stile_split = cik_tile_split(stile_split);
+               macro_aspect = cik_macro_tile_aspect(macro_aspect);
+               bankw = cik_bank_wh(bankw);
+               bankh = cik_bank_wh(bankh);
+               nbanks = cik_num_banks(rscreen->tiling_info.num_banks);
+               pipe_config = cik_db_pipe_config(rscreen->info.r600_num_tile_pipes,
+                                                rscreen->info.r600_num_backends);
+
+               db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
+                       S_02803C_PIPE_CONFIG(pipe_config) |
+                       S_02803C_BANK_WIDTH(bankw) |
+                       S_02803C_BANK_HEIGHT(bankh) |
+                       S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
+                       S_02803C_NUM_BANKS(nbanks);
+               z_info |= S_028040_TILE_SPLIT(tile_split);
+               s_info |= S_028044_TILE_SPLIT(stile_split);
+       } else {
+               tile_mode_index = si_tile_mode_index(rtex, level, false);
+               z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
+               tile_mode_index = si_tile_mode_index(rtex, level, true);
+               s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
        }
-       z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
-       s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
 
        si_pm4_set_reg(pm4, R_028008_DB_DEPTH_VIEW,
                       S_028008_SLICE_START(state->zsbuf->u.tex.first_layer) |
                       S_028008_SLICE_MAX(state->zsbuf->u.tex.last_layer));
 
-       si_pm4_set_reg(pm4, R_02803C_DB_DEPTH_INFO, S_02803C_ADDR5_SWIZZLE_MASK(1));
+       si_pm4_set_reg(pm4, R_02803C_DB_DEPTH_INFO, db_depth_info);
        si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, z_info);
        si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, s_info);
 
@@ -1809,18 +2034,211 @@ static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
        si_pm4_set_reg(pm4, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice));
 }
 
+/* Pack four (x, y) sample positions into one 32-bit word, 4 bits per
+ * coordinate: sample 0 x in bits 0-3, sample 0 y in bits 4-7, and so on up
+ * to sample 3 y in bits 28-31. Negative coordinates are stored as their low
+ * 4 bits (a two's complement nibble).
+ *
+ * Every coordinate is converted to unsigned before masking and shifting:
+ * with plain int operands a negative s3y yields a nibble >= 8, and
+ * shifting that left by 28 overflows a signed int, which is undefined
+ * behaviour. The resulting bit pattern is unchanged. */
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
+       ((((unsigned)(s0x) & 0xf) <<  0) | (((unsigned)(s0y) & 0xf) <<  4) | \
+        (((unsigned)(s1x) & 0xf) <<  8) | (((unsigned)(s1y) & 0xf) << 12) | \
+        (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
+        (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
+
+/* 2xMSAA
+ * The two sample locations are (-4, 4) and (4, -4); every word packs that
+ * pair twice. There is one word per pixel of the 2x2 quad, in the order
+ * si_set_msaa_state programs them: X0Y0, X1Y0, X0Y1, X1Y1. */
+static uint32_t sample_locs_2x[4] = {
+       FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), /* X0Y0 */
+       FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), /* X1Y0 */
+       FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), /* X0Y1 */
+       FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), /* X1Y1 */
+};
+/* Value written to the MAX_SAMPLE_DIST field of PA_SC_AA_CONFIG for 2x. */
+static unsigned max_dist_2x = 4;
+/* 4xMSAA
+ * The four sample locations are (-2, -2), (2, 2), (-6, 6), (6, -6).
+ * There is one word per pixel of the 2x2 quad, in the order
+ * si_set_msaa_state programs them: X0Y0, X1Y0, X0Y1, X1Y1. */
+static uint32_t sample_locs_4x[4] = {
+       FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), /* X0Y0 */
+       FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), /* X1Y0 */
+       FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), /* X0Y1 */
+       FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), /* X1Y1 */
+};
+/* Value written to the MAX_SAMPLE_DIST field of PA_SC_AA_CONFIG for 4x. */
+static unsigned max_dist_4x = 6;
+/* Cayman/SI 8xMSAA
+ * Entries 0-3 carry samples 0-3 and entries 4-7 carry samples 4-7; inside
+ * each group there is one word per pixel of the 2x2 quad (X0Y0, X1Y0,
+ * X0Y1, X1Y1). */
+static uint32_t cm_sample_locs_8x[8] = {
+       /* samples 0-3 */
+       FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+       FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+       FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+       FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+       /* samples 4-7 */
+       FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
+       FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
+       FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
+       FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
+};
+/* Value written to the MAX_SAMPLE_DIST field of PA_SC_AA_CONFIG for 8x. */
+static unsigned cm_max_dist_8x = 8;
+/* Cayman/SI 16xMSAA
+ * Four groups of four words: group k (entries 4k .. 4k+3) carries samples
+ * 4k .. 4k+3, with one word per pixel of the 2x2 quad (X0Y0, X1Y0, X0Y1,
+ * X1Y1) inside each group. */
+static uint32_t cm_sample_locs_16x[16] = {
+       /* samples 0-3 */
+       FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+       FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+       FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+       FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+       /* samples 4-7 */
+       FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+       FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+       FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+       FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+       /* samples 8-11 */
+       FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+       FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+       FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+       FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+       /* samples 12-15 */
+       FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+       FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+       FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+       FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+};
+/* Value written to the MAX_SAMPLE_DIST field of PA_SC_AA_CONFIG for 16x. */
+static unsigned cm_max_dist_16x = 8;
+
+/* Unpack the (x, y) position of one sample from a packed sample-location
+ * table and store it in out_value[0] / out_value[1].
+ *
+ * sample_locs is one of the FILL_SREG tables: four samples per word, and
+ * the word holding sample N for the first pixel of the quad sits at index
+ * (N / 4) * 4. Each coordinate is a signed 4-bit value in [-8, 7]; it is
+ * biased by 8 and divided by 16, giving a result in [0, 1).
+ *
+ * The nibble is sign-extended arithmetically ((n ^ 8) - 8) instead of being
+ * stored into an "int idx:4" bit-field: whether a plain int bit-field is
+ * signed, and what an out-of-range store into it yields, are both
+ * implementation-defined. */
+static void si_unpack_sample_position(const uint32_t *sample_locs,
+                                     unsigned sample_index,
+                                     float *out_value)
+{
+       uint32_t packed = sample_locs[(sample_index / 4) * 4];
+       unsigned shift = 4 * (sample_index % 4 * 2);
+       unsigned i;
+
+       for (i = 0; i < 2; i++) {
+               unsigned nibble = (packed >> (shift + 4 * i)) & 0xf;
+               int coord = (int)(nibble ^ 0x8) - 8;
+
+               out_value[i] = (float)(coord + 8) / 16.0f;
+       }
+}
+
+/* Return the position of sample sample_index for the given sample count.
+ *
+ * ctx is unused. For sample counts other than 2, 4, 8 and 16 both
+ * coordinates are 0.5. The caller is expected to pass
+ * sample_index < sample_count; larger indices are not validated here. */
+static void si_get_sample_position(struct pipe_context *ctx,
+                                  unsigned sample_count,
+                                  unsigned sample_index,
+                                  float *out_value)
+{
+       switch (sample_count) {
+       case 1:
+       default:
+               out_value[0] = out_value[1] = 0.5;
+               break;
+       case 2:
+               si_unpack_sample_position(sample_locs_2x, sample_index, out_value);
+               break;
+       case 4:
+               si_unpack_sample_position(sample_locs_4x, sample_index, out_value);
+               break;
+       case 8:
+               si_unpack_sample_position(cm_sample_locs_8x, sample_index, out_value);
+               break;
+       case 16:
+               si_unpack_sample_position(cm_sample_locs_16x, sample_index, out_value);
+               break;
+       }
+}
+
+/* Write the first (_0) sample-location register of each pixel of the 2x2
+ * quad from a four-entry table; this is everything the 2x and 4x modes
+ * program. */
+static void si_set_quad_sample_locs(struct si_pm4_state *pm4, const uint32_t *locs)
+{
+       si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, locs[0]);
+       si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, locs[1]);
+       si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, locs[2]);
+       si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, locs[3]);
+}
+
+/* Add the multisample register state for nr_samples samples to pm4.
+ *
+ * For 2, 4, 8 and 16 samples the sample-location registers are filled from
+ * the matching table and PA_SC_LINE_CNTL, PA_SC_AA_CONFIG and DB_EQAA are
+ * set up for log2(nr_samples). Any other count is treated as no MSAA.
+ * rctx is not used. */
+static void si_set_msaa_state(struct r600_context *rctx, struct si_pm4_state *pm4, int nr_samples)
+{
+       const uint32_t *locs;
+       unsigned max_sample_dist = 0;
+       unsigned log_samples;
+
+       switch (nr_samples) {
+       case 2:
+               si_set_quad_sample_locs(pm4, sample_locs_2x);
+               max_sample_dist = max_dist_2x;
+               break;
+       case 4:
+               si_set_quad_sample_locs(pm4, sample_locs_4x);
+               max_sample_dist = max_dist_4x;
+               break;
+       case 8:
+               locs = cm_sample_locs_8x;
+               /* NOTE(review): X1Y1_2 and X1Y1_3 are not written for 8x,
+                * unlike the _2/_3 registers of the other three pixels. */
+               si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, locs[0]);
+               si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, locs[4]);
+               si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0);
+               si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0);
+               si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, locs[1]);
+               si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, locs[5]);
+               si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0);
+               si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0);
+               si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, locs[2]);
+               si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, locs[6]);
+               si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0);
+               si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0);
+               si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, locs[3]);
+               si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, locs[7]);
+               max_sample_dist = cm_max_dist_8x;
+               break;
+       case 16:
+               locs = cm_sample_locs_16x;
+               si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, locs[0]);
+               si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, locs[4]);
+               si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, locs[8]);
+               si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, locs[12]);
+               si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, locs[1]);
+               si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, locs[5]);
+               si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, locs[9]);
+               si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, locs[13]);
+               si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, locs[2]);
+               si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, locs[6]);
+               si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, locs[10]);
+               si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, locs[14]);
+               si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, locs[3]);
+               si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, locs[7]);
+               si_pm4_set_reg(pm4, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, locs[11]);
+               si_pm4_set_reg(pm4, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, locs[15]);
+               max_sample_dist = cm_max_dist_16x;
+               break;
+       default:
+               /* Unsupported counts (including 0 and 1) mean "no MSAA". */
+               nr_samples = 0;
+               break;
+       }
+
+       if (nr_samples <= 1) {
+               /* Single-sample rendering. */
+               si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_LAST_PIXEL(1));
+               si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0);
+
+               si_pm4_set_reg(pm4, R_028804_DB_EQAA,
+                              S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+                              S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+               return;
+       }
+
+       log_samples = util_logbase2(nr_samples);
+
+       si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL,
+                      S_028BDC_LAST_PIXEL(1) |
+                      S_028BDC_EXPAND_LINE_WIDTH(1));
+       si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG,
+                      S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
+                      S_028BE0_MAX_SAMPLE_DIST(max_sample_dist) |
+                      S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples));
+
+       si_pm4_set_reg(pm4, R_028804_DB_EQAA,
+                      S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
+                      S_028804_PS_ITER_SAMPLES(log_samples) |
+                      S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
+                      S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
+                      S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+                      S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+}
+
 static void si_set_framebuffer_state(struct pipe_context *ctx,
                                     const struct pipe_framebuffer_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-       uint32_t shader_mask, tl, br;
-       int tl_x, tl_y, br_x, br_y;
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
+       uint32_t tl, br;
+       int tl_x, tl_y, br_x, br_y, nr_samples, i;
 
        if (pm4 == NULL)
                return;
 
        si_pm4_inval_fb_cache(pm4, state->nr_cbufs);
+       rctx->flush_and_inv_cb_meta = true;
 
        if (state->zsbuf)
                si_pm4_inval_zsbuf_cache(pm4);
@@ -1829,16 +2247,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
        /* build states */
        rctx->export_16bpc = 0;
-       for (int i = 0; i < state->nr_cbufs; i++) {
+       rctx->fb_compressed_cb_mask = 0;
+       for (i = 0; i < state->nr_cbufs; i++) {
+               struct r600_texture *rtex =
+                       (struct r600_texture*)state->cbufs[i]->texture;
+
                si_cb(rctx, pm4, state, i);
+
+               if (rtex->fmask.size || rtex->cmask.size) {
+                       rctx->fb_compressed_cb_mask |= 1 << i;
+               }
        }
+       for (; i < 8; i++) {
+               si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+                              S_028C70_FORMAT(V_028C70_COLOR_INVALID));
+       }
+
        assert(!(rctx->export_16bpc & ~0xff));
        si_db(rctx, pm4, state);
 
-       shader_mask = 0;
-       for (int i = 0; i < state->nr_cbufs; i++) {
-               shader_mask |= 0xf << (i * 4);
-       }
        tl_x = 0;
        tl_y = 0;
        br_x = state->width;
@@ -1857,8 +2284,18 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        si_pm4_set_reg(pm4, R_028208_PA_SC_WINDOW_SCISSOR_BR, br);
        si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000);
        si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
-       si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader_mask);
-       si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000);
+
+       if (state->nr_cbufs)
+               nr_samples = state->cbufs[0]->texture->nr_samples;
+       else if (state->zsbuf)
+               nr_samples = state->zsbuf->texture->nr_samples;
+       else
+               nr_samples = 0;
+
+       si_set_msaa_state(rctx, pm4, nr_samples);
+       rctx->fb_log_samples = util_logbase2(nr_samples);
+       rctx->fb_cb0_is_integer = state->nr_cbufs &&
+                                 util_format_is_pure_integer(state->cbufs[0]->format);
 
        si_pm4_set_state(rctx, framebuffer, pm4);
        si_update_fb_rs_state(rctx);
@@ -1870,30 +2307,47 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
  */
 
 /* Compute the key for the hw shader variant */
-static INLINE struct si_shader_key si_shader_selector_key(struct pipe_context *ctx,
-                                                         struct si_pipe_shader_selector *sel)
+static INLINE void si_shader_selector_key(struct pipe_context *ctx,
+                                         struct si_pipe_shader_selector *sel,
+                                         union si_shader_key *key) /* out: zeroed below, then filled from the currently queued state */
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_shader_key key;
-       memset(&key, 0, sizeof(key));
+       memset(key, 0, sizeof(*key)); /* fields not set for this shader type stay zero */
 
-       if (sel->type == PIPE_SHADER_FRAGMENT) {
+       if (sel->type == PIPE_SHADER_VERTEX) {
+               unsigned i;
+               if (!rctx->vertex_elements)
+                       return; /* no vertex elements bound: the key stays all-zero */
+
+               for (i = 0; i < rctx->vertex_elements->count; ++i)
+                       key->vs.instance_divisors[i] = rctx->vertex_elements->elements[i].instance_divisor;
+
+               if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf0) /* NOTE(review): rasterizer is dereferenced without a NULL check here, unlike the fragment path below - confirm it is always bound */
+                       key->vs.ucps_enabled |= 0x2; /* any of bits 4-7 of clip_plane_enable set */
+               if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf)
+                       key->vs.ucps_enabled |= 0x1; /* any of bits 0-3 of clip_plane_enable set */
+       } else if (sel->type == PIPE_SHADER_FRAGMENT) {
                if (sel->fs_write_all)
-                       key.nr_cbufs = rctx->framebuffer.nr_cbufs;
-               key.export_16bpc = rctx->export_16bpc;
+                       key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
+               key->ps.export_16bpc = rctx->export_16bpc;
+
                if (rctx->queued.named.rasterizer) {
-                       key.color_two_side = rctx->queued.named.rasterizer->two_side;
-                       key.flatshade = rctx->queued.named.rasterizer->flatshade;
+                       key->ps.color_two_side = rctx->queued.named.rasterizer->two_side;
+                       key->ps.flatshade = rctx->queued.named.rasterizer->flatshade;
+
+                       if (rctx->queued.named.blend) { /* alpha_to_one needs both a blend and a rasterizer state */
+                               key->ps.alpha_to_one = rctx->queued.named.blend->alpha_to_one &&
+                                                      rctx->queued.named.rasterizer->multisample_enable &&
+                                                      !rctx->fb_cb0_is_integer;
+                       }
                }
                if (rctx->queued.named.dsa) {
-                       key.alpha_func = rctx->queued.named.dsa->alpha_func;
-                       key.alpha_ref = rctx->queued.named.dsa->alpha_ref;
+                       key->ps.alpha_func = rctx->queued.named.dsa->alpha_func;
+                       key->ps.alpha_ref = rctx->queued.named.dsa->alpha_ref;
                } else {
-                       key.alpha_func = PIPE_FUNC_ALWAYS;
+                       key->ps.alpha_func = PIPE_FUNC_ALWAYS; /* no DSA state queued: alpha test always passes */
                }
        }
-
-       return key;
 }
 
 /* Select the hw shader variant depending on the current state.
@@ -1902,11 +2356,11 @@ int si_shader_select(struct pipe_context *ctx,
                     struct si_pipe_shader_selector *sel,
                     unsigned *dirty)
 {
-       struct si_shader_key key;
+       union si_shader_key key;
        struct si_pipe_shader * shader = NULL;
        int r;
 
-       key = si_shader_selector_key(ctx, sel);
+       si_shader_selector_key(ctx, sel, &key);
 
        /* Check if we don't need to change anything.
         * This path is also used for most shaders that don't need multiple
@@ -1934,8 +2388,9 @@ int si_shader_select(struct pipe_context *ctx,
        if (unlikely(!shader)) {
                shader = CALLOC(1, sizeof(struct si_pipe_shader));
                shader->selector = sel;
+               shader->key = key;
 
-               r = si_pipe_shader_create(ctx, shader, key);
+               r = si_pipe_shader_create(ctx, shader);
                if (unlikely(r)) {
                        R600_ERR("Failed to build shader variant (type=%u) %d\n",
                                 sel->type, r);
@@ -1951,10 +2406,9 @@ int si_shader_select(struct pipe_context *ctx,
                    sel->num_shaders == 0 &&
                    shader->shader.fs_write_all) {
                        sel->fs_write_all = 1;
-                       key = si_shader_selector_key(ctx, sel);
+                       si_shader_selector_key(ctx, sel, &shader->key);
                }
 
-               shader->key = key;
                sel->num_shaders++;
        }
 
@@ -2082,7 +2536,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
                                                        const struct pipe_sampler_view *state)
 {
        struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view);
-       struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture;
+       struct r600_texture *tmp = (struct r600_texture*)texture;
        const struct util_format_description *desc;
        unsigned format, num_format;
        uint32_t pitch = 0;
@@ -2160,7 +2614,27 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
                break;
        default:
                if (first_non_void < 0) {
-                       num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
+                       if (util_format_is_compressed(pipe_format)) {
+                               switch (pipe_format) {
+                               case PIPE_FORMAT_DXT1_SRGB:
+                               case PIPE_FORMAT_DXT1_SRGBA:
+                               case PIPE_FORMAT_DXT3_SRGBA:
+                               case PIPE_FORMAT_DXT5_SRGBA:
+                                       num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
+                                       break;
+                               case PIPE_FORMAT_RGTC1_SNORM:
+                               case PIPE_FORMAT_LATC1_SNORM:
+                               case PIPE_FORMAT_RGTC2_SNORM:
+                               case PIPE_FORMAT_LATC2_SNORM:
+                                       num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
+                                       break;
+                               default:
+                                       num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
+                                       break;
+                               }
+                       } else {
+                               num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
+                       }
                } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
                        num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
                } else {
@@ -2223,17 +2697,59 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
                          S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
                          S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
                          S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
-                         S_008F1C_BASE_LEVEL(state->u.tex.first_level) |
-                         S_008F1C_LAST_LEVEL(state->u.tex.last_level) |
-                         S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0)) |
+                         S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
+                                                     0 : state->u.tex.first_level) |
+                         S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
+                                                     util_logbase2(texture->nr_samples) :
+                                                     state->u.tex.last_level) |
+                         S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
                          S_008F1C_POW2_PAD(texture->last_level > 0) |
-                         S_008F1C_TYPE(si_tex_dim(texture->target)));
+                         S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
        view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
        view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
                          S_008F24_LAST_ARRAY(state->u.tex.last_layer));
        view->state[6] = 0;
        view->state[7] = 0;
 
+       /* Initialize the sampler view for FMASK. */
+       if (tmp->fmask.size) {
+               uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset;
+               uint32_t fmask_format;
+
+               switch (texture->nr_samples) {
+               case 2:
+                       fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
+                       break;
+               case 4:
+                       fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
+                       break;
+               case 8:
+                       fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
+                       break;
+               default:
+                       assert(0);
+                       fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
+               }
+
+               view->fmask_state[0] = va >> 8;
+               view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
+                                      S_008F14_DATA_FORMAT(fmask_format) |
+                                      S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
+               view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
+                                      S_008F18_HEIGHT(height - 1);
+               view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
+                                      S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
+                                      S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
+                                      S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
+                                      S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
+                                      S_008F1C_TYPE(si_tex_dim(texture->target, 0));
+               view->fmask_state[4] = S_008F20_PITCH(tmp->fmask.pitch - 1);
+               view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
+                                      S_008F24_LAST_ARRAY(state->u.tex.last_layer);
+               view->fmask_state[6] = 0;
+               view->fmask_state[7] = 0;
+       }
+
        return &view->base;
 }
 
@@ -2246,11 +2762,31 @@ static void si_sampler_view_destroy(struct pipe_context *ctx,
        FREE(resource);
 }
 
+/* Tell whether a texture wrap mode can fetch the border color.
+ *
+ * The two *_CLAMP_TO_BORDER modes always can; plain CLAMP and MIRROR_CLAMP
+ * only do when linear filtering is in use. Every other mode never does. */
+static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
+{
+       switch (wrap) {
+       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+               return true;
+       case PIPE_TEX_WRAP_CLAMP:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP:
+               return linear_filter;
+       default:
+               return false;
+       }
+}
+
+/* Decide whether a sampler state needs its border color uploaded.
+ *
+ * That is the case only when the border color is not all-zero and at least
+ * one of the s/t/r wrap modes can actually sample the border. */
+static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
+{
+       bool linear_filter;
+
+       /* An all-zero border color never needs the border color table. */
+       if (!state->border_color.ui[0] && !state->border_color.ui[1] &&
+           !state->border_color.ui[2] && !state->border_color.ui[3])
+               return false;
+
+       /* Linear if either the minification or the magnification filter is
+        * not NEAREST. */
+       linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
+                       state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
+
+       if (wrap_mode_uses_border_color(state->wrap_s, linear_filter))
+               return true;
+       if (wrap_mode_uses_border_color(state->wrap_t, linear_filter))
+               return true;
+       return wrap_mode_uses_border_color(state->wrap_r, linear_filter);
+}
+
 static void *si_create_sampler_state(struct pipe_context *ctx,
                                     const struct pipe_sampler_state *state)
 {
        struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state);
-       union util_color uc;
        unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
        unsigned border_color_type;
 
@@ -2258,20 +2794,10 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
                return NULL;
        }
 
-       util_pack_color(state->border_color.f, PIPE_FORMAT_A8R8G8B8_UNORM, &uc);
-       switch (uc.ui) {
-       case 0x000000FF:
-               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
-               break;
-       case 0x00000000:
-               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
-               break;
-       case 0xFFFFFFFF:
-               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
-               break;
-       default: /* Use border color pointer */
+       if (sampler_state_needs_border_color(state))
                border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
-       }
+       else
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
 
        rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
                          S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
@@ -2290,95 +2816,98 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
        rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
 
        if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-               memcpy(rstate->border_color, state->border_color.f,
+               memcpy(rstate->border_color, state->border_color.ui,
                       sizeof(rstate->border_color));
        }
 
        return rstate;
 }
 
-static struct si_pm4_state *si_set_sampler_view(struct r600_context *rctx,
-                                               unsigned count,
-                                               struct pipe_sampler_view **views,
-                                               struct r600_textures_info *samplers,
-                                               unsigned user_data_reg)
+/* XXX consider moving this function to si_descriptors.c for gcc to inline
+ *     the si_set_sampler_view calls. LTO might help too. */
+static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx,
+                                                unsigned shader, unsigned count,
+                                                struct pipe_sampler_view **views) /* binds views[0..count-1] for the given shader stage and returns a new pm4 state */
 {
-       struct si_pipe_sampler_view **resource = (struct si_pipe_sampler_view **)views;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-       int i, j;
-
-       if (!count)
-               goto out;
+       struct r600_textures_info *samplers = &rctx->samplers[shader];
+       struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views;
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); /* NOTE(review): used below without a NULL check, unlike si_set_framebuffer_state - confirm allocation cannot fail here */
+       int i;
 
        si_pm4_inval_texture_cache(pm4);
 
-       si_pm4_sh_data_begin(pm4);
        for (i = 0; i < count; i++) {
-               pipe_sampler_view_reference(
-                       (struct pipe_sampler_view **)&samplers->views[i],
-                       views[i]);
-
                if (views[i]) {
-                       struct r600_resource_texture *rtex =
-                               (struct r600_resource_texture*)views[i]->texture;
+                       struct r600_texture *rtex =
+                               (struct r600_texture*)views[i]->texture;
 
                        if (rtex->is_depth && !rtex->is_flushing_texture) {
                                samplers->depth_texture_mask |= 1 << i;
                        } else {
                                samplers->depth_texture_mask &= ~(1 << i);
                        }
+                       if (rtex->cmask.size || rtex->fmask.size) { /* track slots whose texture has CMASK or FMASK data */
+                               samplers->compressed_colortex_mask |= 1 << i;
+                       } else {
+                               samplers->compressed_colortex_mask &= ~(1 << i);
+                       }
 
-                       si_pm4_add_bo(pm4, resource[i]->resource, RADEON_USAGE_READ);
+                       si_set_sampler_view(rctx, shader, i, views[i], rviews[i]->state);
+
+                       if (rtex->fmask.size) { /* the FMASK view lives at slot FMASK_TEX_OFFSET + i; it is cleared when the texture has no FMASK */
+                               si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
+                                                   views[i], rviews[i]->fmask_state);
+                       } else {
+                               si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
+                                                   NULL, NULL);
+                       }
                } else {
                        samplers->depth_texture_mask &= ~(1 << i);
-               }
-
-               for (j = 0; j < Elements(resource[i]->state); ++j) {
-                       si_pm4_sh_data_add(pm4, resource[i] ? resource[i]->state[j] : 0);
+                       samplers->compressed_colortex_mask &= ~(1 << i);
+                       si_set_sampler_view(rctx, shader, i, NULL, NULL);
+                       si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
+                                           NULL, NULL);
                }
        }
-
-       for (i = count; i < NUM_TEX_UNITS; i++) {
-               if (samplers->views[i])
-                       pipe_sampler_view_reference((struct pipe_sampler_view **)&samplers->views[i], NULL);
+       for (; i < samplers->n_views; i++) { /* clear slots bound by the previous call that this call no longer covers */
+               samplers->depth_texture_mask &= ~(1 << i);
+               samplers->compressed_colortex_mask &= ~(1 << i);
+               si_set_sampler_view(rctx, shader, i, NULL, NULL);
+               si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
+                                   NULL, NULL);
        }
 
-       si_pm4_sh_data_end(pm4, user_data_reg, SI_SGPR_RESOURCE);
-
-out:
-       rctx->ps_samplers.n_views = count;
+       samplers->n_views = count; /* remembered so the next call knows how many slots to clear */
        return pm4;
 }
 
-static void si_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
-                                  struct pipe_sampler_view **views)
+static void si_set_vs_sampler_views(struct pipe_context *ctx, unsigned count,
+                                   struct pipe_sampler_view **views) /* vertex-shader wrapper around si_set_sampler_views */
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct si_pm4_state *pm4;
 
-       pm4 = si_set_sampler_view(rctx, count, views, &rctx->vs_samplers,
-                           R_00B130_SPI_SHADER_USER_DATA_VS_0);
+       pm4 = si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views); /* build the pm4 state for the vertex stage; it is set as vs_sampler_views below */
        si_pm4_set_state(rctx, vs_sampler_views, pm4);
 }
 
-static void si_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
-                                  struct pipe_sampler_view **views)
+static void si_set_ps_sampler_views(struct pipe_context *ctx, unsigned count,
+                                   struct pipe_sampler_view **views) /* fragment-shader wrapper around si_set_sampler_views */
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct si_pm4_state *pm4;
 
-       pm4 = si_set_sampler_view(rctx, count, views, &rctx->ps_samplers,
-                                 R_00B030_SPI_SHADER_USER_DATA_PS_0);
+       pm4 = si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views); /* build the pm4 state for the fragment stage; it is set as ps_sampler_views below */
        si_pm4_set_state(rctx, ps_sampler_views, pm4);
 }
 
-static struct si_pm4_state *si_bind_sampler(struct r600_context *rctx, unsigned count,
-                                           void **states,
-                                           struct r600_textures_info *samplers,
-                                           unsigned user_data_reg)
+static struct si_pm4_state *si_bind_sampler_states(struct r600_context *rctx, unsigned count,
+                                                  void **states,
+                                                  struct r600_textures_info *samplers,
+                                                  unsigned user_data_reg)
 {
        struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
        uint32_t *border_color_table = NULL;
        int i, j;
 
@@ -2413,11 +2942,8 @@ static struct si_pm4_state *si_bind_sampler(struct r600_context *rctx, unsigned
                        }
 
                        for (j = 0; j < 4; j++) {
-                               union fi border_color;
-
-                               border_color.f = rstates[i]->border_color[j];
                                border_color_table[4 * rctx->border_color_offset + j] =
-                                       util_le32_to_cpu(border_color.i);
+                                       util_le32_to_cpu(rstates[i]->border_color[j]);
                        }
 
                        rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
@@ -2436,6 +2962,8 @@ static struct si_pm4_state *si_bind_sampler(struct r600_context *rctx, unsigned
                                         (void*)rctx->border_color_table);
 
                si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
+               if (rctx->chip_class >= CIK)
+                       si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
                rctx->ws->buffer_unmap(rctx->border_color_table->cs_buf);
                si_pm4_add_bo(pm4, rctx->border_color_table, RADEON_USAGE_READ);
        }
@@ -2447,28 +2975,39 @@ out:
        return pm4;
 }
 
-static void si_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states)
+/*
+ * Bind sampler states for the vertex shader stage.
+ *
+ * Delegates to si_bind_sampler_states() with the vertex entry of
+ * rctx->samplers[] and the VS user-data register, then passes the returned
+ * PM4 state to si_pm4_set_state() as vs_sampler.
+ */
+static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct si_pm4_state *pm4;
 
-       pm4 = si_bind_sampler(rctx, count, states, &rctx->vs_samplers,
+       pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_VERTEX],
                              R_00B130_SPI_SHADER_USER_DATA_VS_0);
        si_pm4_set_state(rctx, vs_sampler, pm4);
 }
 
-static void si_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states)
+/*
+ * Bind sampler states for the fragment shader stage.
+ *
+ * Delegates to si_bind_sampler_states() with the fragment entry of
+ * rctx->samplers[] and the PS user-data register, then passes the returned
+ * PM4 state to si_pm4_set_state() as ps_sampler.
+ */
+static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct si_pm4_state *pm4;
 
-       pm4 = si_bind_sampler(rctx, count, states, &rctx->ps_samplers,
+       pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_FRAGMENT],
                              R_00B030_SPI_SHADER_USER_DATA_PS_0);
        si_pm4_set_state(rctx, ps_sampler, pm4);
 }
 
-static void si_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+/*
+ * Set the multisample coverage mask.
+ *
+ * Only the low 16 bits of sample_mask are used; they are replicated into
+ * both 16-bit halves of the value written to the two PA_SC_AA_MASK
+ * registers. Does nothing if the PM4 state cannot be allocated.
+ */
+static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
+       uint16_t mask = sample_mask;
+       /* Widen before shifting: a uint16_t operand is promoted to (signed)
+        * int, and shifting a value with bit 15 set left by 16 would overflow
+        * it, which is undefined behaviour. */
+       uint32_t aa_mask = (uint32_t)mask | ((uint32_t)mask << 16);
+
+       if (pm4 == NULL)
+               return;
+
+       si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, aa_mask);
+       si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, aa_mask);
+
+       si_pm4_set_state(rctx, sample_mask, pm4);
 }
 
 static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
@@ -2480,64 +3019,40 @@ static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
  * Constants
  */
+/*
+ * Record a constant buffer binding for one shader stage.
+ *
+ * The binding is stored in slot `index` of rctx->constbuf_state[shader].
+ * A NULL `input`, or one with neither a buffer nor a user_buffer, unbinds
+ * the slot: its enabled and dirty bits are cleared and the buffer reference
+ * is dropped. Otherwise the size/offset/buffer are stored in the slot and
+ * its enabled and dirty bits are set.
+ *
+ * NOTE(review): `shader` indexes constbuf_state[] and `index` is used as a
+ * shift count and as an index into cb[] without any range check here --
+ * presumably callers guarantee valid values; confirm.
+ */
 static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
-                                  struct pipe_constant_buffer *cb)
+                                  struct pipe_constant_buffer *input)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_resource *rbuffer = cb ? si_resource(cb->buffer) : NULL;
-       struct si_pm4_state *pm4;
-       uint32_t offset;
-       uint64_t va;
+       struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
+       struct pipe_constant_buffer *cb;
+       const uint8_t *ptr;
 
        /* Note that the state tracker can unbind constant buffers by
         * passing NULL here.
         */
-       if (cb == NULL || (!cb->buffer && !cb->user_buffer))
+       if (unlikely(!input || (!input->buffer && !input->user_buffer))) {
+               state->enabled_mask &= ~(1 << index);
+               state->dirty_mask &= ~(1 << index);
+               pipe_resource_reference(&state->cb[index].buffer, NULL);
                return;
+       }
 
-       pm4 = CALLOC_STRUCT(si_pm4_state);
-       si_pm4_inval_shader_cache(pm4);
-
-       if (cb->user_buffer)
-               r600_upload_const_buffer(rctx, &rbuffer, cb->user_buffer, cb->buffer_size, &offset);
-       else
-               offset = 0;
-       va = r600_resource_va(ctx->screen, (void*)rbuffer);
-       va += offset;
-
-       si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ);
-
-       si_pm4_sh_data_begin(pm4);
-
-       /* Fill in a T# buffer resource description */
-       si_pm4_sh_data_add(pm4, va);
-       si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
-                                S_008F04_STRIDE(0)));
-       si_pm4_sh_data_add(pm4, cb->buffer_size);
-       si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
-                               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
-                               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                               S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                               S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32));
-
-       switch (shader) {
-       case PIPE_SHADER_VERTEX:
-               si_pm4_sh_data_end(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0, SI_SGPR_CONST);
-               si_pm4_set_state(rctx, vs_const, pm4);
-               break;
+       cb = &state->cb[index];
+       cb->buffer_size = input->buffer_size;
 
-       case PIPE_SHADER_FRAGMENT:
-               si_pm4_sh_data_end(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0, SI_SGPR_CONST);
-               si_pm4_set_state(rctx, ps_const, pm4);
-               break;
+       ptr = input->user_buffer;
 
-       default:
-               R600_ERR("unsupported %d\n", shader);
-               FREE(pm4);
+       if (ptr) {
+               /* User-memory constants: r600_upload_const_buffer() receives
+                * the slot's buffer pointer and offset to fill in.
+                * NOTE(review): whether it releases a buffer previously
+                * referenced by cb->buffer is not visible here -- confirm. */
+               r600_upload_const_buffer(rctx,
+                               (struct si_resource**)&cb->buffer, ptr,
+                               cb->buffer_size, &cb->buffer_offset);
+       } else {
+               /* Setup the hw buffer. */
+               cb->buffer_offset = input->buffer_offset;
+               pipe_resource_reference(&cb->buffer, input->buffer);
        }
 
-       if (cb->buffer != &rbuffer->b.b)
-               si_resource_reference(&rbuffer, NULL);
+       state->enabled_mask |= 1 << index;
+       state->dirty_mask |= 1 << index;
 }
 
 /*
@@ -2652,15 +3167,30 @@ static void si_set_polygon_stipple(struct pipe_context *ctx,
+/*
+ * Texture barrier: build a PM4 state that calls the texture-cache and
+ * framebuffer-cache invalidation helpers (the latter for the currently bound
+ * number of colour buffers) and submit it as the texture_barrier state.
+ * Returns without doing anything if the PM4 state cannot be allocated.
+ */
 static void si_texture_barrier(struct pipe_context *ctx)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
+
+       if (pm4 == NULL)
+               return;
 
        si_pm4_inval_texture_cache(pm4);
        si_pm4_inval_fb_cache(pm4, rctx->framebuffer.nr_cbufs);
        si_pm4_set_state(rctx, texture_barrier, pm4);
 }
 
+/*
+ * Create a blend state object for a custom colour-buffer mode.
+ *
+ * Starts from a zeroed pipe_blend_state, sets independent_blend_enable and
+ * enables all four colour channels on render target 0, then forwards it
+ * together with `mode` to si_create_blend_state_mode(). Returns whatever
+ * that function returns.
+ */
+static void *si_create_blend_custom(struct r600_context *rctx, unsigned mode)
+{
+       struct pipe_blend_state blend;
+
+       memset(&blend, 0, sizeof(blend));
+       blend.independent_blend_enable = true;
+       blend.rt[0].colormask = 0xf;
+       return si_create_blend_state_mode(&rctx->context, &blend, mode);
+}
+
 void si_init_state_functions(struct r600_context *rctx)
 {
+       int i;
+
        rctx->context.create_blend_state = si_create_blend_state;
        rctx->context.bind_blend_state = si_bind_blend_state;
        rctx->context.delete_blend_state = si_delete_blend_state;
@@ -2673,17 +3203,23 @@ void si_init_state_functions(struct r600_context *rctx)
        rctx->context.create_depth_stencil_alpha_state = si_create_dsa_state;
        rctx->context.bind_depth_stencil_alpha_state = si_bind_dsa_state;
        rctx->context.delete_depth_stencil_alpha_state = si_delete_dsa_state;
-       rctx->custom_dsa_flush_depth_stencil = si_create_db_flush_dsa(rctx, true, true);
-       rctx->custom_dsa_flush_depth = si_create_db_flush_dsa(rctx, true, false);
-       rctx->custom_dsa_flush_stencil = si_create_db_flush_dsa(rctx, false, true);
-       rctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(rctx, false, false);
+
+       for (i = 0; i < 8; i++) {
+               rctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(rctx, true, true, i);
+               rctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(rctx, true, false, i);
+               rctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(rctx, false, true, i);
+       }
+       rctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(rctx, false, false, 0);
+       rctx->custom_blend_resolve = si_create_blend_custom(rctx, V_028808_CB_RESOLVE);
+       rctx->custom_blend_decompress = si_create_blend_custom(rctx, V_028808_CB_FMASK_DECOMPRESS);
 
        rctx->context.set_clip_state = si_set_clip_state;
-       rctx->context.set_scissor_state = si_set_scissor_state;
-       rctx->context.set_viewport_state = si_set_viewport_state;
+       rctx->context.set_scissor_states = si_set_scissor_states;
+       rctx->context.set_viewport_states = si_set_viewport_states;
        rctx->context.set_stencil_ref = si_set_pipe_stencil_ref;
 
        rctx->context.set_framebuffer_state = si_set_framebuffer_state;
+       rctx->context.get_sample_position = si_get_sample_position;
 
        rctx->context.create_vs_state = si_create_vs_state;
        rctx->context.create_fs_state = si_create_fs_state;
@@ -2693,13 +3229,13 @@ void si_init_state_functions(struct r600_context *rctx)
        rctx->context.delete_fs_state = si_delete_ps_shader;
 
        rctx->context.create_sampler_state = si_create_sampler_state;
-       rctx->context.bind_vertex_sampler_states = si_bind_vs_sampler;
-       rctx->context.bind_fragment_sampler_states = si_bind_ps_sampler;
+       rctx->context.bind_vertex_sampler_states = si_bind_vs_sampler_states;
+       rctx->context.bind_fragment_sampler_states = si_bind_ps_sampler_states;
        rctx->context.delete_sampler_state = si_delete_sampler_state;
 
        rctx->context.create_sampler_view = si_create_sampler_view;
-       rctx->context.set_vertex_sampler_views = si_set_vs_sampler_view;
-       rctx->context.set_fragment_sampler_views = si_set_ps_sampler_view;
+       rctx->context.set_vertex_sampler_views = si_set_vs_sampler_views;
+       rctx->context.set_fragment_sampler_views = si_set_ps_sampler_views;
        rctx->context.sampler_view_destroy = si_sampler_view_destroy;
 
        rctx->context.set_sample_mask = si_set_sample_mask;
@@ -2724,7 +3260,10 @@ void si_init_state_functions(struct r600_context *rctx)
 
 void si_init_config(struct r600_context *rctx)
 {
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
+
+       if (pm4 == NULL)
+               return;
 
        si_cmd_context_control(pm4);
 
@@ -2753,31 +3292,50 @@ void si_init_config(struct r600_context *rctx)
                       S_028AA8_PRIMGROUP_SIZE(63));
        si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000);
        si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
-       si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
-                      S_008A14_CLIP_VTX_REORDER_ENA(1));
+       if (rctx->chip_class < CIK)
+               si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
+                              S_008A14_CLIP_VTX_REORDER_ENA(1));
 
        si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, 0);
        si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
        si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
 
-       si_pm4_set_reg(pm4, R_028804_DB_EQAA, 0x110000);
-
        si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
 
-       switch (rctx->screen->family) {
-       case CHIP_TAHITI:
-       case CHIP_PITCAIRN:
-               si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
-               break;
-       case CHIP_VERDE:
-               si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
-               break;
-       case CHIP_OLAND:
-               si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
-               break;
-       default:
-               si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
-               break;
+       if (rctx->chip_class >= CIK) {
+               switch (rctx->screen->family) {
+               case CHIP_BONAIRE:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
+                       si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
+                       break;
+               case CHIP_KAVERI:
+                       /* XXX todo */
+               case CHIP_KABINI:
+                       /* XXX todo */
+               default:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+                       si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
+                       break;
+               }
+       } else {
+               switch (rctx->screen->family) {
+               case CHIP_TAHITI:
+               case CHIP_PITCAIRN:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
+                       break;
+               case CHIP_VERDE:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
+                       break;
+               case CHIP_OLAND:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
+                       break;
+               case CHIP_HAINAN:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+                       break;
+               default:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+                       break;
+               }
        }
 
        si_pm4_set_state(rctx, init, pm4);