r600g: fix eg/cayman scissor workaround
[mesa.git] / src / gallium / drivers / r600 / r600_state.c
index 01c59072a266a1001e6f01dc5a89990071cd025e..21e6abcb1f6e34923d8dfbc2027d6803ccd0a8c7 100644 (file)
@@ -37,6 +37,7 @@
 #include <util/u_memory.h>
 #include <util/u_inlines.h>
 #include <util/u_framebuffer.h>
+#include "util/u_transfer.h"
 #include <pipebuffer/pb_buffer.h>
 #include "r600.h"
 #include "r600d.h"
@@ -116,9 +117,10 @@ static void r600_set_blend_color(struct pipe_context *ctx,
 static void *r600_create_blend_state(struct pipe_context *ctx,
                                        const struct pipe_blend_state *state)
 {
+       struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend);
        struct r600_pipe_state *rstate;
-       u32 color_control, target_mask;
+       u32 color_control = 0, target_mask;
 
        if (blend == NULL) {
                return NULL;
@@ -128,7 +130,10 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
        rstate->id = R600_PIPE_STATE_BLEND;
 
        target_mask = 0;
-       color_control = S_028808_PER_MRT_BLEND(1);
+
+       /* R600 does not support per-MRT blends */
+       if (rctx->family > CHIP_R600)
+               color_control |= S_028808_PER_MRT_BLEND(1);
        if (state->logicop_enable) {
                color_control |= (state->logicop_func << 16) | (state->logicop_func << 20);
        } else {
@@ -151,20 +156,24 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
                }
        }
        blend->cb_target_mask = target_mask;
+       /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */
        r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
-                               color_control, 0xFFFFFFFF, NULL);
+                               color_control, 0xFFFFFFFD, NULL);
 
        for (int i = 0; i < 8; i++) {
-               unsigned eqRGB = state->rt[i].rgb_func;
-               unsigned srcRGB = state->rt[i].rgb_src_factor;
-               unsigned dstRGB = state->rt[i].rgb_dst_factor;
+               /* state->rt entries > 0 only written if independent blending */
+               const int j = state->independent_blend_enable ? i : 0;
+
+               unsigned eqRGB = state->rt[j].rgb_func;
+               unsigned srcRGB = state->rt[j].rgb_src_factor;
+               unsigned dstRGB = state->rt[j].rgb_dst_factor;
 
-               unsigned eqA = state->rt[i].alpha_func;
-               unsigned srcA = state->rt[i].alpha_src_factor;
-               unsigned dstA = state->rt[i].alpha_dst_factor;
+               unsigned eqA = state->rt[j].alpha_func;
+               unsigned srcA = state->rt[j].alpha_src_factor;
+               unsigned dstA = state->rt[j].alpha_dst_factor;
                uint32_t bc = 0;
 
-               if (!state->rt[i].blend_enable)
+               if (!state->rt[j].blend_enable)
                        continue;
 
                bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB));
@@ -178,10 +187,11 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
                        bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
                }
 
-               r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
-               if (i == 0) {
+               /* R600 does not support per-MRT blends */
+               if (rctx->family > CHIP_R600)
+                       r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
+               if (i == 0)
                        r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL);
-               }
        }
        return rstate;
 }
@@ -189,20 +199,19 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
 static void *r600_create_dsa_state(struct pipe_context *ctx,
                                   const struct pipe_depth_stencil_alpha_state *state)
 {
-       struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+       struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa);
        unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
        unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control;
+       struct r600_pipe_state *rstate;
 
-       if (rstate == NULL) {
+       if (dsa == NULL) {
                return NULL;
        }
 
+       rstate = &dsa->rstate;
+
        rstate->id = R600_PIPE_STATE_DSA;
        /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
-       /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be
-        * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will
-        * be set if shader use texkill instruction
-        */
        db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
        stencil_ref_mask = 0;
        stencil_ref_mask_bf = 0;
@@ -240,6 +249,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
                alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
                alpha_ref = fui(state->alpha.ref_value);
        }
+       dsa->alpha_ref = alpha_ref;
 
        /* misc */
        db_render_control = 0;
@@ -256,12 +266,14 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
        r600_pipe_state_add_reg(rstate,
                                R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf,
                                0xFFFFFFFF & C_028434_STENCILREF_BF, NULL);
-       r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
+       /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
+        * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
+        * r600_pipe_shader_ps().*/
+       r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
        r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL);
@@ -357,6 +369,7 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
 {
        struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
        union util_color uc;
+       unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 4 : 0;
 
        if (rstate == NULL) {
                return NULL;
@@ -368,12 +381,12 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
                        S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
                        S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
                        S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
-                       S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) |
-                       S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) |
+                       S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
+                       S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) |
                        S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
+                       S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
                        S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
                        S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
-       /* FIXME LOD it depends on texture base level ... */
        r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
                        S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
                        S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
@@ -397,10 +410,11 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
        const struct util_format_description *desc;
        struct r600_resource_texture *tmp;
        struct r600_resource *rbuffer;
-       unsigned format;
+       unsigned format, endian;
        uint32_t word4 = 0, yuv_format = 0, pitch = 0;
        unsigned char swizzle[4], array_mode = 0, tile_type = 0;
        struct r600_bo *bo[2];
+       unsigned height, depth;
 
        if (resource == NULL)
                return NULL;
@@ -418,7 +432,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
        swizzle[1] = state->swizzle_g;
        swizzle[2] = state->swizzle_b;
        swizzle[3] = state->swizzle_a;
-       format = r600_translate_texformat(state->format,
+       format = r600_translate_texformat(ctx->screen, state->format,
                                          swizzle,
                                          &word4, &yuv_format);
        if (format == ~0) {
@@ -433,16 +447,28 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
                r600_texture_depth_flush(ctx, texture, TRUE);
                tmp = tmp->flushed_depth_texture;
        }
+       endian = r600_colorformat_endian_swap(format);
+
+       if (tmp->force_int_type) {
+               word4 &= C_038010_NUM_FORMAT_ALL;
+               word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
+       }
        rbuffer = &tmp->resource;
        bo[0] = rbuffer->bo;
        bo[1] = rbuffer->bo;
-       pitch = align(tmp->pitch_in_pixels[0], 8);
-       if (tmp->tiled) {
-               array_mode = tmp->array_mode[0];
-               tile_type = tmp->tile_type;
+       pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8);
+       array_mode = tmp->array_mode[0];
+       tile_type = tmp->tile_type;
+
+       height = texture->height0;
+       depth = texture->depth0;
+       if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
+               height = 1;
+               depth = texture->array_size;
+       } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
+               depth = texture->array_size;
        }
 
-       /* FIXME properly handle first level != 0 */
        r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
                                S_038000_DIM(r600_tex_dim(texture->target)) |
                                S_038000_TILE_MODE(array_mode) |
@@ -450,8 +476,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
                                S_038000_PITCH((pitch / 8) - 1) |
                                S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
-                               S_038004_TEX_HEIGHT(texture->height0 - 1) |
-                               S_038004_TEX_DEPTH(texture->depth0 - 1) |
+                               S_038004_TEX_HEIGHT(height - 1) |
+                               S_038004_TEX_DEPTH(depth - 1) |
                                S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
                                (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
@@ -459,15 +485,17 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
                                (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
        r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
                                word4 |
-                               S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) |
+                               S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
                                S_038010_REQUEST_SIZE(1) |
+                               S_038010_ENDIAN_SWAP(endian) |
                                S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
                                S_038014_LAST_LEVEL(state->u.tex.last_level) |
-                               S_038014_BASE_ARRAY(0) |
-                               S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL);
+                               S_038014_BASE_ARRAY(state->u.tex.first_layer) |
+                               S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
-                               S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL);
+                               S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) |
+                               S_038018_MAX_ANISO(4 /* max 16 samples */), 0xFFFFFFFF, NULL);
 
        return &resource->base;
 }
@@ -480,7 +508,8 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
 
        for (int i = 0; i < count; i++) {
                if (resource[i]) {
-                       r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i);
+                       r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state,
+                                                               i + R600_MAX_CONST_BUFFERS);
                }
        }
 }
@@ -688,7 +717,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
        unsigned level = state->cbufs[cb]->u.tex.level;
        unsigned pitch, slice;
        unsigned color_info;
-       unsigned format, swap, ntype;
+       unsigned format, swap, ntype, endian;
        unsigned offset;
        const struct util_format_description *desc;
        struct r600_bo *bo[3];
@@ -710,32 +739,70 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
        /* XXX quite sure for dx10+ hw don't need any offset hacks */
        offset = r600_texture_get_offset(rtex,
                                         level, state->cbufs[cb]->u.tex.first_layer);
-       pitch = rtex->pitch_in_pixels[level] / 8 - 1;
-       slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
-       ntype = 0;
+       pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+       slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
        desc = util_format_description(surf->base.format);
-       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
-               ntype = V_0280A0_NUMBER_SRGB;
 
        for (i = 0; i < 4; i++) {
                if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
                        break;
                }
        }
+       ntype = V_0280A0_NUMBER_UNORM;
+       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+               ntype = V_0280A0_NUMBER_SRGB;
+       else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED)
+               ntype = V_0280A0_NUMBER_SNORM;
 
        format = r600_translate_colorformat(surf->base.format);
        swap = r600_translate_colorswap(surf->base.format);
+       if(rbuffer->b.b.b.usage == PIPE_USAGE_STAGING) {
+               endian = ENDIAN_NONE;
+       } else {
+               endian = r600_colorformat_endian_swap(format);
+       }
+
+       /* disable when gallium grows int textures */
+       if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type)
+               ntype = V_0280A0_NUMBER_UINT;
+
        color_info = S_0280A0_FORMAT(format) |
                S_0280A0_COMP_SWAP(swap) |
                S_0280A0_ARRAY_MODE(rtex->array_mode[level]) |
                S_0280A0_BLEND_CLAMP(1) |
-               S_0280A0_NUMBER_TYPE(ntype);
+               S_0280A0_NUMBER_TYPE(ntype) |
+               S_0280A0_ENDIAN(endian);
 
-       /* on R600 this can't be set if BLEND_CLAMP isn't set,
-          if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */
-       if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
-           desc->channel[i].size < 12)
-               color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
+       /* EXPORT_NORM is an optimization that can be enabled for better
+        * performance in certain cases
+        */
+       if (rctx->family < CHIP_RV770) {
+               /* EXPORT_NORM can be enabled if:
+                * - 11-bit or smaller UNORM/SNORM/SRGB
+                * - BLEND_CLAMP is enabled
+                * - BLEND_FLOAT32 is disabled
+                */
+               if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
+                   (desc->channel[i].size < 12 &&
+                    desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT &&
+                    ntype != V_0280A0_NUMBER_UINT &&
+                    ntype != V_0280A0_NUMBER_SINT) &&
+                   G_0280A0_BLEND_CLAMP(color_info) &&
+                   !G_0280A0_BLEND_FLOAT32(color_info))
+                       color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
+       } else {
+               /* EXPORT_NORM can be enabled if:
+                * - 11-bit or smaller UNORM/SNORM/SRGB
+                * - 16-bit or smaller FLOAT
+                */
+               if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
+                   ((desc->channel[i].size < 12 &&
+                     desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT &&
+                     ntype != V_0280A0_NUMBER_UINT && ntype != V_0280A0_NUMBER_SINT) ||
+                   (desc->channel[i].size < 17 &&
+                    desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)))
+                       color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
+       }
 
        r600_pipe_state_add_reg(rstate,
                                R_028040_CB_COLOR0_BASE + cb * 4,
@@ -785,8 +852,8 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
        /* XXX quite sure for dx10+ hw don't need any offset hacks */
        offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
                                         level, state->zsbuf->u.tex.first_layer);
-       pitch = rtex->pitch_in_pixels[level] / 8 - 1;
-       slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+       pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+       slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
        format = r600_translate_dbformat(state->zsbuf->texture->format);
 
        r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
@@ -812,6 +879,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
        if (rstate == NULL)
                return;
 
+       r600_context_flush_dest_caches(&rctx->ctx);
+       rctx->ctx.num_dest_buffers = state->nr_cbufs;
+
        /* unreference old buffer and reference new one */
        rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
 
@@ -823,6 +893,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
        }
        if (state->zsbuf) {
                r600_db(rctx, rstate, state);
+               rctx->ctx.num_dest_buffers++;
        }
 
        target_mask = 0x00000000;
@@ -902,6 +973,17 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
        }
 }
 
+static void r600_texture_barrier(struct pipe_context *ctx)
+{      /* Issues a single r600_context_flush_all() with the flag set built below. */
+       struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; /* view the generic context as the driver context */
+
+       r600_context_flush_all(&rctx->ctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) | /* NOTE(review): presumably TC = texture cache, CB = colour buffer - confirm in r600d.h */
+                       S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) |
+                       S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) |
+                       S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) |
+                       S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1)); /* DEST_BASE enable is set for every one of CB0..CB7 */
+}
+
 void r600_init_state_functions(struct r600_pipe_context *rctx)
 {
        rctx->context.create_blend_state = r600_create_blend_state;
@@ -913,7 +995,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx)
        rctx->context.create_vertex_elements_state = r600_create_vertex_elements;
        rctx->context.create_vs_state = r600_create_shader_state;
        rctx->context.bind_blend_state = r600_bind_blend_state;
-       rctx->context.bind_depth_stencil_alpha_state = r600_bind_state;
+       rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state;
        rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler;
        rctx->context.bind_fs_state = r600_bind_ps_shader;
        rctx->context.bind_rasterizer_state = r600_bind_rs_state;
@@ -941,6 +1023,8 @@ void r600_init_state_functions(struct r600_pipe_context *rctx)
        rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view;
        rctx->context.set_viewport_state = r600_set_viewport_state;
        rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
+       rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
+       rctx->context.texture_barrier = r600_texture_barrier;
 }
 
 void r600_init_config(struct r600_pipe_context *rctx)
@@ -1194,6 +1278,163 @@ void r600_init_config(struct r600_pipe_context *rctx)
        r600_context_pipe_state_set(&rctx->ctx, rstate);
 }
 
+void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) /* ctx is not referenced in this function */
+{      /* Rebuilds shader->rstate with the pixel-shader registers derived from shader->shader. */
+       struct r600_pipe_state *rstate = &shader->rstate;
+       struct r600_shader *rshader = &shader->shader;
+       unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
+       int pos_index = -1, face_index = -1; /* -1 = no input with that semantic was found */
+
+       rstate->nregs = 0; /* start from an empty register list */
+
+       for (i = 0; i < rshader->ninput; i++) { /* if several inputs match, the last matching index wins */
+               if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+                       pos_index = i;
+               if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
+                       face_index = i;
+       }
+
+       db_shader_control = 0; /* only the three bits written with a mask at the end are built here */
+       for (i = 0; i < rshader->noutput; i++) {
+               if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+                       db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
+               if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+                       db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(1);
+       }
+       if (rshader->uses_kill)
+               db_shader_control |= S_02880C_KILL_ENABLE(1);
+
+       exports_ps = 0;
+       num_cout = 0; /* counts outputs with COLOR semantic */
+       for (i = 0; i < rshader->noutput; i++) {
+               if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
+                   rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+                       exports_ps |= 1; /* bit 0 is set when POSITION or STENCIL is written */
+               else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
+                       num_cout++;
+               }
+       }
+       exports_ps |= S_028854_EXPORT_COLORS(num_cout);
+       if (!exports_ps) { /* reached only when neither bit 0 nor the colour-count field ended up non-zero */
+               /* always at least export 1 component per pixel */
+               exports_ps = 2;
+       }
+
+       spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
+                               S_0286CC_PERSP_GRADIENT_ENA(1);
+       spi_input_z = 0;
+       if (pos_index != -1) { /* a POSITION input exists: enable it and record its centroid flag and GPR */
+               spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
+                                       S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+                                       S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
+                                       S_0286CC_BARYC_SAMPLE_CNTL(1));
+               spi_input_z |= 1;
+       }
+
+       spi_ps_in_control_1 = 0;
+       if (face_index != -1) { /* a FACE input exists: enable front-face and record its GPR */
+               spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
+                       S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
+       }
+
+       r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate,
+                               R_028840_SQ_PGM_START_PS,
+                               r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); /* the only call here that passes a buffer object as last argument */
+       r600_pipe_state_add_reg(rstate,
+                               R_028850_SQ_PGM_RESOURCES_PS,
+                               S_028868_NUM_GPRS(rshader->bc.ngpr) | /* NOTE(review): field macros use the 028868 prefix on register 028850 - confirm the layouts match */
+                               S_028868_STACK_SIZE(rshader->bc.nstack),
+                               0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate,
+                               R_028854_SQ_PGM_EXPORTS_PS,
+                               exports_ps, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate,
+                               R_0288CC_SQ_PGM_CF_OFFSET_PS,
+                               0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
+                               S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all), /* value: 1 iff fs_write_all is non-zero */
+                               S_028808_MULTIWRITE_ENABLE(1), /* presumably the write mask: only MULTIWRITE_ENABLE - confirm against r600_pipe_state_add_reg() */
+                               NULL);
+       /* only set some bits here, the other bits are set in the dsa state */
+       r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL,
+                               db_shader_control,
+                               S_02880C_Z_EXPORT_ENABLE(1) |
+                               S_02880C_STENCIL_REF_EXPORT_ENABLE(1) |
+                               S_02880C_KILL_ENABLE(1),
+                               NULL);
+
+       r600_pipe_state_add_reg(rstate,
+                               R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, /* NOTE(review): meaning of 0x01000FFF is not shown here - confirm against the register spec */
+                               0xFFFFFFFF, NULL);
+}
+
+void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) /* ctx is not referenced in this function */
+{      /* Rebuilds shader->rstate with the vertex-shader registers derived from shader->shader. */
+       struct r600_pipe_state *rstate = &shader->rstate;
+       struct r600_shader *rshader = &shader->shader;
+       unsigned spi_vs_out_id[10];
+       unsigned i, tmp;
+
+       /* clear previously recorded registers */
+       rstate->nregs = 0;
+
+       /* so far never got proper semantic id from tgsi */
+       /* FIXME better to move this in config things so they get emitted
+        * only one time per cs
+        */
+       for (i = 0; i < 10; i++) {
+               spi_vs_out_id[i] = 0;
+       }
+       for (i = 0; i < 32; i++) { /* i / 4 is at most 7 here, so words 8 and 9 keep their zero value */
+               tmp = i << ((i & 3) * 8); /* place index i in byte (i & 3) of its word */
+               spi_vs_out_id[i / 4] |= tmp; /* four consecutive indices are packed per word */
+       }
+       for (i = 0; i < 10; i++) { /* one register per word, 4 bytes apart starting at SPI_VS_OUT_ID_0 */
+               r600_pipe_state_add_reg(rstate,
+                                       R_028614_SPI_VS_OUT_ID_0 + i * 4,
+                                       spi_vs_out_id[i], 0xFFFFFFFF, NULL);
+       }
+
+       r600_pipe_state_add_reg(rstate,
+                       R_0286C4_SPI_VS_OUT_CONFIG,
+                       S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), /* NOTE(review): the "- 2" bias is unexplained here and would wrap if noutput is unsigned and < 2 - confirm */
+                       0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate,
+                       R_028868_SQ_PGM_RESOURCES_VS,
+                       S_028868_NUM_GPRS(rshader->bc.ngpr) |
+                       S_028868_STACK_SIZE(rshader->bc.nstack),
+                       0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate,
+                       R_0288D0_SQ_PGM_CF_OFFSET_VS,
+                       0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate,
+                       R_028858_SQ_PGM_START_VS,
+                       r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); /* the only call here that passes a buffer object as last argument */
+
+       r600_pipe_state_add_reg(rstate,
+                               R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, /* register address is SQ_LOOP_CONST_0 plus 32 * 4 */
+                               0xFFFFFFFF, NULL);
+}
+
+void r600_fetch_shader(struct r600_vertex_element *ve)
+{      /* Fills ve->rstate with the three fetch-shader program registers. */
+       struct r600_pipe_state *rstate;
+
+       rstate = &ve->rstate;
+       rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+       rstate->nregs = 0; /* start from an empty register list */
+       r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+                               0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+                               0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+                               r600_bo_offset(ve->fetch_shader) >> 8, /* start value is the buffer offset shifted right by 8 */
+                               0xFFFFFFFF, ve->fetch_shader); /* presumably passed so the buffer can be relocated - confirm against r600_pipe_state_add_reg() */
+}
+
 void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
 {
        struct pipe_depth_stencil_alpha_state dsa;
@@ -1242,8 +1483,8 @@ void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
        r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
                                rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
-                               S_038008_STRIDE(stride),
-                               0xFFFFFFFF, NULL);
+                               S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
+                               S_038008_STRIDE(stride), 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
                                0x00000000, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,