Merge remote-tracking branch 'origin/master' into pipe-video
[mesa.git] / src / mesa / drivers / dri / r600 / r600_blit.c
index 58eb91ec615c4536e50971e3b70303c5841cd841..2d47afdcbbfe51999d6ad5248ac3131dbe308371 100644 (file)
@@ -33,7 +33,7 @@
 #include "r600_cmdbuf.h"
 
 /* common formats supported as both textures and render targets */
-static unsigned is_blit_supported(gl_format mesa_format)
+unsigned r600_check_blit(gl_format mesa_format)
 {
     switch (mesa_format) {
     case MESA_FORMAT_RGBA8888:
@@ -72,7 +72,7 @@ static unsigned is_blit_supported(gl_format mesa_format)
     case MESA_FORMAT_Z24_S8:
     case MESA_FORMAT_Z16:
     case MESA_FORMAT_Z32:
-    case MESA_FORMAT_SRGBA8:
+    case MESA_FORMAT_SARGB8:
     case MESA_FORMAT_SLA8:
     case MESA_FORMAT_SL8:
            break;
@@ -90,22 +90,21 @@ static unsigned is_blit_supported(gl_format mesa_format)
 
 static inline void
 set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_format,
-                  int pitch, int w, int h, intptr_t dst_offset)
+                  int nPitchInPixel, int w, int h, intptr_t dst_offset)
 {
     uint32_t cb_color0_base, cb_color0_size = 0, cb_color0_info = 0, cb_color0_view = 0;
-    int nPitchInPixel, id = 0;
-    uint32_t comp_swap, format, bpp = _mesa_get_format_bytes(mesa_format);
+    int id = 0;
+    uint32_t endian, comp_swap, format;
     BATCH_LOCALS(&context->radeon);
 
     cb_color0_base = dst_offset / 256;
+       endian = ENDIAN_NONE;
 
-    nPitchInPixel = pitch/bpp;
     SETfield(cb_color0_size, (nPitchInPixel / 8) - 1,
              PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
     SETfield(cb_color0_size, ((nPitchInPixel * h) / 64) - 1,
              SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
 
-    SETfield(cb_color0_info, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
     SETfield(cb_color0_info, ARRAY_LINEAR_GENERAL,
              CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
 
@@ -113,24 +112,36 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
 
     switch(mesa_format) {
     case MESA_FORMAT_RGBA8888:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_8_8_8_8;
             comp_swap = SWAP_STD_REV;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_SIGNED_RGBA8888:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_8_8_8_8;
             comp_swap = SWAP_STD_REV;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_RGBA8888_REV:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_8_8_8_8;
             comp_swap = SWAP_STD;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_SIGNED_RGBA8888_REV:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_8_8_8_8;
             comp_swap = SWAP_STD;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
@@ -138,6 +149,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
             break;
     case MESA_FORMAT_ARGB8888:
     case MESA_FORMAT_XRGB8888:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_8_8_8_8;
             comp_swap = SWAP_ALT;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
@@ -145,54 +159,81 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
             break;
     case MESA_FORMAT_ARGB8888_REV:
     case MESA_FORMAT_XRGB8888_REV:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_8_8_8_8;
             comp_swap = SWAP_ALT_REV;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_RGB565:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
+                   comp_swap = SWAP_STD_REV;   
             format = COLOR_5_6_5;
-            comp_swap = SWAP_STD_REV;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_RGB565_REV:
-            format = COLOR_5_6_5;
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             comp_swap = SWAP_STD;
+            format = COLOR_5_6_5;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_ARGB4444:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_4_4_4_4;
             comp_swap = SWAP_ALT;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_ARGB4444_REV:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_4_4_4_4;
             comp_swap = SWAP_ALT_REV;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_ARGB1555:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_1_5_5_5;
             comp_swap = SWAP_ALT;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_ARGB1555_REV:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_1_5_5_5;
             comp_swap = SWAP_ALT_REV;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_AL88:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_8_8;
             comp_swap = SWAP_STD;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_AL88_REV:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_8_8;
             comp_swap = SWAP_STD_REV;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
@@ -224,19 +265,28 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_RGBA_FLOAT32:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_32_32_32_32_FLOAT;
-            comp_swap = SWAP_STD_REV;
+            comp_swap = SWAP_STD;
            SETbit(cb_color0_info, BLEND_FLOAT32_bit);
            CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_RGBA_FLOAT16:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_16_16_16_16_FLOAT;
-            comp_swap = SWAP_STD_REV;
+            comp_swap = SWAP_STD;
            CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_ALPHA_FLOAT32:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_32_FLOAT;
             comp_swap = SWAP_ALT_REV;
            SETbit(cb_color0_info, BLEND_FLOAT32_bit);
@@ -244,12 +294,18 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_ALPHA_FLOAT16:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_16_FLOAT;
             comp_swap = SWAP_ALT_REV;
            CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_LUMINANCE_FLOAT32:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_32_FLOAT;
             comp_swap = SWAP_ALT;
            SETbit(cb_color0_info, BLEND_FLOAT32_bit);
@@ -257,12 +313,18 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_LUMINANCE_FLOAT16:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_16_FLOAT;
             comp_swap = SWAP_ALT;
            CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_32_32_FLOAT;
             comp_swap = SWAP_ALT_REV;
            SETbit(cb_color0_info, BLEND_FLOAT32_bit);
@@ -270,12 +332,18 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_16_16_FLOAT;
             comp_swap = SWAP_ALT_REV;
            CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_32_FLOAT;
             comp_swap = SWAP_STD;
            SETbit(cb_color0_info, BLEND_FLOAT32_bit);
@@ -283,6 +351,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_16_FLOAT;
             comp_swap = SWAP_STD;
            CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
@@ -290,6 +361,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
             break;
     case MESA_FORMAT_X8_Z24:
     case MESA_FORMAT_S8_Z24:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_8_24;
             comp_swap = SWAP_STD;
            SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
@@ -298,6 +372,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_Z24_S8:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_24_8;
             comp_swap = SWAP_STD;
            SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
@@ -306,6 +383,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_Z16:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_16;
             comp_swap = SWAP_STD;
            SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
@@ -314,6 +394,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_Z32:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_32;
             comp_swap = SWAP_STD;
            SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
@@ -321,13 +404,19 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
            CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
-    case MESA_FORMAT_SRGBA8:
+    case MESA_FORMAT_SARGB8:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN32;
+#endif
             format = COLOR_8_8_8_8;
-            comp_swap = SWAP_STD_REV;
+            comp_swap = SWAP_ALT;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
            SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
             break;
     case MESA_FORMAT_SLA8:
+#ifdef MESA_BIG_ENDIAN
+                       endian = ENDIAN_8IN16;
+#endif
             format = COLOR_8_8;
             comp_swap = SWAP_ALT_REV;
            SETbit(cb_color0_info, SOURCE_FORMAT_bit);
@@ -345,6 +434,11 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
             return;
     }
 
+    /* must be 0 on r7xx */
+    if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+           CLEARbit(cb_color0_info, BLEND_FLOAT32_bit);
+
+    SETfield(cb_color0_info, endian, ENDIAN_shift, ENDIAN_mask);
     SETfield(cb_color0_info, format, CB_COLOR0_INFO__FORMAT_shift,
              CB_COLOR0_INFO__FORMAT_mask);
     SETfield(cb_color0_info, comp_swap, COMP_SWAP_shift, COMP_SWAP_mask);
@@ -387,18 +481,25 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
                         0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
     END_BATCH();
 
-    BEGIN_BATCH_NO_AUTOSTATE(12);
+    BEGIN_BATCH_NO_AUTOSTATE(9);
     R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
     R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
-    R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
     R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
     END_BATCH();
 
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+    R600_OUT_BATCH_RELOC(0,
+                        bo,
+                        0,
+                        0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+
     COMMIT_BATCH();
 
 }
 
-static inline void load_shaders(GLcontext * ctx)
+static inline void load_shaders(struct gl_context * ctx)
 {
 
     radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
@@ -416,10 +517,10 @@ static inline void load_shaders(GLcontext * ctx)
     shader = context->blit_bo->ptr;
 
     for(i=0; i<sizeof(r6xx_vs)/4; i++) {
-        shader[128+i] = r6xx_vs[i];
+        shader[128+i] = CPU_TO_LE32(r6xx_vs[i]);
     }
     for(i=0; i<sizeof(r6xx_ps)/4; i++) {
-        shader[256+i] = r6xx_ps[i];
+        shader[256+i] = CPU_TO_LE32(r6xx_ps[i]);
     }
 
     radeon_bo_unmap(context->blit_bo);
@@ -511,6 +612,8 @@ static inline void
 set_vtx_resource(context_t *context)
 {
     struct radeon_bo *bo = context->blit_bo;
+       uint32_t sq_vtx_constant_word2 = 0;
+
     BATCH_LOCALS(&context->radeon);
 
     BEGIN_BATCH_NO_AUTOSTATE(6);
@@ -533,20 +636,26 @@ set_vtx_resource(context_t *context)
     else
            r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
 
+       sq_vtx_constant_word2 = 0
+#ifdef MESA_BIG_ENDIAN
+               | (SQ_ENDIAN_8IN32 << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)
+#endif
+               | (16 << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift);
+
     BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
 
     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
     R600_OUT_BATCH(SQ_FETCH_RESOURCE_VS_OFFSET * FETCH_RESOURCE_STRIDE);
     R600_OUT_BATCH(0);
     R600_OUT_BATCH(48 - 1);
-    R600_OUT_BATCH(16 << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift);
+    R600_OUT_BATCH(sq_vtx_constant_word2);
     R600_OUT_BATCH(1 << MEM_REQUEST_SIZE_shift);
     R600_OUT_BATCH(0);
     R600_OUT_BATCH(0);
     R600_OUT_BATCH(SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift);
-    R600_OUT_BATCH_RELOC(SQ_VTX_CONSTANT_WORD0_0,
+    R600_OUT_BATCH_RELOC(0,
                          bo,
-                         SQ_VTX_CONSTANT_WORD0_0,
+                         0,
                          RADEON_GEM_DOMAIN_GTT, 0, 0);
     END_BATCH();
     COMMIT_BATCH();
@@ -556,11 +665,9 @@ set_vtx_resource(context_t *context)
 static inline void
 set_tex_resource(context_t * context,
                 gl_format mesa_format, struct radeon_bo *bo, int w, int h,
-                int pitch, intptr_t src_offset)
+                int TexelPitch, intptr_t src_offset)
 {
     uint32_t sq_tex_resource0, sq_tex_resource1, sq_tex_resource2, sq_tex_resource4, sq_tex_resource6;
-    int bpp = _mesa_get_format_bytes(mesa_format);
-    int TexelPitch = pitch/bpp;
 
     sq_tex_resource0 = sq_tex_resource1 = sq_tex_resource2 = sq_tex_resource4 = sq_tex_resource6 = 0;
     BATCH_LOCALS(&context->radeon);
@@ -662,11 +769,11 @@ set_tex_resource(context_t * context,
            SETfield(sq_tex_resource1, FMT_8_8_8_8,
                     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
 
-           SETfield(sq_tex_resource4, SQ_SEL_1,
+           SETfield(sq_tex_resource4, SQ_SEL_Y,
                     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
            SETfield(sq_tex_resource4, SQ_SEL_Z,
                     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
-           SETfield(sq_tex_resource4, SQ_SEL_W,
+           SETfield(sq_tex_resource4, SQ_SEL_1,
                     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
            SETfield(sq_tex_resource4, SQ_SEL_X,
                     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
@@ -1042,17 +1149,17 @@ set_tex_resource(context_t * context,
            SETfield(sq_tex_resource4, SQ_SEL_X,
                     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
            break;
-    case MESA_FORMAT_SRGBA8:
+    case MESA_FORMAT_SARGB8:
            SETfield(sq_tex_resource1, FMT_8_8_8_8,
                     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
 
-           SETfield(sq_tex_resource4, SQ_SEL_W,
-                    SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
            SETfield(sq_tex_resource4, SQ_SEL_Z,
-                    SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+                    SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
            SETfield(sq_tex_resource4, SQ_SEL_Y,
-                    SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+                    SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
            SETfield(sq_tex_resource4, SQ_SEL_X,
+                    SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+           SETfield(sq_tex_resource4, SQ_SEL_W,
                     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
            SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
            break;
@@ -1446,7 +1553,7 @@ set_default_state(context_t *context)
            SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit);
     }
 
-    BEGIN_BATCH_NO_AUTOSTATE(117);
+    BEGIN_BATCH_NO_AUTOSTATE(120);
     R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
     R600_OUT_BATCH(sq_config);
     R600_OUT_BATCH(sq_gpr_resource_mgmt_1);
@@ -1476,7 +1583,6 @@ set_default_state(context_t *context)
                          (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
     R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
     R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
-    R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
     R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
     R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
     R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
@@ -1492,9 +1598,10 @@ set_default_state(context_t *context)
     R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) |
         (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
         (X_1_256TH << QUANT_MODE_shift));
+    R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, 0);
 
     R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
-    R600_OUT_BATCH(2048);
+    R600_OUT_BATCH(0xffffff);
     R600_OUT_BATCH(0);
     R600_OUT_BATCH(0);
     R600_OUT_BATCH(0);
@@ -1525,6 +1632,7 @@ set_default_state(context_t *context)
     R600_OUT_BATCH(0);
 
     R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0);
+    R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, 0);
 
     END_BATCH();
     COMMIT_BATCH();
@@ -1535,56 +1643,54 @@ static GLboolean validate_buffers(context_t *rmesa,
                                   struct radeon_bo *dst_bo)
 {
     int ret;
-    radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
-                                      src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
 
-    radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
-                                      dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+    radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
 
-    radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
-                                      rmesa->blit_bo, RADEON_GEM_DOMAIN_GTT, 0);
+    ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+                                       src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    if (ret)
+        return GL_FALSE;
 
     ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
-                                       rmesa->blit_bo,
-                                       RADEON_GEM_DOMAIN_GTT, 0);
+                                        dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT);
     if (ret)
         return GL_FALSE;
 
     ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
-                                        first_elem(&rmesa->radeon.dma.reserved)->bo,
-                                        RADEON_GEM_DOMAIN_GTT, 0);
+                                       rmesa->blit_bo,
+                                       RADEON_GEM_DOMAIN_GTT, 0);
     if (ret)
         return GL_FALSE;
 
     return GL_TRUE;
 }
 
-GLboolean r600_blit(GLcontext *ctx,
-                    struct radeon_bo *src_bo,
-                    intptr_t src_offset,
-                    gl_format src_mesaformat,
-                    unsigned src_pitch,
-                    unsigned src_width,
-                    unsigned src_height,
-                    unsigned src_x,
-                    unsigned src_y,
-                    struct radeon_bo *dst_bo,
-                    intptr_t dst_offset,
-                    gl_format dst_mesaformat,
-                    unsigned dst_pitch,
-                    unsigned dst_width,
-                    unsigned dst_height,
-                    unsigned dst_x,
-                    unsigned dst_y,
-                    unsigned w,
-                    unsigned h,
-                    unsigned flip_y)
+unsigned r600_blit(struct gl_context *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x,
+                   unsigned src_y,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x,
+                   unsigned dst_y,
+                   unsigned w,
+                   unsigned h,
+                   unsigned flip_y)
 {
     context_t *context = R700_CONTEXT(ctx);
     int id = 0;
 
-    if (!is_blit_supported(dst_mesaformat))
-           return GL_FALSE;
+    if (!r600_check_blit(dst_mesaformat))
+        return GL_FALSE;
 
     if (src_bo == dst_bo) {
         return GL_FALSE;
@@ -1606,9 +1712,9 @@ GLboolean r600_blit(GLcontext *ctx,
     }
 
     /* Flush is needed to make sure that source buffer has correct data */
-    radeonFlush(context->radeon.glCtx);
+    radeonFlush(ctx);
 
-    rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__);
+    rcommonEnsureCmdBufSpace(&context->radeon, 311, __FUNCTION__);
 
     /* load shaders */
     load_shaders(context->radeon.glCtx);
@@ -1617,7 +1723,7 @@ GLboolean r600_blit(GLcontext *ctx,
         return GL_FALSE;
 
     /* set clear state */
-    /* 117 */
+    /* 120 */
     set_default_state(context);
 
     /* shaders */
@@ -1633,7 +1739,7 @@ GLboolean r600_blit(GLcontext *ctx,
     set_tex_sampler(context);
 
     /* dst */
-    /* 27 */
+    /* 31 */
     set_render_target(context, dst_bo, dst_mesaformat,
                      dst_pitch, dst_width, dst_height, dst_offset);
     /* scissors */
@@ -1655,9 +1761,10 @@ GLboolean r600_blit(GLcontext *ctx,
                 CB_ACTION_ENA_bit | (1 << (id + 6)));
 
     /* 5 */
+    /* XXX drm should handle this in fence submit */
     r700WaitForIdleClean(context);
 
-    radeonFlush(context->radeon.glCtx);
+    radeonFlush(ctx);
 
     return GL_TRUE;
 }