r600g: texture buffer object + glsl 1.40 enable support (v2)
author Dave Airlie <airlied@redhat.com>
Sun, 16 Dec 2012 10:31:32 +0000 (10:31 +0000)
committer Dave Airlie <airlied@redhat.com>
Fri, 11 Jan 2013 22:31:54 +0000 (22:31 +0000)
This adds TBO support to r600g, and with GLSL 1.40 enabled,
we now get 3.1 core profiles advertised for r600g.

The r600/700 implementation is a bit different from the evergreen one,
as r6/7 hw lacks vertex fetch swizzles. So we implement it by passing 5
constants per sampler to the shader, the shader uses the first 4 as masks
for each component and the 5th as the alpha value to OR in.

TXQ is also broken for buffers, so we have to pass the buffer size as a
constant: on evergreen that is the only thing we pass, on r6/7 we pass it
as the 6th element in the const info buffer.

v1.1: drop return as DDX doesn't use a texture type
v2: add r600/700 support.

Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c
src/gallium/drivers/r600/r600_texture.c

index d0402c219fbb87d450cb322da5d82e1a770fe9e5..7040b7aa8cb21fd017b712476b50b0d14b940d95 100644 (file)
@@ -969,6 +969,58 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
        return ss;
 }
 
+/* Fill in the resource words of a sampler view whose texture is a buffer; returns &view->base. height0 is not read. */
+static struct pipe_sampler_view *
+texture_buffer_sampler_view(struct r600_pipe_sampler_view *view,
+                           unsigned width0, unsigned height0)
+{
+       struct pipe_context *ctx = view->base.context;
+       struct r600_texture *tmp = (struct r600_texture*)view->base.texture; /* only tmp->resource is used below */
+       uint64_t va;
+       int stride = util_format_get_blocksize(view->base.format); /* stride is the block size of the view format */
+       unsigned format, num_format, format_comp, endian;
+       unsigned swizzle_res;
+       unsigned char swizzle[4];
+       const struct util_format_description *desc;
+
+       swizzle[0] = view->base.swizzle_r;
+       swizzle[1] = view->base.swizzle_g;
+       swizzle[2] = view->base.swizzle_b;
+       swizzle[3] = view->base.swizzle_a;
+
+       r600_vertex_data_type(view->base.format,
+                             &format, &num_format, &format_comp,
+                             &endian);
+
+       desc = util_format_description(view->base.format);
+
+       swizzle_res = r600_get_swizzle_combined(desc->swizzle, swizzle, TRUE); /* TRUE requests the vtx variant of the swizzle encoding */
+
+       va = r600_resource_va(ctx->screen, view->base.texture);
+       view->tex_resource = &tmp->resource;
+
+       view->skip_mip_address_reloc = true;
+       view->tex_resource_words[0] = va; /* NOTE(review): va is 64-bit; presumably only the low bits land here, the high part goes into word 2 - confirm */
+       view->tex_resource_words[1] = width0 - 1;
+       view->tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) |
+               S_030008_STRIDE(stride) |
+               S_030008_DATA_FORMAT(format) |
+               S_030008_NUM_FORMAT_ALL(num_format) |
+               S_030008_FORMAT_COMP_ALL(format_comp) |
+               S_030008_SRF_MODE_ALL(1) |
+               S_030008_ENDIAN_SWAP(endian);
+       view->tex_resource_words[3] = swizzle_res;
+       /*
+        * In theory dword 4 holds the number of elements, for use with
+        * resinfo, but that seems to utterly fail to work. The AMD GPU shader
+        * analyser uses a const buffer to store the element sizes for buffer txq.
+        */
+       view->tex_resource_words[4] = 0;
+       view->tex_resource_words[5] = view->tex_resource_words[6] = 0;
+       view->tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
+       return &view->base;
+}
+
 struct pipe_sampler_view *
 evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                                     struct pipe_resource *texture,
@@ -997,6 +1049,9 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
        view->base.reference.count = 1;
        view->base.context = ctx;
 
+       if (texture->target == PIPE_BUFFER)
+               return texture_buffer_sampler_view(view, width0, height0);
+
        swizzle[0] = state->swizzle_r;
        swizzle[1] = state->swizzle_g;
        swizzle[2] = state->swizzle_b;
index d324d59f48d70eb3f6728507fb748fffeb253f20..0a6f63ff9c02e7c8c4fc6e5bb2479ba2c0af1eaf 100644 (file)
@@ -2609,7 +2609,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
        fprintf(stderr, "--------------------------------------\n");
 }
 
-static void r600_vertex_data_type(enum pipe_format pformat,
+void r600_vertex_data_type(enum pipe_format pformat,
                                  unsigned *format,
                                  unsigned *num_format, unsigned *format_comp, unsigned *endian)
 {
index 5727a7c421f7c2c0c7a76b36e2acf3966e35180e..182f403aa773ec7bb323540d5f596f1790e84d58 100644 (file)
@@ -250,4 +250,6 @@ void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3
 void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
 void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
 
+void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
+                          unsigned *num_format, unsigned *format_comp, unsigned *endian);
 #endif
index 29ef988372b739823b2a83738de0e6fa4d0b0f7b..92beabc74d0a3a4f770ceda7e29b1546e925a72c 100644 (file)
@@ -416,6 +416,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_COMPUTE:
        case PIPE_CAP_START_INSTANCE:
        case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+       case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
                return 1;
 
         case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
@@ -425,7 +426,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                return 256;
 
        case PIPE_CAP_GLSL_FEATURE_LEVEL:
-               return 130;
+               return 140;
 
        case PIPE_CAP_TEXTURE_MULTISAMPLE:
                return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
@@ -450,7 +451,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
        case PIPE_CAP_USER_VERTEX_BUFFERS:
-       case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
                return 0;
 
        /* Stream output. */
index 2dcb4734e6b8da2343f6bc09a114c9fb7b8b4789..d983718b1bb31a9191d15a983d5a97ddef1af6ab 100644 (file)
 #define R600_TRACE_CS 0
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 2
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
 #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 
 #define R600_MAX_CONST_BUFFER_SIZE 4096
 
@@ -330,6 +331,7 @@ struct r600_samplerview_state {
        uint32_t                        compressed_depthtex_mask; /* which textures are depth */
        uint32_t                        compressed_colortex_mask;
        boolean                         dirty_txq_constants;
+       boolean                         dirty_buffer_constants;
 };
 
 struct r600_sampler_states {
@@ -347,6 +349,8 @@ struct r600_textures_info {
 
        /* cube array txq workaround */
        uint32_t                        *txq_constants;
+       /* buffer related workarounds */
+       uint32_t                        *buffer_constants;
 };
 
 struct r600_fence {
@@ -678,6 +682,10 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
                                                const struct pipe_surface *templ,
                                                unsigned width, unsigned height);
 
+unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+                                  const unsigned char *swizzle_view,
+                                  boolean vtx);
+
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
 void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom);
index e0c2b44b5b1b6a5d5b5e0a7709bd021604f5bb32..8307750438517a7aece560e4fd56933139123955 100644 (file)
@@ -3896,6 +3896,128 @@ static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
        return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
 }
 
+/*
+ * Emit a vertex fetch for a TXF on a buffer texture.
+ * On chips before evergreen the fetched components are then ANDed with the
+ * per-sampler masks and W is ORed with the alpha constant, both read from
+ * R600_BUFFER_INFO_CONST_BUFFER. Returns 0, or the bytecode builder's error.
+ */
+static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
+{
+       struct r600_bytecode_vtx vtx;
+       struct r600_bytecode_alu alu;
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int src_gpr, r, i;
+       int id = tgsi_tex_get_src_gpr(ctx, 1);
+
+       src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
+       if (src_requires_loading) {
+               for (i = 0; i < 4; i++) {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+                       r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = i;
+                       if (i == 3)
+                               alu.last = 1;
+                       alu.dst.write = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+               src_gpr = ctx->temp_reg;
+       }
+
+       memset(&vtx, 0, sizeof(vtx));
+       vtx.inst = 0;
+       vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
+       vtx.fetch_type = 2;             /* VTX_FETCH_NO_INDEX_OFFSET */
+       vtx.src_gpr = src_gpr;
+       vtx.mega_fetch_count = 16;
+       vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+       vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;          /* SEL_X */
+       vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;          /* SEL_Y */
+       vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;          /* SEL_Z */
+       vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;          /* SEL_W */
+       vtx.use_const_fields = 1;
+       vtx.srf_mode_all = 1;           /* SRF_MODE_NO_ZERO */
+
+       if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
+               return r;
+
+       if (ctx->bc->chip_class >= EVERGREEN)
+               return 0;
+
+       /* r600/r700 only: AND every written component with its mask constant. */
+       for (i = 0; i < 4; i++) {
+               int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
+               alu.dst.chan = i;
+               alu.dst.sel = vtx.dst_gpr;
+               alu.dst.write = 1;
+               alu.src[0].sel = vtx.dst_gpr;
+               alu.src[0].chan = i;
+               alu.src[1].sel = 512 + (id * 2);
+               alu.src[1].chan = i % 4;
+               alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+               if (i == lasti)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       /* The OR below writes channel W, so guard it with the W write-mask bit (8). */
+       if (inst->Dst[0].Register.WriteMask & 8) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT);
+               alu.dst.chan = 3;
+               alu.dst.sel = vtx.dst_gpr;
+               alu.dst.write = 1;
+               alu.src[0].sel = vtx.dst_gpr;
+               alu.src[0].chan = 3;
+               alu.src[1].sel = 512 + (id * 2) + 1;
+               alu.src[1].chan = 0;
+               alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
+/* Emit a single MOV that copies this sampler's entry from R600_BUFFER_INFO_CONST_BUFFER into the TXQ destination. */
+static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int r;
+       int id = tgsi_tex_get_src_gpr(ctx, 1);
+       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+
+       if (ctx->bc->chip_class >= EVERGREEN) {
+               alu.src[0].sel = 512 + (id / 4); /* four samplers share one constant, one channel each */
+               alu.src[0].chan = id % 4;
+       } else {
+               /* r600/r700: the value is in channel 1 of the sampler's second constant */
+               alu.src[0].sel = 512 + (id * 2) + 1;
+               alu.src[0].chan = 1;
+       }
+       alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+       tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+       alu.last = 1;
+       r = r600_bytecode_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       return 0;
+}
+
 static int tgsi_tex(struct r600_shader_ctx *ctx)
 {
        static float one_point_five = 1.5f;
@@ -3934,6 +4056,18 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
        src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
 
+       if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
+               if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
+                       ctx->shader->uses_tex_buffers = true;
+                       return r600_do_buffer_txq(ctx);
+               }
+               else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
+                       if (ctx->bc->chip_class < EVERGREEN)
+                               ctx->shader->uses_tex_buffers = true;
+                       return do_vtx_fetch_inst(ctx, src_requires_loading);
+               }
+       }
+
        if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
                /* get offset values */
                if (inst->Texture.NumOffsets) {
index b58a58ab4dbf4a65cfceb43987099422a82d71b2..d61efcb1a726ebd874f2cb710c0a9b2e4c11081c 100644 (file)
@@ -61,6 +61,7 @@ struct r600_shader {
        boolean                 vs_out_misc_write;
        boolean                 vs_out_point_size;
        boolean                 has_txq_cube_array_z_comp;
+       boolean                 uses_tex_buffers;
 };
 
 struct r600_shader_key {
index e2d0f7544c178ac33f98bd37fb62e21bba598896..17c5a64e9e7774b9ba1d76299249e237f13095b6 100644 (file)
@@ -976,6 +976,46 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
        return ss;
 }
 
+/* Fill in the resource words of a sampler view whose texture is a buffer; returns &view->base. height0 is not read. */
+static struct pipe_sampler_view *
+texture_buffer_sampler_view(struct r600_pipe_sampler_view *view,
+                           unsigned width0, unsigned height0)
+{
+       struct pipe_context *ctx = view->base.context;
+       struct r600_texture *tmp = (struct r600_texture*)view->base.texture; /* only tmp->resource is used below */
+       uint64_t va;
+       int stride = util_format_get_blocksize(view->base.format); /* stride is the block size of the view format */
+       unsigned format, num_format, format_comp, endian;
+
+       r600_vertex_data_type(view->base.format,
+                             &format, &num_format, &format_comp,
+                             &endian);
+
+       va = r600_resource_va(ctx->screen, view->base.texture);
+       view->tex_resource = &tmp->resource;
+
+       view->skip_mip_address_reloc = true;
+       view->tex_resource_words[0] = va; /* NOTE(review): va is 64-bit; presumably only the low bits land here, the high part goes into word 2 - confirm */
+       view->tex_resource_words[1] = width0 - 1;
+       view->tex_resource_words[2] = S_038008_BASE_ADDRESS_HI(va >> 32UL) |
+               S_038008_STRIDE(stride) |
+               S_038008_DATA_FORMAT(format) |
+               S_038008_NUM_FORMAT_ALL(num_format) |
+               S_038008_FORMAT_COMP_ALL(format_comp) |
+               S_038008_SRF_MODE_ALL(1) |
+               S_038008_ENDIAN_SWAP(endian);
+       view->tex_resource_words[3] = 0; /* no swizzle is programmed into this word here */
+       /*
+        * In theory dword 4 holds the number of elements, for use with
+        * resinfo, but that seems to utterly fail to work. The AMD GPU shader
+        * analyser uses a const buffer to store the element sizes for buffer txq.
+        */
+       view->tex_resource_words[4] = 0;
+       view->tex_resource_words[5] = 0;
+       view->tex_resource_words[6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_BUFFER);
+       return &view->base;
+}
+
 struct pipe_sampler_view *
 r600_create_sampler_view_custom(struct pipe_context *ctx,
                                struct pipe_resource *texture,
@@ -1000,6 +1040,9 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
        view->base.reference.count = 1;
        view->base.context = ctx;
 
+       if (texture->target == PIPE_BUFFER)
+               return texture_buffer_sampler_view(view, texture->width0, 1);
+
        swizzle[0] = state->swizzle_r;
        swizzle[1] = state->swizzle_g;
        swizzle[2] = state->swizzle_b;
index 3b61413f84e4f03d2c6b671901385c160470c3f9..34977373d21964a47e9db6b60f162955f52a24a7 100644 (file)
@@ -588,19 +588,20 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
                        struct r600_texture *rtex =
                                (struct r600_texture*)rviews[i]->base.texture;
 
-                       if (rtex->is_depth && !rtex->is_flushing_texture) {
-                               dst->views.compressed_depthtex_mask |= 1 << i;
-                       } else {
-                               dst->views.compressed_depthtex_mask &= ~(1 << i);
-                       }
+                       if (rviews[i]->base.texture->target != PIPE_BUFFER) {
+                               if (rtex->is_depth && !rtex->is_flushing_texture) {
+                                       dst->views.compressed_depthtex_mask |= 1 << i;
+                               } else {
+                                       dst->views.compressed_depthtex_mask &= ~(1 << i);
+                               }
 
-                       /* Track compressed colorbuffers. */
-                       if (rtex->cmask_size && rtex->fmask_size) {
-                               dst->views.compressed_colortex_mask |= 1 << i;
-                       } else {
-                               dst->views.compressed_colortex_mask &= ~(1 << i);
+                               /* Track compressed colorbuffers. */
+                               if (rtex->cmask_size && rtex->fmask_size) {
+                                       dst->views.compressed_colortex_mask |= 1 << i;
+                               } else {
+                                       dst->views.compressed_colortex_mask &= ~(1 << i);
+                               }
                        }
-
                        /* Changing from array to non-arrays textures and vice versa requires
                         * updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. */
                        if (rctx->chip_class <= R700 &&
@@ -625,6 +626,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
        dst->views.compressed_depthtex_mask &= dst->views.enabled_mask;
        dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
        dst->views.dirty_txq_constants = TRUE;
+       dst->views.dirty_buffer_constants = TRUE;
        r600_sampler_views_dirty(rctx, &dst->views);
 
        if (dirty_sampler_states_mask) {
@@ -1023,6 +1025,92 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
        rctx->sample_mask.atom.dirty = true;
 }
 
+/*
+ * On r600/700 hw we don't have vertex fetch swizzle. TBO doesn't require
+ * full swizzles, but it does need masking and setting alpha to one, so we
+ * set up 5 constants per sampler: 4 component masks plus an alpha value.
+ * In the shader we AND the 4 components with 0xffffffff or 0, then OR the
+ * alpha with the value given here.
+ * A 6th constant stores the buffer size in elements, for TXQ.
+ */
+static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type)
+{
+       struct r600_textures_info *samplers = &rctx->samplers[shader_type];
+       int bits;
+       uint32_t array_size;
+       struct pipe_constant_buffer cb;
+       int i, j;
+
+       if (!samplers->views.dirty_buffer_constants)
+               return;
+
+       samplers->views.dirty_buffer_constants = FALSE;
+
+       bits = util_last_bit(samplers->views.enabled_mask);
+       array_size = bits * 8 * sizeof(uint32_t) * 4; /* NOTE(review): entries below are strided by 8 dwords per sampler, so the extra "* 4" looks like over-allocation - confirm */
+       samplers->buffer_constants = realloc(samplers->buffer_constants, array_size); /* NOTE(review): result is not checked for NULL before the memset */
+       memset(samplers->buffer_constants, 0, array_size);
+       for (i = 0; i < bits; i++) {
+               if (samplers->views.enabled_mask & (1 << i)) {
+                       int offset = i * 8; /* 8 dwords are reserved per sampler; 6 are written */
+                       const struct util_format_description *desc;
+                       desc = util_format_description(samplers->views.views[i]->base.format);
+
+                       for (j = 0; j < 4; j++) /* masks: all ones for channels the format has, zero otherwise */
+                               if (j < desc->nr_channels)
+                                       samplers->buffer_constants[offset+j] = 0xffffffff;
+                               else
+                                       samplers->buffer_constants[offset+j] = 0x0;
+                       if (desc->nr_channels < 4) {
+                               if (desc->channel[0].pure_integer)
+                                       samplers->buffer_constants[offset+4] = 1; /* integer one */
+                               else
+                                       samplers->buffer_constants[offset+4] = 0x3f800000; /* bit pattern of 1.0f */
+                       } else
+                               samplers->buffer_constants[offset + 4] = 0;
+
+                       samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+               }
+       }
+
+       cb.buffer = NULL;
+       cb.user_buffer = samplers->buffer_constants;
+       cb.buffer_offset = 0;
+       cb.buffer_size = array_size;
+       rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+       pipe_resource_reference(&cb.buffer, NULL); /* presumably drops a buffer that set_constant_buffer may have stored in cb - TODO confirm */
+}
+
+/* On evergreen we only need to store the buffer size (in elements) for TXQ: one dword per sampler. */
+static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
+{
+       struct r600_textures_info *samplers = &rctx->samplers[shader_type];
+       int bits;
+       uint32_t array_size;
+       struct pipe_constant_buffer cb;
+       int i;
+
+       if (!samplers->views.dirty_buffer_constants)
+               return;
+
+       samplers->views.dirty_buffer_constants = FALSE;
+
+       bits = util_last_bit(samplers->views.enabled_mask);
+       array_size = bits * sizeof(uint32_t) * 4; /* NOTE(review): only index i (one dword) is written per sampler below, so the "* 4" looks like over-allocation - confirm */
+       samplers->buffer_constants = realloc(samplers->buffer_constants, array_size); /* NOTE(review): result is not checked for NULL before the memset */
+       memset(samplers->buffer_constants, 0, array_size);
+       for (i = 0; i < bits; i++)
+               if (samplers->views.enabled_mask & (1 << i))
+                  samplers->buffer_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+
+       cb.buffer = NULL;
+       cb.user_buffer = samplers->buffer_constants;
+       cb.buffer_offset = 0;
+       cb.buffer_size = array_size;
+       rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+       pipe_resource_reference(&cb.buffer, NULL); /* presumably drops a buffer that set_constant_buffer may have stored in cb - TODO confirm */
+}
+
 static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int shader_type)
 {
        struct r600_textures_info *samplers = &rctx->samplers[shader_type];
@@ -1090,6 +1178,21 @@ static bool r600_update_derived_state(struct r600_context *rctx)
        if (ps_dirty)
                r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
 
+       /* on R600 we stuff masks + txq info into one constant buffer */
+       /* on evergreen we only need a txq info one */
+       if (rctx->chip_class < EVERGREEN) {
+               if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers)
+                       r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
+               if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers)
+                       r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
+       } else {
+               if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers)
+                       eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
+               if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers)
+                       eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
+       }
+
+
        if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_cube_array_z_comp)
                r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT);
        if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_cube_array_z_comp)
index 85da0930242f4e75ec1fc4029c63e676a3daec18..45a30f85df483c7cbf4bef18eecdf0404ae365ac 100644 (file)
@@ -912,18 +912,26 @@ void r600_init_surface_functions(struct r600_context *r600)
        r600->context.surface_destroy = r600_surface_destroy;
 }
 
-static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
-               const unsigned char *swizzle_view)
+unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+                                  const unsigned char *swizzle_view,
+                                  boolean vtx)
 {
        unsigned i;
        unsigned char swizzle[4];
        unsigned result = 0;
-       const uint32_t swizzle_shift[4] = {
+       const uint32_t tex_swizzle_shift[4] = {
                16, 19, 22, 25,
        };
+       const uint32_t vtx_swizzle_shift[4] = {
+               3, 6, 9, 12,
+       };
        const uint32_t swizzle_bit[4] = {
                0, 1, 2, 3,
        };
+       const uint32_t *swizzle_shift = tex_swizzle_shift;
+
+       if (vtx)
+               swizzle_shift = vtx_swizzle_shift;
 
        if (swizzle_view) {
                util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
@@ -977,7 +985,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
        };
        desc = util_format_description(format);
 
-       word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
+       word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE);
 
        /* Colorspace (return non-RGB formats directly). */
        switch (desc->colorspace) {