r600g: implement clip vertex v2
author Vadim Girlin <vadimgirlin@gmail.com>
Fri, 20 Jan 2012 21:37:48 +0000 (01:37 +0400)
committer Dave Airlie <airlied@redhat.com>
Sat, 21 Jan 2012 12:43:14 +0000 (12:43 +0000)
Clip planes are uploaded as a constant buffer and used by the vertex
shader to produce corresponding clip distances for hw clipping.

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/evergreen_hw_context.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c
src/gallium/drivers/r600/r600d.h

index 9401d823166c95266ae29df1fbf95f7238a0f73f..f8eb481a4b5712c1bd73efc4ac61dc55f6570150 100644 (file)
@@ -99,7 +99,9 @@ static const struct r600_reg evergreen_context_reg_list[] = {
        {R_028058_DB_DEPTH_SIZE, 0, 0, 0},
        {R_02805C_DB_DEPTH_SLICE, 0, 0, 0},
        {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0},
+       {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0},
        {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0},
+       {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0},
        {R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0},
        {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0},
        {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0},
@@ -293,7 +295,9 @@ static const struct r600_reg evergreen_context_reg_list[] = {
        {R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0},
        {R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0},
        {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
        {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
        {R_028A00_PA_SU_POINT_SIZE, 0, 0, 0},
        {R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0},
        {R_028A08_PA_SU_LINE_CNTL, 0, 0, 0},
@@ -465,7 +469,9 @@ static const struct r600_reg cayman_context_reg_list[] = {
        {R_028058_DB_DEPTH_SIZE, 0, 0, 0},
        {R_02805C_DB_DEPTH_SLICE, 0, 0, 0},
        {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0},
+       {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0},
        {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0},
+       {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0},
        {R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0},
        {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0},
        {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0},
@@ -658,7 +664,9 @@ static const struct r600_reg cayman_context_reg_list[] = {
        {R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0},
        {R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0},
        {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
        {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
        {R_028A00_PA_SU_POINT_SIZE, 0, 0, 0},
        {R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0},
        {R_028A08_PA_SU_LINE_CNTL, 0, 0, 0},
index 2f7046bcba67ba9a6ecd2bf6b2d387f4da23176a..96c11442388102af036d2868f4f3bf03e01045bb 100644 (file)
@@ -1211,6 +1211,7 @@ static void evergreen_set_clip_state(struct pipe_context *ctx,
 {
        struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+       struct pipe_resource *cbuf;
 
        if (rstate == NULL)
                return;
@@ -1235,6 +1236,13 @@ static void evergreen_set_clip_state(struct pipe_context *ctx,
        free(rctx->states[R600_PIPE_STATE_CLIP]);
        rctx->states[R600_PIPE_STATE_CLIP] = rstate;
        r600_context_pipe_state_set(&rctx->ctx, rstate);
+
+       cbuf = pipe_user_buffer_create(ctx->screen,
+                                   state->ucp,
+                                   4*4*8, /* 8*4 floats */
+                                   PIPE_BIND_CONSTANT_BUFFER);
+       r600_set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 1, cbuf);
+       pipe_resource_reference(&cbuf, NULL);
 }
 
 static void evergreen_set_polygon_stipple(struct pipe_context *ctx,
index 9f8346b7669bceb4d1dfd9db6867782bf0d57bb9..fa3feceb3cc12a7bb2a25e8027f4867e8378bfb9 100644 (file)
 #define R_028050_DB_Z_WRITE_BASE                     0x00028050
 #define R_028054_DB_STENCIL_WRITE_BASE               0x00028054
 #define R_028140_ALU_CONST_BUFFER_SIZE_PS_0          0x00028140
+#define R_028144_ALU_CONST_BUFFER_SIZE_PS_1          0x00028144
 #define R_028180_ALU_CONST_BUFFER_SIZE_VS_0          0x00028180
+#define R_028184_ALU_CONST_BUFFER_SIZE_VS_1          0x00028184
 #define R_028200_PA_SC_WINDOW_OFFSET                 0x00028200
 #define R_02820C_PA_SC_CLIPRECT_RULE                 0x0002820C
 #define R_028210_PA_SC_CLIPRECT_0_TL                 0x00028210
 #define R_028924_SQ_GS_VERT_ITEMSIZE_2               0x00028924
 #define R_028928_SQ_GS_VERT_ITEMSIZE_3               0x00028928
 #define R_028940_ALU_CONST_CACHE_PS_0                0x00028940
+#define R_028944_ALU_CONST_CACHE_PS_1                0x00028944
 #define R_028980_ALU_CONST_CACHE_VS_0                0x00028980
+#define R_028984_ALU_CONST_CACHE_VS_1                0x00028984
 #define R_028A04_PA_SU_POINT_MINMAX                  0x00028A04
 #define R_028A08_PA_SU_LINE_CNTL                     0x00028A08
 #define   S_028A08_WIDTH(x)                            (((x) & 0xFFFF) << 0)
index 404f147c240d31a56176a36bc884a9031a523aa8..d1a7e38a6bbf053ebde548fd8a534b030c1a364d 100644 (file)
@@ -408,9 +408,13 @@ static const struct r600_reg r600_context_reg_list[] = {
        {R_028128_CB_CLEAR_BLUE, 0, 0, 0},
        {R_02812C_CB_CLEAR_ALPHA, 0, 0, 0},
        {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0},
+       {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0},
        {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0},
+       {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0},
        {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
        {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
        {R_02823C_CB_SHADER_MASK, 0, 0, 0},
        {R_028238_CB_TARGET_MASK, 0, 0, 0},
        {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
@@ -1329,15 +1333,20 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *
                        if (block->pm4_bo_index[j]) {
                                /* find relocation */
                                struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
-                               block->pm4[reloc->bo_pm4_index] =
-                                       r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
-                               r600_context_bo_flush(ctx,
-                                                     reloc->flush_flags,
-                                                     reloc->flush_mask,
-                                                     reloc->bo);
+                               if (reloc->bo) {
+                                       block->pm4[reloc->bo_pm4_index] =
+                                                       r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
+                                       r600_context_bo_flush(ctx,
+                                                       reloc->flush_flags,
+                                                       reloc->flush_mask,
+                                                       reloc->bo);
+                               } else {
+                                       block->pm4[reloc->bo_pm4_index] = 0;
+                               }
                                nbo--;
                                if (nbo == 0)
                                        break;
+
                        }
                }
                ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
index b106802255a7efcc1b416b69151c488127528541..faa92cb8d3e295b4944bf2d118de6e5f1b2f3d58 100644 (file)
@@ -492,7 +492,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
        case PIPE_SHADER_CAP_MAX_CONSTS:
                return R600_MAX_CONST_BUFFER_SIZE;
        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-               return R600_MAX_CONST_BUFFERS;
+               return R600_MAX_CONST_BUFFERS-1;
        case PIPE_SHADER_CAP_MAX_PREDS:
                return 0; /* FIXME */
        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
index 8df03108c783f8b3b9b369d4df779fc9527bb7b3..65e84c190c52128f1729f692ad1c1126adb0539e 100644 (file)
@@ -39,7 +39,7 @@
 #include "r600_shader.h"
 #include "r600_resource.h"
 
-#define R600_MAX_CONST_BUFFERS 1
+#define R600_MAX_CONST_BUFFERS 2
 #define R600_MAX_CONST_BUFFER_SIZE 4096
 
 #ifdef PIPE_ARCH_BIG_ENDIAN
index e05812191d6cc5fbb136c4605d9385e88eb1c534..db26faad270c1cf7d5cd65d52a21755d48174992 100644 (file)
@@ -195,6 +195,8 @@ struct r600_shader_ctx {
        int                                     num_interp_gpr;
        int                                     face_gpr;
        int                                     colors_used;
+       boolean                 clip_vertex_write;
+       unsigned                cv_output;
 };
 
 struct r600_shader_tgsi_instruction {
@@ -479,6 +481,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                        case TGSI_SEMANTIC_PSIZE:
                                ctx->shader->vs_out_misc_write = 1;
                                break;
+                       case TGSI_SEMANTIC_CLIPVERTEX:
+                               ctx->clip_vertex_write = TRUE;
+                               ctx->cv_output = i;
+                               break;
                        }
                }
                break;
@@ -803,7 +809,8 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
        struct r600_bytecode_output output[32];
        unsigned output_done, noutput;
        unsigned opcode;
-       int i, j, r = 0, pos0;
+       int i, j, k, r = 0;
+       int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
 
        ctx.bc = &shader->bc;
        ctx.shader = shader;
@@ -817,6 +824,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 
        ctx.face_gpr = -1;
        ctx.colors_used = 0;
+       ctx.clip_vertex_write = 0;
 
        shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
 
@@ -959,6 +967,47 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 
        noutput = shader->noutput;
 
+       if (ctx.clip_vertex_write) {
+               /* need to convert a clipvertex write into clipdistance writes and not export
+                  the clip vertex anymore */
+
+               memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
+               shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
+               shader->output[noutput].gpr = ctx.temp_reg;
+               noutput++;
+               shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
+               shader->output[noutput].gpr = ctx.temp_reg+1;
+               noutput++;
+
+               shader->clip_dist_write = 0xFF;
+
+               for (i = 0; i < 8; i++) {
+                       int oreg = i >> 2;
+                       int ochan = i & 3;
+
+                       for (j = 0; j < 4; j++) {
+                               struct r600_bytecode_alu alu;
+                               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                               alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
+                               alu.src[0].sel = shader->output[ctx.cv_output].gpr;
+                               alu.src[0].chan = j;
+
+                               alu.src[1].sel = 512 + i;
+                               alu.src[1].kc_bank = 1;
+                               alu.src[1].chan = j;
+
+                               alu.dst.sel = ctx.temp_reg + oreg;
+                               alu.dst.chan = j;
+                               alu.dst.write = (j == ochan);
+                               if (j == 3)
+                                       alu.last = 1;
+                               r = r600_bytecode_add_alu(ctx.bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               }
+       }
+
        /* clamp color outputs */
        if (shader->clamp_color) {
                for (i = 0; i < noutput; i++) {
@@ -1056,89 +1105,81 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
        }
 
        /* export output */
-       j = 0;
-
-       for (i = 0, pos0 = 0; i < noutput; i++) {
-               memset(&output[i+j], 0, sizeof(struct r600_bytecode_output));
-               output[i + j].gpr = shader->output[i].gpr;
-               output[i + j].elem_size = 3;
-               output[i + j].swizzle_x = 0;
-               output[i + j].swizzle_y = 1;
-               output[i + j].swizzle_z = 2;
-               output[i + j].swizzle_w = 3;
-               output[i + j].burst_count = 1;
-               output[i + j].barrier = 1;
-               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
-               output[i + j].array_base = i+j - pos0;
-               output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+       for (i = 0, j = 0; i < noutput; i++, j++) {
+               memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+               output[j].gpr = shader->output[i].gpr;
+               output[j].elem_size = 3;
+               output[j].swizzle_x = 0;
+               output[j].swizzle_y = 1;
+               output[j].swizzle_z = 2;
+               output[j].swizzle_w = 3;
+               output[j].burst_count = 1;
+               output[j].barrier = 1;
+               output[j].type = -1;
+               output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
                switch (ctx.type) {
                case TGSI_PROCESSOR_VERTEX:
                        switch (shader->output[i].name) {
                        case TGSI_SEMANTIC_POSITION:
-                               output[i + j].array_base = 60;
-                               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
-                               /* position doesn't count in array_base */
-                               pos0++;
+                               output[j].array_base = next_pos_base++;
+                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
                                break;
 
                        case TGSI_SEMANTIC_PSIZE:
-                               output[i + j].array_base = 61;
-                               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
-                               /* position doesn't count in array_base */
-                               pos0++;
+                               output[j].array_base = next_pos_base++;
+                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                               break;
+                       case TGSI_SEMANTIC_CLIPVERTEX:
+                               j--;
                                break;
-
                        case TGSI_SEMANTIC_CLIPDIST:
-                               /* array base for enabled OUT_MISC_VEC & CCDIST[0|1]_VEC
-                                * vectors is allocated sequentially, starting from 61 */
-                               output[i + j].array_base = 61 + shader->output[i].sid
-                                       /* +1 if OUT_MISC_VEC is enabled */
-                                       + shader->vs_out_misc_write
-                                       /* -1 if OUT_CCDIST0_VEC is disabled */
-                                       - (((shader->clip_dist_write & 0xF) == 0)? 1 : 0);
-                               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
-                               j++;
-                               pos0++;
-                               /* duplicate it as PARAM to pass to the pixel shader */
-                               memcpy(&output[i+j], &output[i+j-1], sizeof(struct r600_bytecode_output));
-                               output[i + j].array_base = i+j-pos0;
-                               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+                               output[j].array_base = next_pos_base++;
+                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                               /* spi_sid is 0 for clipdistance outputs that were generated
+                                * for clipvertex - we don't need to pass them to PS */
+                               if (shader->output[i].spi_sid) {
+                                       j++;
+                                       /* duplicate it as PARAM to pass to the pixel shader */
+                                       memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
+                                       output[j].array_base = next_param_base++;
+                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+                               }
                                break;
                        }
                        break;
                case TGSI_PROCESSOR_FRAGMENT:
                        if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
-                               output[i + j].array_base = shader->output[i].sid;
-                               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               output[j].array_base = next_pixel_base++;
+                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                                if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
-                                       for (j = 1; j < shader->nr_cbufs; j++) {
-                                               memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
-                                               output[i + j].gpr = shader->output[i].gpr;
-                                               output[i + j].elem_size = 3;
-                                               output[i + j].swizzle_x = 0;
-                                               output[i + j].swizzle_y = 1;
-                                               output[i + j].swizzle_z = 2;
-                                               output[i + j].swizzle_w = 3;
-                                               output[i + j].burst_count = 1;
-                                               output[i + j].barrier = 1;
-                                               output[i + j].array_base = shader->output[i].sid + j;
-                                               output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
-                                               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                                       for (k = 1; k < shader->nr_cbufs; k++) {
+                                               j++;
+                                               memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+                                               output[j].gpr = shader->output[i].gpr;
+                                               output[j].elem_size = 3;
+                                               output[j].swizzle_x = 0;
+                                               output[j].swizzle_y = 1;
+                                               output[j].swizzle_z = 2;
+                                               output[j].swizzle_w = 3;
+                                               output[j].burst_count = 1;
+                                               output[j].barrier = 1;
+                                               output[j].array_base = next_pixel_base++;
+                                               output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+                                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                                        }
-                                       j = shader->nr_cbufs-1;
                                }
                        } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
-                               output[i + j].array_base = 61;
-                               output[i + j].swizzle_x = 2;
-                               output[i + j].swizzle_y = 7;
-                               output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
-                               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               output[j].array_base = 61;
+                               output[j].swizzle_x = 2;
+                               output[j].swizzle_y = 7;
+                               output[j].swizzle_z = output[j].swizzle_w = 7;
+                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                        } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
-                               output[i + j].array_base = 61;
-                               output[i + j].swizzle_x = 7;
-                               output[i + j].swizzle_y = 1;
-                               output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
-                               output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               output[j].array_base = 61;
+                               output[j].swizzle_x = 7;
+                               output[j].swizzle_y = 1;
+                               output[j].swizzle_z = output[j].swizzle_w = 7;
+                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                        } else {
                                R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
                                r = -EINVAL;
@@ -1150,48 +1191,49 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
                        r = -EINVAL;
                        goto out_err;
                }
+
+               if (output[j].type==-1) {
+                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+                       output[j].array_base = next_param_base++;
+               }
        }
-       noutput += j;
+
        /* add fake param output for vertex shader if no param is exported */
-       if (ctx.type == TGSI_PROCESSOR_VERTEX) {
-               for (i = 0, pos0 = 0; i < noutput; i++) {
-                       if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
-                               pos0 = 1;
-                               break;
-                       }
-               }
-               if (!pos0) {
-                       memset(&output[i], 0, sizeof(struct r600_bytecode_output));
-                       output[i].gpr = 0;
-                       output[i].elem_size = 3;
-                       output[i].swizzle_x = 7;
-                       output[i].swizzle_y = 7;
-                       output[i].swizzle_z = 7;
-                       output[i].swizzle_w = 7;
-                       output[i].burst_count = 1;
-                       output[i].barrier = 1;
-                       output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
-                       output[i].array_base = 0;
-                       output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
-                       noutput++;
-               }
+       if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
+                       memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+                       output[j].gpr = 0;
+                       output[j].elem_size = 3;
+                       output[j].swizzle_x = 7;
+                       output[j].swizzle_y = 7;
+                       output[j].swizzle_z = 7;
+                       output[j].swizzle_w = 7;
+                       output[j].burst_count = 1;
+                       output[j].barrier = 1;
+                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+                       output[j].array_base = 0;
+                       output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+                       j++;
        }
+
        /* add fake pixel export */
-       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
-               memset(&output[0], 0, sizeof(struct r600_bytecode_output));
-               output[0].gpr = 0;
-               output[0].elem_size = 3;
-               output[0].swizzle_x = 7;
-               output[0].swizzle_y = 7;
-               output[0].swizzle_z = 7;
-               output[0].swizzle_w = 7;
-               output[0].burst_count = 1;
-               output[0].barrier = 1;
-               output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
-               output[0].array_base = 0;
-               output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
-               noutput++;
+       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) {
+               memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+               output[j].gpr = 0;
+               output[j].elem_size = 3;
+               output[j].swizzle_x = 7;
+               output[j].swizzle_y = 7;
+               output[j].swizzle_z = 7;
+               output[j].swizzle_w = 7;
+               output[j].burst_count = 1;
+               output[j].barrier = 1;
+               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+               output[j].array_base = 0;
+               output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+               j++;
        }
+
+       noutput = j;
+
        /* set export done on last export of each type */
        for (i = noutput - 1, output_done = 0; i >= 0; i--) {
                if (ctx.bc->chip_class < CAYMAN) {
index a03c4964f6746e2b17b8dea3873bbfbd04ae4bdb..8f4e9f20837c61d8ed53d1a952cbb54d6978526d 100644 (file)
@@ -1317,6 +1317,7 @@ static void r600_set_clip_state(struct pipe_context *ctx,
 {
        struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+       struct pipe_resource * cbuf;
 
        if (rstate == NULL)
                return;
@@ -1341,6 +1342,13 @@ static void r600_set_clip_state(struct pipe_context *ctx,
        free(rctx->states[R600_PIPE_STATE_CLIP]);
        rctx->states[R600_PIPE_STATE_CLIP] = rstate;
        r600_context_pipe_state_set(&rctx->ctx, rstate);
+
+       cbuf = pipe_user_buffer_create(ctx->screen,
+                                   state->ucp,
+                                   4*4*8, /* 8*4 floats */
+                                   PIPE_BIND_CONSTANT_BUFFER);
+       r600_set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 1, cbuf);
+       pipe_resource_reference(&cbuf, NULL);
 }
 
 static void r600_set_polygon_stipple(struct pipe_context *ctx,
index 4bc4b97a5d77431515569670fa5e8f8045dbdaf9..6a313096f94918ed6ed5213439ad44dd51524d2d 100644 (file)
@@ -357,11 +357,11 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
        case PIPE_SHADER_VERTEX:
                rctx->vs_const_buffer.nregs = 0;
                r600_pipe_state_add_reg(&rctx->vs_const_buffer,
-                                       R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+                                       R_028180_ALU_CONST_BUFFER_SIZE_VS_0 + index * 4,
                                        ALIGN_DIVUP(buffer->width0 >> 4, 16),
                                        0xFFFFFFFF, NULL, 0);
                r600_pipe_state_add_reg(&rctx->vs_const_buffer,
-                                       R_028980_ALU_CONST_CACHE_VS_0,
+                                       R_028980_ALU_CONST_CACHE_VS_0 + index * 4,
                                        va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
                r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
 
@@ -563,7 +563,7 @@ static void r600_update_derived_state(struct r600_pipe_context *rctx)
        else
                user_clip_plane_enable = rctx->rasterizer->clip_plane_enable & 0x3F;
 
-       clip_dist_enable = rctx->rasterizer->clip_plane_enable & rctx->vs_shader->shader.clip_dist_write & 0xFF;
+       clip_dist_enable = rctx->rasterizer->clip_plane_enable & rctx->vs_shader->shader.clip_dist_write;
        rstate.nregs = 0;
 
        if (user_clip_plane_enable != rctx->user_clip_plane_enable) {
index ccdf82e65332fdbc0e58790ff65c16d3cdd684c7..16330d335ae67f2e68eed793bc8cfb4fef103816 100644 (file)
 #define R_038018_RESOURCE0_WORD6                     0x038018
 
 #define R_028140_ALU_CONST_BUFFER_SIZE_PS_0          0x00028140
+#define R_028144_ALU_CONST_BUFFER_SIZE_PS_1          0x00028144
 #define R_028180_ALU_CONST_BUFFER_SIZE_VS_0          0x00028180
+#define R_028184_ALU_CONST_BUFFER_SIZE_VS_1          0x00028184
 #define R_028940_ALU_CONST_CACHE_PS_0                0x00028940
+#define R_028944_ALU_CONST_CACHE_PS_1                0x00028944
 #define R_028980_ALU_CONST_CACHE_VS_0                0x00028980
+#define R_028984_ALU_CONST_CACHE_VS_1                0x00028984
 
 #define R_03CFF0_SQ_VTX_BASE_VTX_LOC                 0x03CFF0
 #define R_03CFF4_SQ_VTX_START_INST_LOC               0x03CFF4