r600g: atomize pixel shader
author Marek Olšák <maraeo@gmail.com>
Sat, 2 Mar 2013 16:14:51 +0000 (17:14 +0100)
committer Marek Olšák <maraeo@gmail.com>
Mon, 11 Mar 2013 12:43:36 +0000 (13:43 +0100)
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/r600/evergreen_hw_context.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index 730e51fbfc643f213e8fe9a660ab7e1205ee19b7..a3528fc3421138496deeed795543f2e9213bc840 100644 (file)
 #include "util/u_memory.h"
 #include "util/u_math.h"
 
-static const struct r600_reg evergreen_context_reg_list[] = {
-       {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0},
-       {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0},
-       {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0},
-       {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0},
-       {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0},
-       {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0},
-       {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0},
-       {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0},
-       {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0},
-       {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0},
-       {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0},
-       {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0},
-       {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0},
-       {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0},
-       {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0},
-       {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0},
-       {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0},
-       {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0},
-       {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0},
-       {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0},
-       {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0},
-       {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0},
-       {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0},
-       {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0},
-       {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0},
-       {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0},
-       {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0},
-       {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0},
-       {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0},
-       {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0},
-       {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0},
-       {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0},
-       {GROUP_FORCE_NEW_BLOCK, 0, 0},
-       {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0},
-       {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0},
-       {R_0286D8_SPI_INPUT_Z, 0, 0},
-       {R_0286E0_SPI_BARYC_CNTL, 0, 0},
-       {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0},
-       {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0},
-       {R_028844_SQ_PGM_RESOURCES_PS, 0, 0},
-       {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0},
-};
-
-static const struct r600_reg cayman_context_reg_list[] = {
-       {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0},
-       {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0},
-       {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0},
-       {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0},
-       {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0},
-       {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0},
-       {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0},
-       {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0},
-       {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0},
-       {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0},
-       {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0},
-       {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0},
-       {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0},
-       {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0},
-       {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0},
-       {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0},
-       {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0},
-       {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0},
-       {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0},
-       {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0},
-       {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0},
-       {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0},
-       {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0},
-       {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0},
-       {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0},
-       {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0},
-       {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0},
-       {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0},
-       {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0},
-       {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0},
-       {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0},
-       {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0},
-       {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0},
-       {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0},
-       {R_0286D8_SPI_INPUT_Z, 0, 0},
-       {R_0286E0_SPI_BARYC_CNTL, 0, 0},
-       {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0},
-       {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0},
-       {R_028844_SQ_PGM_RESOURCES_PS, 0, 0},
-       {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0},
-};
-
 int evergreen_context_init(struct r600_context *ctx)
 {
        int r = 0;
 
        /* add blocks */
-       if (ctx->family >= CHIP_CAYMAN)
-               r = r600_context_add_block(ctx, cayman_context_reg_list,
-                                          Elements(cayman_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
-       else
-               r = r600_context_add_block(ctx, evergreen_context_reg_list,
-                                          Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
-       if (r)
-               goto out_err;
-
        r = r600_setup_block_table(ctx);
        if (r)
                goto out_err;
index c52e4c8f074e844142a56bd3711c3f1863611069..2bdefb0d35158e10b250087a74a841bbb1e70f6b 100644 (file)
@@ -2778,7 +2778,9 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
        r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0);
        r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
        r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0);
-       r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0);
+       r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2);
+       r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */
+       r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */
        r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 0);
        r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
 
@@ -3234,7 +3236,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
        r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0);
        r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
        r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0);
-       r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0);
+       r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2);
+       r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */
+       r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */
        r600_store_context_reg(cb, R_0288EC_SQ_LDS_ALLOC_PS, 0);
        r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 0);
 
@@ -3245,17 +3249,22 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct r600_pipe_state *rstate = &shader->rstate;
+       struct r600_command_buffer *cb = &shader->command_buffer;
        struct r600_shader *rshader = &shader->shader;
        unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0;
        int pos_index = -1, face_index = -1;
        int ninterp = 0;
        boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
-       unsigned spi_baryc_cntl, sid, tmp, idx = 0;
+       unsigned spi_baryc_cntl, sid, tmp, num = 0;
        unsigned z_export = 0, stencil_export = 0;
        unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
+       uint32_t spi_ps_input_cntl[32];
 
-       rstate->nregs = 0;
+       if (!cb->buf) {
+               r600_init_command_buffer(cb, 64);
+       } else {
+               cb->num_dw = 0;
+       }
 
        for (i = 0; i < rshader->ninput; i++) {
                /* evergreen NUM_INTERP only contains values interpolated into the LDS,
@@ -3277,7 +3286,6 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                sid = rshader->input[i].spi_sid;
 
                if (sid) {
-
                        tmp = S_028644_SEMANTIC(sid);
 
                        if (rshader->input[i].name == TGSI_SEMANTIC_POSITION ||
@@ -3292,13 +3300,13 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                                tmp |= S_028644_PT_SPRITE_TEX(1);
                        }
 
-                       r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + idx * 4,
-                                       tmp);
-
-                       idx++;
+                       spi_ps_input_cntl[num++] = tmp;
                }
        }
 
+       r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, num);
+       r600_store_array(cb, num, spi_ps_input_cntl);
+
        for (i = 0; i < rshader->noutput; i++) {
                if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
                        z_export = 1;
@@ -3342,7 +3350,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                spi_ps_in_control_0 |=  S_0286CC_POSITION_ENA(1) |
                        S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
                        S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
-               spi_input_z |= 1;
+               spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
        }
 
        spi_ps_in_control_1 = 0;
@@ -3359,29 +3367,21 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) |
                                  S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
 
-       r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0,
-                               spi_ps_in_control_0);
-       r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1,
-                               spi_ps_in_control_1);
-       r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2,
-                               0);
-       r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z);
-       r600_pipe_state_add_reg(rstate,
-                               R_0286E0_SPI_BARYC_CNTL,
-                               spi_baryc_cntl);
-
-       r600_pipe_state_add_reg_bo(rstate,
-                               R_028840_SQ_PGM_START_PS,
-                               r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
-                               shader->bo, RADEON_USAGE_READ);
-       r600_pipe_state_add_reg(rstate,
-                               R_028844_SQ_PGM_RESOURCES_PS,
-                               S_028844_NUM_GPRS(rshader->bc.ngpr) |
-                               S_028844_PRIME_CACHE_ON_DRAW(1) |
-                               S_028844_STACK_SIZE(rshader->bc.nstack));
-       r600_pipe_state_add_reg(rstate,
-                               R_02884C_SQ_PGM_EXPORTS_PS,
-                               exports_ps);
+       r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
+       r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
+       r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */
+
+       r600_store_context_reg(cb, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
+       r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z);
+       r600_store_context_reg(cb, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps);
+
+       r600_store_context_reg_seq(cb, R_028840_SQ_PGM_START_PS, 2);
+       r600_store_value(cb, r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
+       r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
+                        S_028844_NUM_GPRS(rshader->bc.ngpr) |
+                        S_028844_PRIME_CACHE_ON_DRAW(1) |
+                        S_028844_STACK_SIZE(rshader->bc.nstack));
+       /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
 
        shader->db_shader_control = db_shader_control;
        shader->ps_depth_export = z_export | stencil_export;
@@ -3755,6 +3755,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
        r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
        r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
        r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
+       r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
 
        rctx->context.create_blend_state = evergreen_create_blend_state;
        rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
index 12c7ed14095efdec74147bd57cc9e35989182724..53b68a44c9d9332705ed468ff797f178bd374247 100644 (file)
 #define R_0286C0_SPI_PS_INPUT_CNTL_31                0x000286C0
 #define R_0286C8_SPI_THREAD_GROUPING                 0x000286C8
 #define R_0286D8_SPI_INPUT_Z                         0x000286D8
+#define   S_0286D8_PROVIDE_Z_TO_SPI(x)                 (((x) & 0x1) << 0)
 #define R_0286DC_SPI_FOG_CNTL                        0x000286DC
 #define R_0286E4_SPI_PS_IN_CONTROL_2                 0x000286E4
 #define R_0286E8_SPI_COMPUTE_INPUT_CNTL              0x000286E8
index a2eefa8199f9b167568ab33ab40e9ff0ea1251fa..dda38e34583d32d9957cfc1c9e1b25ad0073d17f 100644 (file)
@@ -215,49 +215,6 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
        return 0;
 }
 
-static const struct r600_reg r600_context_reg_list[] = {
-       {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0},
-       {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0},
-       {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0},
-       {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0},
-       {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0},
-       {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0},
-       {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0},
-       {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0},
-       {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0},
-       {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0},
-       {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0},
-       {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0},
-       {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0},
-       {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0},
-       {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0},
-       {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0},
-       {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0},
-       {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0},
-       {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0},
-       {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0},
-       {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0},
-       {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0},
-       {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0},
-       {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0},
-       {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0},
-       {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0},
-       {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0},
-       {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0},
-       {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0},
-       {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0},
-       {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0},
-       {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0},
-       {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0},
-       {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0},
-       {R_0286D8_SPI_INPUT_Z, 0, 0},
-       {GROUP_FORCE_NEW_BLOCK, 0, 0},
-       {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0},
-       {GROUP_FORCE_NEW_BLOCK, 0, 0},
-       {R_028850_SQ_PGM_RESOURCES_PS, 0, 0},
-       {R_028854_SQ_PGM_EXPORTS_PS, 0, 0},
-};
-
 /* initialize */
 void r600_context_fini(struct r600_context *ctx)
 {
@@ -322,12 +279,6 @@ int r600_context_init(struct r600_context *ctx)
 {
        int r;
 
-       /* add blocks */
-       r = r600_context_add_block(ctx, r600_context_reg_list,
-                                  Elements(r600_context_reg_list), PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET);
-       if (r)
-               goto out_err;
-
        r = r600_setup_block_table(ctx);
        if (r)
                goto out_err;
@@ -806,6 +757,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
        ctx->db_misc_state.atom.dirty = true;
        ctx->db_state.atom.dirty = true;
        ctx->framebuffer.atom.dirty = true;
+       ctx->pixel_shader.atom.dirty = true;
        ctx->poly_offset_state.atom.dirty = true;
        ctx->vgt_state.atom.dirty = true;
        ctx->sample_mask.atom.dirty = true;
index 813012fd2deebae078bf61a9cd6c9728a807a047..ba0aaadec2e1bd5f9bcb1016c76fb5bf8aa2f602 100644 (file)
@@ -34,7 +34,7 @@
 #include "r600_public.h"
 #include "r600_resource.h"
 
-#define R600_NUM_ATOMS 39
+#define R600_NUM_ATOMS 40
 
 #define R600_TRACE_CS 0
 
@@ -805,6 +805,13 @@ static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned val
        cb->buf[cb->num_dw++] = value;
 }
 
+static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr) /* append num values read from ptr to the end of cb */
+{
+       assert(cb->num_dw+num <= cb->max_num_dw); /* the appended values must fit within max_num_dw */
+       memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0])); /* bulk copy starting at the current write index */
+       cb->num_dw += num; /* advance the write index past the copied values */
+}
+
 static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
 {
        assert(reg < R600_CONTEXT_REG_OFFSET);
index e8d7c8469ee40ea913cdd21b1106536e669fa27b..846c1598fcf7cd90e73ea1cc797fd266b9dd5dbc 100644 (file)
@@ -2690,7 +2690,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
 void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct r600_pipe_state *rstate = &shader->rstate;
+       struct r600_command_buffer *cb = &shader->command_buffer;
        struct r600_shader *rshader = &shader->shader;
        unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
        int pos_index = -1, face_index = -1;
@@ -2699,8 +2699,13 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
        unsigned z_export = 0, stencil_export = 0;
        unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
 
-       rstate->nregs = 0;
+       if (!cb->buf) {
+               r600_init_command_buffer(cb, 64);
+       } else {
+               cb->num_dw = 0;
+       }
 
+       r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, rshader->ninput);
        for (i = 0; i < rshader->ninput; i++) {
                if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
                        pos_index = i;
@@ -2730,8 +2735,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
                        tmp |= S_028644_SEL_LINEAR(1);
                }
 
-               r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4,
-                               tmp);
+               r600_store_value(cb, tmp);
        }
 
        db_shader_control = 0;
@@ -2771,7 +2775,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
                                        S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
                                        S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
                                        S_0286CC_BARYC_SAMPLE_CNTL(1));
-               spi_input_z |= 1;
+               spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
        }
 
        spi_ps_in_control_1 = 0;
@@ -2784,20 +2788,22 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
        if (rctx->family == CHIP_R600)
                ufi = 1;
 
-       r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0);
-       r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1);
-       r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z);
-       r600_pipe_state_add_reg_bo(rstate,
-                                  R_028840_SQ_PGM_START_PS,
-                                  0, shader->bo, RADEON_USAGE_READ);
-       r600_pipe_state_add_reg(rstate,
-                               R_028850_SQ_PGM_RESOURCES_PS,
-                               S_028850_NUM_GPRS(rshader->bc.ngpr) |
-                               S_028850_STACK_SIZE(rshader->bc.nstack) |
-                               S_028850_UNCACHED_FIRST_INST(ufi));
-       r600_pipe_state_add_reg(rstate,
-                               R_028854_SQ_PGM_EXPORTS_PS,
-                               exports_ps);
+       r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
+       r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
+       r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */
+
+       r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z);
+
+       r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
+       r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
+                        S_028850_NUM_GPRS(rshader->bc.ngpr) |
+                        S_028850_STACK_SIZE(rshader->bc.nstack) |
+                        S_028850_UNCACHED_FIRST_INST(ufi));
+       r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
+
+       r600_store_context_reg(cb, R_028840_SQ_PGM_START_PS, 0);
+       /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
+
        /* only set some bits here, the other bits are set in the dsa state */
        shader->db_shader_control = db_shader_control;
        shader->ps_depth_export = z_export | stencil_export;
@@ -3192,6 +3198,7 @@ void r600_init_state_functions(struct r600_context *rctx)
        r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
        r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
        r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
+       r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
 
        rctx->context.create_blend_state = r600_create_blend_state;
        rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
index 842d0d4aa477ed367e0fb85e72f0b4650b187996..89eb5e3343f92a22b0c6fa96b72246ace7aa43ea 100644 (file)
@@ -39,6 +39,7 @@
 
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
 {
+       assert(!cb->buf); /* cb must not hold a buffer yet: the assignment below would overwrite the existing pointer */
        cb->buf = CALLOC(1, 4 * num_dw);
        cb->max_num_dw = num_dw;
 }
@@ -707,7 +708,7 @@ static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_contex
  * (*dirty) is set to 1 if current variant was changed */
 static int r600_shader_select(struct pipe_context *ctx,
         struct r600_pipe_shader_selector* sel,
-        unsigned *dirty)
+        bool *dirty)
 {
        struct r600_shader_key key;
        struct r600_context *rctx = (struct r600_context *)ctx;
@@ -766,7 +767,7 @@ static int r600_shader_select(struct pipe_context *ctx,
        }
 
        if (dirty)
-               *dirty = 1;
+               *dirty = true;
 
        shader->next_variant = sel->current;
        sel->current = shader;
@@ -816,8 +817,9 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
        if (!state)
                state = rctx->dummy_pixel_shader;
 
-       rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
-       r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
+       rctx->pixel_shader.shader = rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
+       rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw;
+       rctx->pixel_shader.atom.dirty = true;
 
        r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
 
@@ -1198,7 +1200,7 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s
 static bool r600_update_derived_state(struct r600_context *rctx)
 {
        struct pipe_context * ctx = (struct pipe_context*)rctx;
-       unsigned ps_dirty = 0;
+       bool ps_dirty = false;
        bool blend_disable;
 
        if (!rctx->blitter->running) {
@@ -1227,11 +1229,13 @@ static bool r600_update_derived_state(struct r600_context *rctx)
                else
                        r600_update_ps_state(ctx, rctx->ps_shader->current);
 
-               ps_dirty = 1;
+               ps_dirty = true;
        }
 
-       if (ps_dirty)
-               r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
+       if (ps_dirty) {
+               rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw;
+               rctx->pixel_shader.atom.dirty = true;
+       }
 
        /* on R600 we stuff masks + txq info into one constant buffer */
        /* on evergreen we only need a txq info one */