From 65cbf895670d2afb44d320fcc9d607f3c6c582ef Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 2 Mar 2013 17:14:51 +0100 Subject: [PATCH] r600g: atomize pixel shader Reviewed-by: Jerome Glisse --- .../drivers/r600/evergreen_hw_context.c | 96 ------------------- src/gallium/drivers/r600/evergreen_state.c | 69 ++++++------- src/gallium/drivers/r600/evergreend.h | 1 + src/gallium/drivers/r600/r600_hw_context.c | 50 +--------- src/gallium/drivers/r600/r600_pipe.h | 9 +- src/gallium/drivers/r600/r600_state.c | 45 +++++---- src/gallium/drivers/r600/r600_state_common.c | 20 ++-- 7 files changed, 83 insertions(+), 207 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 730e51fbfc6..a3528fc3421 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -28,107 +28,11 @@ #include "util/u_memory.h" #include "util/u_math.h" -static const struct r600_reg evergreen_context_reg_list[] = { - {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0}, - {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0}, - {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0}, - {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0}, - {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0}, - {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0}, - {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0}, - {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0}, - {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0}, - {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0}, - {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0}, - {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0}, - {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0}, - {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0}, - {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0}, - {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0}, - {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0}, - {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0}, - {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0}, - {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0}, - {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0}, - {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0}, - {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0}, - {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0}, - {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0}, - {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0}, - {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0}, - {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0}, - {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0}, - {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0}, - {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0}, - {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0}, - {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0}, - {R_0286D8_SPI_INPUT_Z, 0, 0}, - {R_0286E0_SPI_BARYC_CNTL, 0, 0}, - {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0}, - {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0}, - {R_028844_SQ_PGM_RESOURCES_PS, 0, 0}, - {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0}, -}; - -static const struct r600_reg cayman_context_reg_list[] = { - {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0}, - {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0}, - {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0}, - {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0}, - {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0}, - {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0}, - {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0}, - {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0}, - {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0}, - {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0}, - {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0}, - {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0}, - {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0}, - {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0}, - {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0}, - {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0}, - {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0}, - {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0}, - {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0}, - {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0}, - {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0}, - {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0}, - {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0}, - {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0}, - {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0}, - {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0}, - {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0}, - {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0}, - {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0}, - {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0}, - {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0}, - {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0}, - {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0}, - {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0}, - {R_0286D8_SPI_INPUT_Z, 0, 0}, - {R_0286E0_SPI_BARYC_CNTL, 0, 0}, - {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0}, - {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0}, - {R_028844_SQ_PGM_RESOURCES_PS, 0, 0}, - {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0}, -}; - int evergreen_context_init(struct r600_context *ctx) { int r = 0; /* add blocks */ - if (ctx->family >= CHIP_CAYMAN) - r = r600_context_add_block(ctx, cayman_context_reg_list, - Elements(cayman_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET); - else - r = r600_context_add_block(ctx, evergreen_context_reg_list, - Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET); - if (r) - goto out_err; - r = r600_setup_block_table(ctx); if (r) goto out_err; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index c52e4c8f074..2bdefb0d351 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2778,7 +2778,9 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0); r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0); - r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0); + r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2); + r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */ + r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */ r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 0); r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0); @@ -3234,7 +3236,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0); r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0); - r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0); + r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2); + r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */ + r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */ r600_store_context_reg(cb, R_0288EC_SQ_LDS_ALLOC_PS, 0); r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 0); @@ -3245,17 +3249,22 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx) void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_pipe_state *rstate = &shader->rstate; + struct r600_command_buffer *cb = &shader->command_buffer; struct r600_shader *rshader = &shader->shader; unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0; int pos_index = -1, face_index = -1; int ninterp = 0; boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; - unsigned spi_baryc_cntl, sid, tmp, idx = 0; + unsigned spi_baryc_cntl, sid, tmp, num = 0; unsigned z_export = 0, stencil_export = 0; unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0; + uint32_t spi_ps_input_cntl[32]; - rstate->nregs = 0; + if (!cb->buf) { + r600_init_command_buffer(cb, 64); + } else { + cb->num_dw = 0; + } for (i = 0; i < rshader->ninput; i++) { /* evergreen NUM_INTERP only contains values interpolated into the LDS, @@ -3277,7 +3286,6 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader sid = rshader->input[i].spi_sid; if (sid) { - tmp = S_028644_SEMANTIC(sid); if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || @@ -3292,13 +3300,13 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader tmp |= S_028644_PT_SPRITE_TEX(1); } - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + idx * 4, - tmp); - - idx++; + spi_ps_input_cntl[num++] = tmp; } } + r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, num); + r600_store_array(cb, num, spi_ps_input_cntl); + for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) z_export = 1; @@ -3342,7 +3350,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); - spi_input_z |= 1; + spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1); } spi_ps_in_control_1 = 0; @@ -3359,29 +3367,21 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | S_0286E0_LINEAR_CENTROID_ENA(have_centroid); - r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, - spi_ps_in_control_0); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, - spi_ps_in_control_1); - r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, - 0); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z); - r600_pipe_state_add_reg(rstate, - R_0286E0_SPI_BARYC_CNTL, - spi_baryc_cntl); - - r600_pipe_state_add_reg_bo(rstate, - R_028840_SQ_PGM_START_PS, - r600_resource_va(ctx->screen, (void *)shader->bo) >> 8, - shader->bo, RADEON_USAGE_READ); - r600_pipe_state_add_reg(rstate, - R_028844_SQ_PGM_RESOURCES_PS, - S_028844_NUM_GPRS(rshader->bc.ngpr) | - S_028844_PRIME_CACHE_ON_DRAW(1) | - S_028844_STACK_SIZE(rshader->bc.nstack)); - r600_pipe_state_add_reg(rstate, - R_02884C_SQ_PGM_EXPORTS_PS, - exports_ps); + r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2); + r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */ + r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */ + + r600_store_context_reg(cb, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); + r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z); + r600_store_context_reg(cb, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps); + + r600_store_context_reg_seq(cb, R_028840_SQ_PGM_START_PS, 2); + r600_store_value(cb, r600_resource_va(ctx->screen, (void *)shader->bo) >> 8); + r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */ + S_028844_NUM_GPRS(rshader->bc.ngpr) | + S_028844_PRIME_CACHE_ON_DRAW(1) | + S_028844_STACK_SIZE(rshader->bc.nstack)); + /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ shader->db_shader_control = db_shader_control; shader->ps_depth_export = z_export | stencil_export; @@ -3755,6 +3755,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5); r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0); r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); + r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0); rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 12c7ed14095..53b68a44c9d 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -1798,6 +1798,7 @@ #define R_0286C0_SPI_PS_INPUT_CNTL_31 0x000286C0 #define R_0286C8_SPI_THREAD_GROUPING 0x000286C8 #define R_0286D8_SPI_INPUT_Z 0x000286D8 +#define S_0286D8_PROVIDE_Z_TO_SPI(x) (((x) & 0x1) << 0) #define R_0286DC_SPI_FOG_CNTL 0x000286DC #define R_0286E4_SPI_PS_IN_CONTROL_2 0x000286E4 #define R_0286E8_SPI_COMPUTE_INPUT_CNTL 0x000286E8 diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index a2eefa8199f..dda38e34583 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -215,49 +215,6 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, return 0; } -static const struct r600_reg r600_context_reg_list[] = { - {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0}, - {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0}, - {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0}, - {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0}, - {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0}, - {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0}, - {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0}, - {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0}, - {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0}, - {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0}, - {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0}, - {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0}, - {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0}, - {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0}, - {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0}, - {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0}, - {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0}, - {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0}, - {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0}, - {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0}, - {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0}, - {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0}, - {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0}, - {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0}, - {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0}, - {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0}, - {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0}, - {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0}, - {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0}, - {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0}, - {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0}, - {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0}, - {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0}, - {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0}, - {R_0286D8_SPI_INPUT_Z, 0, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_028850_SQ_PGM_RESOURCES_PS, 0, 0}, - {R_028854_SQ_PGM_EXPORTS_PS, 0, 0}, -}; - /* initialize */ void r600_context_fini(struct r600_context *ctx) { @@ -322,12 +279,6 @@ int r600_context_init(struct r600_context *ctx) { int r; - /* add blocks */ - r = r600_context_add_block(ctx, r600_context_reg_list, - Elements(r600_context_reg_list), PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET); - if (r) - goto out_err; - r = r600_setup_block_table(ctx); if (r) goto out_err; @@ -806,6 +757,7 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->db_misc_state.atom.dirty = true; ctx->db_state.atom.dirty = true; ctx->framebuffer.atom.dirty = true; + ctx->pixel_shader.atom.dirty = true; ctx->poly_offset_state.atom.dirty = true; ctx->vgt_state.atom.dirty = true; ctx->sample_mask.atom.dirty = true; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 813012fd2de..ba0aaadec2e 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -34,7 +34,7 @@ #include "r600_public.h" #include "r600_resource.h" -#define R600_NUM_ATOMS 39 +#define R600_NUM_ATOMS 40 #define R600_TRACE_CS 0 @@ -805,6 +805,13 @@ static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned val cb->buf[cb->num_dw++] = value; } +static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr) +{ + assert(cb->num_dw+num <= cb->max_num_dw); + memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0])); + cb->num_dw += num; +} + static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg < R600_CONTEXT_REG_OFFSET); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e8d7c8469ee..846c1598fcf 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2690,7 +2690,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx) void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_pipe_state *rstate = &shader->rstate; + struct r600_command_buffer *cb = &shader->command_buffer; struct r600_shader *rshader = &shader->shader; unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; int pos_index = -1, face_index = -1; @@ -2699,8 +2699,13 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha unsigned z_export = 0, stencil_export = 0; unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0; - rstate->nregs = 0; + if (!cb->buf) { + r600_init_command_buffer(cb, 64); + } else { + cb->num_dw = 0; + } + r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, rshader->ninput); for (i = 0; i < rshader->ninput; i++) { if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) pos_index = i; @@ -2730,8 +2735,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha tmp |= S_028644_SEL_LINEAR(1); } - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, - tmp); + r600_store_value(cb, tmp); } db_shader_control = 0; @@ -2771,7 +2775,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | S_0286CC_BARYC_SAMPLE_CNTL(1)); - spi_input_z |= 1; + spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1); } spi_ps_in_control_1 = 0; @@ -2784,20 +2788,22 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha if (rctx->family == CHIP_R600) ufi = 1; - r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z); - r600_pipe_state_add_reg_bo(rstate, - R_028840_SQ_PGM_START_PS, - 0, shader->bo, RADEON_USAGE_READ); - r600_pipe_state_add_reg(rstate, - R_028850_SQ_PGM_RESOURCES_PS, - S_028850_NUM_GPRS(rshader->bc.ngpr) | - S_028850_STACK_SIZE(rshader->bc.nstack) | - S_028850_UNCACHED_FIRST_INST(ufi)); - r600_pipe_state_add_reg(rstate, - R_028854_SQ_PGM_EXPORTS_PS, - exports_ps); + r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2); + r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */ + r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */ + + r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z); + + r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2); + r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/ + S_028850_NUM_GPRS(rshader->bc.ngpr) | + S_028850_STACK_SIZE(rshader->bc.nstack) | + S_028850_UNCACHED_FIRST_INST(ufi)); + r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */ + + r600_store_context_reg(cb, R_028840_SQ_PGM_START_PS, 0); + /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ + /* only set some bits here, the other bits are set in the dsa state */ shader->db_shader_control = db_shader_control; shader->ps_depth_export = z_export | stencil_export; @@ -3192,6 +3198,7 @@ void r600_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5); r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0); r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); + r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0); rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 842d0d4aa47..89eb5e3343f 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -39,6 +39,7 @@ void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw) { + assert(!cb->buf); cb->buf = CALLOC(1, 4 * num_dw); cb->max_num_dw = num_dw; } @@ -707,7 +708,7 @@ static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_contex * (*dirty) is set to 1 if current variant was changed */ static int r600_shader_select(struct pipe_context *ctx, struct r600_pipe_shader_selector* sel, - unsigned *dirty) + bool *dirty) { struct r600_shader_key key; struct r600_context *rctx = (struct r600_context *)ctx; @@ -766,7 +767,7 @@ static int r600_shader_select(struct pipe_context *ctx, } if (dirty) - *dirty = 1; + *dirty = true; shader->next_variant = sel->current; sel->current = shader; @@ -816,8 +817,9 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state) if (!state) state = rctx->dummy_pixel_shader; - rctx->ps_shader = (struct r600_pipe_shader_selector *)state; - r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); + rctx->pixel_shader.shader = rctx->ps_shader = (struct r600_pipe_shader_selector *)state; + rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw; + rctx->pixel_shader.atom.dirty = true; r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo); @@ -1198,7 +1200,7 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s static bool r600_update_derived_state(struct r600_context *rctx) { struct pipe_context * ctx = (struct pipe_context*)rctx; - unsigned ps_dirty = 0; + bool ps_dirty = false; bool blend_disable; if (!rctx->blitter->running) { @@ -1227,11 +1229,13 @@ static bool r600_update_derived_state(struct r600_context *rctx) else r600_update_ps_state(ctx, rctx->ps_shader->current); - ps_dirty = 1; + ps_dirty = true; } - if (ps_dirty) - r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); + if (ps_dirty) { + rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw; + rctx->pixel_shader.atom.dirty = true; + } /* on R600 we stuff masks + txq info into one constant buffer */ /* on evergreen we only need a txq info one */ -- 2.30.2