From 077c448d184799e0d9ec962013ec784c6a5c1807 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:08 +0100 Subject: [PATCH] r600g: Add support for relative addressing on constant buffers. Relative addressing of constant buffers can't work properly through the kcache, since you can only address within the currently locked kcache window. Instead, this patch binds the constant buffer as a shader resource, and then explicitly fetches the constant using a vertex fetch with fetch type VTX_FETCH_NO_INDEX_OFFSET from the shader. There's probably still some room for improvement; for example, doing the fetch right before the instruction that needs the value may not be quite optimal. --- src/gallium/drivers/r600/evergreen_state.c | 12 +- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_pipe.h | 4 + src/gallium/drivers/r600/r600_shader.c | 119 +++++++++++++++++-- src/gallium/drivers/r600/r600_state.c | 9 +- src/gallium/drivers/r600/r600_state_common.c | 28 ++++- 6 files changed, 152 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index bfa21997839..83ab0df9c16 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -434,7 +434,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou for (int i = 0; i < count; i++) { if (resource[i]) { - evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); } } } @@ -449,9 +450,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, 
&resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -460,7 +463,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 48ff95ba214..0b20b207dc6 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -370,7 +370,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_CONSTS: return 256; //max native parameters case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; + return R600_MAX_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* FIXME */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 5f04fbf0992..b7ea6de3c7c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -36,6 +36,8 @@ #include "r600_shader.h" #include "r600_resource.h" +#define R600_MAX_CONST_BUFFERS 1 + enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, @@ -140,7 +142,9 @@ struct r600_pipe_context { struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; struct r600_pipe_state vs_const_buffer; + struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_state ps_const_buffer; + struct 
r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_rasterizer *rasterizer; /* shader information */ unsigned sprite_coord_enable; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 50f9ed6eda4..acb3ef2c4d6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -28,6 +28,7 @@ #include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" +#include "r600_formats.h" #include "r600_opcodes.h" #include "r600d.h" #include <stdio.h> @@ -296,6 +297,7 @@ struct r600_shader_ctx { unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; unsigned temp_reg; + unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; @@ -541,6 +543,55 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } } +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) +{ + struct r600_bc_vtx vtx; + unsigned int ar_reg; + int r; + + if (offset) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.src[0].sel = ctx->ar_reg; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = offset; + + alu.dst.sel = dst_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + ar_reg = dst_reg; + } else { + ar_reg = ctx->ar_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = ar_reg; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.data_format = FMT_32_32_32_32_FLOAT; + vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + return r; 
+ + return 0; +} + static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -554,7 +605,19 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { + continue; + } + + if (ctx->src[i].rel) { + int treg = r600_get_temp(ctx); + if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) + return r; + + ctx->src[i].sel = treg; + ctx->src[i].rel = 0; + j--; + } else if (j > 0) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -683,8 +746,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh ctx.file_offset[TGSI_FILE_CONSTANT] = 512; ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; - ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; ctx.literals = NULL; @@ -1760,7 +1824,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.resource_id = tex.sampler_id; + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 
0 : 7; @@ -2302,15 +2366,21 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; - alu.dst.chan = 0; - alu.dst.sel = ctx->temp_reg; + alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; + + /* TODO: Note that the MOVA can be avoided if we never use AR for + * indexing non-CB registers in the current ALU clause. Similarly, we + * need to load AR from ar_reg again if we started a new clause + * between ARL and AR usage. The easy way to do that is to remove + * the MOVA here, and load it for the first AR access after ar_reg + * has been modified in each clause. */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - alu.src[0].sel = ctx->temp_reg; + alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -2325,22 +2395,47 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.src[0].sel = ctx->ar_reg; + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, 
&alu))) + return r; break; default: assert(0); return -1; } - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + alu.src[0].sel = ctx->ar_reg; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e4382baad07..74dad450729 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -495,9 +495,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -507,7 +509,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a2b2c17e2ed..bcaf2b9e45e 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -317,6 +317,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource_buffer 
*rbuffer = r600_buffer(buffer); + struct r600_pipe_state *rstate; uint32_t offset; /* Note that the state tracker can unbind constant buffers by @@ -327,6 +328,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } r600_upload_const_buffer(rctx, &rbuffer, &offset); + offset += r600_bo_offset(rbuffer->r.bo); switch (shader) { case PIPE_SHADER_VERTEX: @@ -337,8 +339,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + + rstate = &rctx->vs_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } break; case PIPE_SHADER_FRAGMENT: rctx->ps_const_buffer.nregs = 0; @@ -348,8 +361,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + + rstate = &rctx->ps_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } else { + 
r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } break; default: R600_ERR("unsupported %d\n", shader); -- 2.30.2