r600g: Add support for relative addressing on constant buffers.
author Henri Verbeet <hverbeet@gmail.com>
Mon, 7 Feb 2011 14:22:08 +0000 (15:22 +0100)
committer Henri Verbeet <hverbeet@gmail.com>
Mon, 7 Feb 2011 14:22:08 +0000 (15:22 +0100)
Relative addressing of constant buffers can't work properly through the
kcache, since you can only address within the currently locked kcache window.
Instead, this patch binds the constant buffer as a shader resource, and then
explicitly fetches the constant using a vertex fetch with fetch type
VTX_FETCH_NO_INDEX_OFFSET from the shader. There's probably still some room
for improvement; for example, doing the fetch right before the instruction that
needs the value may not be quite optimal.

src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index bfa219978396d0345a0d9e64bbc5399d10e39095..83ab0df9c1669965f3656f12a43a1b47a2d15a06 100644 (file)
@@ -434,7 +434,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou
 
        for (int i = 0; i < count; i++) {
                if (resource[i]) {
-                       evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i);
+                       evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state,
+                                                                    i + R600_MAX_CONST_BUFFERS);
                }
        }
 }
@@ -449,9 +450,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
        for (i = 0; i < count; i++) {
                if (&rctx->ps_samplers.views[i]->base != views[i]) {
                        if (resource[i])
-                               evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+                               evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+                                                                            i + R600_MAX_CONST_BUFFERS);
                        else
-                               evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+                               evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+                                                                            i + R600_MAX_CONST_BUFFERS);
 
                        pipe_sampler_view_reference(
                                (struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -460,7 +463,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
        }
        for (i = count; i < NUM_TEX_UNITS; i++) {
                if (rctx->ps_samplers.views[i]) {
-                       evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+                       evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+                                                                    i + R600_MAX_CONST_BUFFERS);
                        pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
                }
        }
index 48ff95ba214afc795325e7eace79eadda6e19c5a..0b20b207dc6a85c189c2ec684b0ba4c222e06c8b 100644 (file)
@@ -370,7 +370,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
        case PIPE_SHADER_CAP_MAX_CONSTS:
                return 256; //max native parameters
        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-               return 1;
+               return R600_MAX_CONST_BUFFERS;
        case PIPE_SHADER_CAP_MAX_PREDS:
                return 0; /* FIXME */
        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
index 5f04fbf09920bc1fb4bdb9910f7772bc63f2a5ce..b7ea6de3c7c6608db52dfa50a82c997ab7076db2 100644 (file)
@@ -36,6 +36,8 @@
 #include "r600_shader.h"
 #include "r600_resource.h"
 
+#define R600_MAX_CONST_BUFFERS 1
+
 enum r600_pipe_state_id {
        R600_PIPE_STATE_BLEND = 0,
        R600_PIPE_STATE_BLEND_COLOR,
@@ -140,7 +142,9 @@ struct r600_pipe_context {
        struct r600_pipe_shader         *ps_shader;
        struct r600_pipe_shader         *vs_shader;
        struct r600_pipe_state          vs_const_buffer;
+       struct r600_pipe_state          vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
        struct r600_pipe_state          ps_const_buffer;
+       struct r600_pipe_state          ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
        struct r600_pipe_rasterizer     *rasterizer;
        /* shader information */
        unsigned                        sprite_coord_enable;
index 50f9ed6eda4441c0b50dd780874f00642c6e0fee..acb3ef2c4d610f53f57c5ab406aa72d4cda4c144 100644 (file)
@@ -28,6 +28,7 @@
 #include "r600_pipe.h"
 #include "r600_asm.h"
 #include "r600_sq.h"
+#include "r600_formats.h"
 #include "r600_opcodes.h"
 #include "r600d.h"
 #include <stdio.h>
@@ -296,6 +297,7 @@ struct r600_shader_ctx {
        unsigned                                type;
        unsigned                                file_offset[TGSI_FILE_COUNT];
        unsigned                                temp_reg;
+       unsigned                                ar_reg;
        struct r600_shader_tgsi_instruction     *inst_info;
        struct r600_bc                          *bc;
        struct r600_shader                      *shader;
@@ -541,6 +543,55 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
        }
 }
 
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
+{      /* Emits a vertex fetch that loads the constant addressed by (ctx->ar_reg + offset) into dst_reg; returns 0, or the non-zero code from the bytecode emitter. */
+       struct r600_bc_vtx vtx;
+       unsigned int ar_reg;    /* GPR used as the fetch's source address */
+       int r;
+
+       if (offset) {   /* non-zero offset: first emit dst_reg = ctx->ar_reg + offset */
+               struct r600_bc_alu alu;
+
+               memset(&alu, 0, sizeof(alu));
+
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
+               alu.src[0].sel = ctx->ar_reg;   /* GPR that the ARL/ARR handlers in this patch write */
+
+               alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;  /* offset is supplied as a literal operand */
+               alu.src[1].value = offset;
+
+               alu.dst.sel = dst_reg;  /* dst_reg temporarily holds the sum before the fetch overwrites it */
+               alu.dst.write = 1;
+               alu.last = 1;
+
+               if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+                       return r;       /* propagate the emitter's error code */
+
+               ar_reg = dst_reg;
+       } else {
+               ar_reg = ctx->ar_reg;   /* zero offset: fetch directly from the address GPR, no ADD needed */
+       }
+
+       memset(&vtx, 0, sizeof(vtx));   /* every field not set below stays zero */
+       vtx.fetch_type = 2;             /* VTX_FETCH_NO_INDEX_OFFSET */
+       vtx.src_gpr = ar_reg;   /* address computed or selected above */
+       vtx.mega_fetch_count = 16;      /* NOTE(review): presumably bytes, i.e. one 4 x 32-bit element - confirm */
+       vtx.dst_gpr = dst_reg;  /* all four channels (X..W selects below) are written to dst_reg */
+       vtx.dst_sel_x = 0;              /* SEL_X */
+       vtx.dst_sel_y = 1;              /* SEL_Y */
+       vtx.dst_sel_z = 2;              /* SEL_Z */
+       vtx.dst_sel_w = 3;              /* SEL_W */
+       vtx.data_format = FMT_32_32_32_32_FLOAT;
+       vtx.num_format_all = 2;         /* NUM_FORMAT_SCALED */
+       vtx.format_comp_all = 1;        /* FORMAT_COMP_SIGNED */
+       vtx.srf_mode_all = 1;           /* SRF_MODE_NO_ZERO */
+
+       if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+               return r;       /* propagate the emitter's error code */
+
+       return 0;
+}
+
 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -554,7 +605,19 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx)
                tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
        }
        for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
-               if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
+               if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
+                       continue;
+               }
+
+               if (ctx->src[i].rel) {
+                       int treg = r600_get_temp(ctx);
+                       if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
+                               return r;
+
+                       ctx->src[i].sel = treg;
+                       ctx->src[i].rel = 0;
+                       j--;
+               } else if (j > 0) {
                        int treg = r600_get_temp(ctx);
                        for (k = 0; k < 4; k++) {
                                memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -683,8 +746,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
        ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
 
        ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
-       ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+       ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
                        ctx.info.file_count[TGSI_FILE_TEMPORARY];
+       ctx.temp_reg = ctx.ar_reg + 1;
 
        ctx.nliterals = 0;
        ctx.literals = NULL;
@@ -1760,7 +1824,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        memset(&tex, 0, sizeof(struct r600_bc_tex));
        tex.inst = opcode;
        tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
-       tex.resource_id = tex.sampler_id;
+       tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
        tex.src_gpr = src_gpr;
        tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
        tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
@@ -2302,15 +2366,21 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
 
        r600_bc_src(&alu.src[0], &ctx->src[0], 0);
        alu.last = 1;
-       alu.dst.chan = 0;
-       alu.dst.sel = ctx->temp_reg;
+       alu.dst.sel = ctx->ar_reg;
        alu.dst.write = 1;
        r = r600_bc_add_alu(ctx->bc, &alu);
        if (r)
                return r;
+
+       /* TODO: Note that the MOVA can be avoided if we never use AR for
+        * indexing non-CB registers in the current ALU clause. Similarly, we
+        * need to load AR from ar_reg again if we started a new clause
+        * between ARL and AR usage. The easy way to do that is to remove
+        * the MOVA here, and load it for the first AR access after ar_reg
+        * has been modified in each clause. */
        memset(&alu, 0, sizeof(struct r600_bc_alu));
        alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
-       alu.src[0].sel = ctx->temp_reg;
+       alu.src[0].sel = ctx->ar_reg;
        alu.src[0].chan = 0;
        alu.last = 1;
        r = r600_bc_add_alu(ctx->bc, &alu);
@@ -2325,22 +2395,47 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
        struct r600_bc_alu alu;
        int r;
 
-       memset(&alu, 0, sizeof(struct r600_bc_alu));
-
        switch (inst->Instruction.Opcode) {
        case TGSI_OPCODE_ARL:
-               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+               memset(&alu, 0, sizeof(alu));
+               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+               alu.dst.sel = ctx->ar_reg;
+               alu.dst.write = 1;
+               alu.last = 1;
+
+               if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+                       return r;
+
+               memset(&alu, 0, sizeof(alu));
+               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+               alu.src[0].sel = ctx->ar_reg;
+               alu.dst.sel = ctx->ar_reg;
+               alu.dst.write = 1;
+               alu.last = 1;
+
+               if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+                       return r;
                break;
        case TGSI_OPCODE_ARR:
-               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
+               memset(&alu, 0, sizeof(alu));
+               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+               alu.dst.sel = ctx->ar_reg;
+               alu.dst.write = 1;
+               alu.last = 1;
+
+               if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+                       return r;
                break;
        default:
                assert(0);
                return -1;
        }
 
-       r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-
+       memset(&alu, 0, sizeof(alu));
+       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+       alu.src[0].sel = ctx->ar_reg;
        alu.last = 1;
 
        r = r600_bc_add_alu(ctx->bc, &alu);
index e4382baad0718372cbc013298021d8251eca3a17..74dad450729f25e6d8c3953bb878059cbba4c5ea 100644 (file)
@@ -495,9 +495,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
        for (i = 0; i < count; i++) {
                if (&rctx->ps_samplers.views[i]->base != views[i]) {
                        if (resource[i])
-                               r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+                               r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+                                                                       i + R600_MAX_CONST_BUFFERS);
                        else
-                               r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+                               r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+                                                                       i + R600_MAX_CONST_BUFFERS);
 
                        pipe_sampler_view_reference(
                                (struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -507,7 +509,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
        }
        for (i = count; i < NUM_TEX_UNITS; i++) {
                if (rctx->ps_samplers.views[i]) {
-                       r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+                       r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+                                                               i + R600_MAX_CONST_BUFFERS);
                        pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
                }
        }
index a2b2c17e2ed0fab8e429df83265ed2f797e2f56b..bcaf2b9e45e3d26cbbeb07fbbe000acad8c269b3 100644 (file)
@@ -317,6 +317,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 {
        struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_resource_buffer *rbuffer = r600_buffer(buffer);
+       struct r600_pipe_state *rstate;
        uint32_t offset;
 
        /* Note that the state tracker can unbind constant buffers by
@@ -327,6 +328,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
        }
 
        r600_upload_const_buffer(rctx, &rbuffer, &offset);
+       offset += r600_bo_offset(rbuffer->r.bo);
 
        switch (shader) {
        case PIPE_SHADER_VERTEX:
@@ -337,8 +339,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                                        0xFFFFFFFF, NULL);
                r600_pipe_state_add_reg(&rctx->vs_const_buffer,
                                        R_028980_ALU_CONST_CACHE_VS_0,
-                                       (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+                                       offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
                r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
+
+               rstate = &rctx->vs_const_buffer_resource[index];
+               rstate->id = R600_PIPE_STATE_RESOURCE;
+               rstate->nregs = 0;
+               if (rctx->family >= CHIP_CEDAR) {
+                       evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+                       evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+               } else {
+                       r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+                       r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+               }
                break;
        case PIPE_SHADER_FRAGMENT:
                rctx->ps_const_buffer.nregs = 0;
@@ -348,8 +361,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                                        0xFFFFFFFF, NULL);
                r600_pipe_state_add_reg(&rctx->ps_const_buffer,
                                        R_028940_ALU_CONST_CACHE_PS_0,
-                                       (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+                                       offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
                r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
+
+               rstate = &rctx->ps_const_buffer_resource[index];
+               rstate->id = R600_PIPE_STATE_RESOURCE;
+               rstate->nregs = 0;
+               if (rctx->family >= CHIP_CEDAR) {
+                       evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+                       evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+               } else {
+                       r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+                       r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+               }
                break;
        default:
                R600_ERR("unsupported %d\n", shader);