From 1de72dfc8a2014069edd1b3d3d46dad478d0680a Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 31 Mar 2015 11:51:00 -0400 Subject: [PATCH] freedreno/a3xx: add UBO support Signed-off-by: Ilia Mirkin --- docs/relnotes/10.6.0.html | 1 + src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 67 ++++++++----- .../drivers/freedreno/freedreno_screen.c | 4 +- src/gallium/drivers/freedreno/ir3/ir3.c | 2 +- .../drivers/freedreno/ir3/ir3_compiler.c | 93 +++++++++++++++++-- .../drivers/freedreno/ir3/ir3_legalize.c | 2 +- .../drivers/freedreno/ir3/ir3_shader.c | 2 +- 7 files changed, 133 insertions(+), 38 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 2378e3d5240..f69266a2998 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -50,6 +50,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_gpu_shader_fp64 on nvc0, softpipe
  • GL_ARB_instanced_arrays on freedreno
  • GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe
  • +
  • GL_ARB_uniform_buffer_object on freedreno
  • GL_EXT_draw_buffers2 on freedreno
  • GL_ARB_clip_control on i965
  • diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 5fd31f50daf..f961fc07585 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -87,11 +87,12 @@ static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, - struct ir3_shader_variant *shader) + struct ir3_shader_variant *shader, + bool emit_immediates) { uint32_t enabled_mask = constbuf->enabled_mask; - uint32_t first_immediate; - uint32_t base = 0; + uint32_t max_const; + int i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. @@ -102,42 +103,57 @@ emit_constants(struct fd_ringbuffer *ring, * than first_immediate. In that case truncate the user consts * early to avoid HLSQ lockup caused by writing too many consts */ - first_immediate = MIN2(shader->first_immediate, shader->constlen); + max_const = MIN2(shader->first_driver_param, shader->constlen); /* emit user constants: */ - while (enabled_mask) { - unsigned index = ffs(enabled_mask) - 1; + if (enabled_mask & 1) { + const unsigned index = 0; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); - /* gallium could leave const buffers bound above what the - * current shader uses.. don't let that confuse us. + /* and even if the start of the const buffer is before + * first_immediate, the end may not be: */ - if (base >= (4 * first_immediate)) - break; + size = MIN2(size, 4 * max_const); - if (constbuf->dirty_mask & (1 << index)) { - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, (4 * first_immediate) - base); - fd3_emit_constant(ring, sb, base, - cb->buffer_offset, size, - cb->user_buffer, cb->buffer); + if (size && constbuf->dirty_mask & (1 << index)) { + fd3_emit_constant(ring, sb, 0, + cb->buffer_offset, size, + cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } - base += size; enabled_mask &= ~(1 << index); } + if (shader->constlen > shader->first_driver_param) { + uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); + /* emit ubos: */ + OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(params * 2)); + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + + for (i = 1; i <= params * 4; i++) { + struct pipe_constant_buffer *cb = &constbuf->cb[i]; + assert(!cb->user_buffer); + if ((enabled_mask & (1 << i)) && cb->buffer) + OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); + else + OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); + } + } + /* emit shader immediates: */ - if (shader) { + if (shader && emit_immediates) { int size = shader->immediates_count; - base = shader->first_immediate; + uint32_t base = shader->first_immediate; /* truncate size to avoid writing constants that shader * does not use: @@ -619,11 +635,11 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], - (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL); + vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); if (!emit->key.binning_pass) { emit_constants(ring, SB_FRAG_SHADER, &ctx->constbuf[PIPE_SHADER_FRAGMENT], - (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL); + fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); } } @@ -635,8 +651,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, 0, 0 }; - if (vp->constlen > vp->first_driver_param) { - fd3_emit_constant(ring, SB_VERT_SHADER, vp->first_driver_param * 4, + if (vp->constlen >= vp->first_driver_param + 4) { + fd3_emit_constant(ring, SB_VERT_SHADER, + (vp->first_driver_param + 4) * 4, 0, 4, vertex_params, NULL); } } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index fe724442c07..66fe0e571cf 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -356,9 +356,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, * split between VS and FS. Use lower limit of 256 to * avoid getting into impossible situations: */ - return ((is_a3xx(screen) || is_a4xx(screen)) ? 256 : 64) * sizeof(float[4]); + return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; + return is_a3xx(screen) ? 16 : 1; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* nothing uses this */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 42a45776211..284c6559eb1 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -487,7 +487,7 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, iassert(instr->regs_count >= 2); - if (instr->cat6.offset) { + if (instr->cat6.offset || instr->opc == OPC_LDG) { instr_cat6a_t *cat6a = ptr; cat6->has_off = true; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 38df8b5fdf4..43f4c955ac0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -151,6 +151,7 @@ static void vectorize(struct ir3_compile_context *ctx, static void create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src); static type_t get_ftype(struct ir3_compile_context *ctx); +static type_t get_utype(struct ir3_compile_context *ctx); static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i) { @@ -252,7 +253,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, * the assembler what the max addr reg value can be: */ if (info->indirect_files & FM(CONSTANT)) - so->constlen = ctx->info.file_max[TGSI_FILE_CONSTANT] + 1; + so->constlen = MIN2(255, ctx->info.const_file_max[0] + 1); i = 0; i += setup_arrays(ctx, TGSI_FILE_INPUT, i); @@ -261,12 +262,13 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, /* any others? we don't track arrays for const..*/ /* Immediates go after constants: */ - if (so->type == SHADER_VERTEX) { - so->first_driver_param = info->file_max[TGSI_FILE_CONSTANT] + 1; - so->first_immediate = so->first_driver_param + 1; - } else { - so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1; - } + so->first_immediate = so->first_driver_param = + info->const_file_max[0] + 1; + /* 1 unit for the vertex id base */ + if (so->type == SHADER_VERTEX) + so->first_immediate++; + /* 4 (vec4) units for ubo base addresses */ + so->first_immediate += 4; ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); ret = tgsi_parse_init(&ctx->parser, ctx->tokens); @@ -717,6 +719,80 @@ ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg, reg->offset = regid(off, chan); instr = array_fanin(ctx, aid, src->File); + } else if (src->File == TGSI_FILE_CONSTANT && src->Dimension) { + const struct tgsi_full_src_register *fsrc = (const void *)src; + struct ir3_instruction *temp = NULL; + int ubo_regid = regid(ctx->so->first_driver_param, 0) + + fsrc->Dimension.Index - 1; + int offset = 0; + + /* We don't handle indirect UBO array accesses... yet. */ + compile_assert(ctx, !fsrc->Dimension.Indirect); + /* UBOs start at index 1. */ + compile_assert(ctx, fsrc->Dimension.Index > 0); + + if (src->Indirect) { + /* In case of an indirect index, it will have been loaded into an + * address register. There will be a sequence of + * + * shl.b x, val, 2 + * mova a0, x + * + * We rely on this sequence to get the original val out and shift + * it by 4, since we're dealing in vec4 units. + */ + compile_assert(ctx, ctx->block->address); + compile_assert(ctx, ctx->block->address->regs[1]->instr->opc == + OPC_SHL_B); + + temp = instr = instr_create(ctx, 2, OPC_SHL_B); + ir3_reg_create(instr, 0, 0); + ir3_reg_create(instr, 0, IR3_REG_HALF | IR3_REG_SSA)->instr = + ctx->block->address->regs[1]->instr->regs[1]->instr; + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4; + } else if (src->Index >= 64) { + /* Otherwise it's a plain index (in vec4 units). Move it into a + * register. + */ + temp = instr = instr_create(ctx, 1, 0); + instr->cat1.src_type = get_utype(ctx); + instr->cat1.dst_type = get_utype(ctx); + ir3_reg_create(instr, 0, 0); + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = src->Index * 16; + } else { + /* The offset is small enough to fit into the ldg instruction + * directly. + */ + offset = src->Index * 16; + } + + if (temp) { + /* If there was an offset (most common), add it to the buffer + * address. + */ + instr = instr_create(ctx, 2, OPC_ADD_S); + ir3_reg_create(instr, 0, 0); + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp; + ir3_reg_create(instr, ubo_regid, IR3_REG_CONST); + } else { + /* Otherwise just load the buffer address directly */ + instr = instr_create(ctx, 1, 0); + instr->cat1.src_type = get_utype(ctx); + instr->cat1.dst_type = get_utype(ctx); + ir3_reg_create(instr, 0, 0); + ir3_reg_create(instr, ubo_regid, IR3_REG_CONST); + } + + temp = instr; + + instr = instr_create(ctx, 6, OPC_LDG); + instr->cat6.type = TYPE_U32; + instr->cat6.offset = offset + chan * 4; + ir3_reg_create(instr, 0, 0); + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp; + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; + + reg->flags &= ~(IR3_REG_RELATIV | IR3_REG_CONST); } else { /* normal case (not relative addressed GPR) */ instr = ssa_instr_get(ctx, src->File, regid(src->Index, chan)); @@ -3183,7 +3259,8 @@ decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) instr->cat1.src_type = get_stype(ctx); instr->cat1.dst_type = get_stype(ctx); ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, regid(so->first_driver_param, 0), IR3_REG_CONST); + ir3_reg_create(instr, regid(so->first_driver_param + 4, 0), + IR3_REG_CONST); break; case TGSI_SEMANTIC_INSTANCEID: ctx->instance_id = instr = create_input(ctx->block, NULL, r); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index db501e7a51c..2455f7e4efc 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -175,7 +175,7 @@ static void legalize(struct ir3_legalize_ctx *ctx) /* both tex/sfu appear to not always immediately consume * their src register(s): */ - if (is_tex(n) || is_sfu(n)) { + if (is_tex(n) || is_sfu(n) || is_mem(n)) { foreach_src(reg, n) { if (reg_gpr(reg)) regmask_set(&needs_ss_war, reg); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index b1dff381813..0cf357e17d8 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -116,7 +116,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id) * the compiler (to worst-case value) since we don't know in * the assembler what the max addr reg value can be: */ - v->constlen = MAX2(v->constlen, v->info.max_const + 1); + v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1)); fixup_regfootprint(v); -- 2.30.2