From: Jonathan Marek Date: Sat, 4 Jan 2020 19:17:15 +0000 (-0500) Subject: etnaviv: implement UBOs X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6346490a2ee5535dc75fcab34981d2a5b591dc01;p=mesa.git etnaviv: implement UBOs At the same time, on pre-HALTI2 hardware use the address register for indirect uniform loads, since integers and the LOAD instruction aren't always available. Passes all dEQP-GLES3.functional.ubo.* on GC7000L. GC3000 with an extra flush hack passes most of them, but still fails on some of the cases with many loads. Signed-off-by: Jonathan Marek Reviewed-by: Christian Gmeiner Tested-by: Marge Bot Part-of: --- diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c index 3aa2b00c822..8a71f62a07d 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c @@ -115,10 +115,36 @@ etna_lower_io(nir_shader *shader, struct etna_shader_variant *v) nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(ssa)); } break; case nir_intrinsic_load_uniform: { - /* multiply by 16 and convert to int */ + /* convert indirect load_uniform to load_ubo when possible + * this is required on HALTI5+ because address register is not implemented + * address register loads also arent done optimally + */ + if (v->shader->specs->halti < 2 || nir_src_is_const(intr->src[0])) + break; + + nir_intrinsic_instr *load_ubo = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo); + load_ubo->num_components = intr->num_components; + nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest, + load_ubo->num_components, 32, NULL); + b.cursor = nir_before_instr(instr); - nir_ssa_def *ssa = nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16)); - nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa)); + load_ubo->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + load_ubo->src[1] = nir_src_for_ssa(nir_iadd(&b, + nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16)), 
+ nir_imm_int(&b, nir_intrinsic_base(intr) * 16))); + nir_builder_instr_insert(&b, &load_ubo->instr); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, + nir_src_for_ssa(&load_ubo->dest.ssa)); + nir_instr_remove(&intr->instr); + } break; + case nir_intrinsic_load_ubo: { + nir_const_value *idx = nir_src_as_const_value(intr->src[0]); + assert(idx); + /* offset index by 1, index 0 is used for converted load_uniform */ + b.cursor = nir_before_instr(instr); + nir_instr_rewrite_src(instr, &intr->src[0], + nir_src_for_ssa(nir_imm_int(&b, idx[0].u32 + 1))); } break; case nir_intrinsic_load_vertex_id: case nir_intrinsic_load_instance_id: @@ -593,44 +619,6 @@ etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src } } -static void -etna_emit_load_ubo(struct etna_compile *c, struct etna_inst_dst dst, - struct etna_inst_src src, struct etna_inst_src base) -{ - /* convert float offset back to integer */ - if (c->specs->halti < 2) { - emit_inst(c, &(struct etna_inst) { - .opcode = INST_OPCODE_F2I, - .type = INST_TYPE_U32, - .dst = dst, - .src[0] = src, - }); - - emit_inst(c, &(struct etna_inst) { - .opcode = INST_OPCODE_LOAD, - .type = INST_TYPE_U32, - .dst = dst, - .src[0] = { - .use = 1, - .rgroup = INST_RGROUP_TEMP, - .reg = dst.reg, - .swiz = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1) - }, - .src[1] = base, - }); - - return; - } - - emit_inst(c, &(struct etna_inst) { - .opcode = INST_OPCODE_LOAD, - .type = INST_TYPE_U32, - .dst = dst, - .src[0] = src, - .src[1] = base, - }); -} - #define OPT(nir, pass, ...) 
({ \ bool this_progress = false; \ NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h index fc6a8569aa0..564db9052f4 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h @@ -94,7 +94,6 @@ static inline bool is_sysval(nir_instr *instr) #define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)} #define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x) #define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x) -#define UNIFORM_BASE(x) CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR, x) #define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x) static int @@ -388,6 +387,7 @@ get_src(struct state *state, nir_src *src) case nir_intrinsic_load_input: case nir_intrinsic_load_instance_id: case nir_intrinsic_load_uniform: + case nir_intrinsic_load_ubo: return ra_src(state, src); case nir_intrinsic_load_front_face: return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL }; @@ -586,6 +586,7 @@ dest_for_instr(nir_instr *instr) case nir_instr_type_intrinsic: { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); if (intr->intrinsic == nir_intrinsic_load_uniform || + intr->intrinsic == nir_intrinsic_load_ubo || intr->intrinsic == nir_intrinsic_load_input || intr->intrinsic == nir_intrinsic_load_instance_id) dest = &intr->dest; @@ -908,8 +909,8 @@ ra_assign(struct state *state, nir_shader *shader) if (instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic == nir_intrinsic_load_uniform) { - /* make sure there isn't any reswizzling */ + /* can't have dst swizzle or sparse writemask on UBO loads */ + if (intr->intrinsic == nir_intrinsic_load_ubo) { assert(dest == &intr->dest); if (dest->ssa.num_components == 2) c = REG_CLASS_VIRT_VEC2C; @@ -1102,9 +1103,37 @@ emit_intrinsic(struct 
state *state, nir_intrinsic_instr * intr) break; case nir_intrinsic_load_uniform: { unsigned dst_swiz; - hw_dst dst = ra_dest(state, &intr->dest, &dst_swiz); - /* TODO: might have a problem with dst_swiz .. */ - emit(load_ubo, dst, get_src(state, &intr->src[0]), const_src(state, &UNIFORM_BASE(nir_intrinsic_base(intr) * 16), 1)); + struct etna_inst_dst dst = ra_dest(state, &intr->dest, &dst_swiz); + + /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */ + emit_inst(state->c, &(struct etna_inst) { + .opcode = INST_OPCODE_MOVAR, + .dst.write_mask = 0x1, + .src[2] = get_src(state, &intr->src[0]), + }); + emit_inst(state->c, &(struct etna_inst) { + .opcode = INST_OPCODE_MOV, + .dst = dst, + .src[2] = { + .use = 1, + .rgroup = INST_RGROUP_UNIFORM_0, + .reg = nir_intrinsic_base(intr), + .swiz = dst_swiz, + .amode = INST_AMODE_ADD_A_X, + }, + }); + } break; + case nir_intrinsic_load_ubo: { + /* TODO: if offset is of the form (x + C) then add C to the base instead */ + unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32; + unsigned dst_swiz; + emit_inst(state->c, &(struct etna_inst) { + .opcode = INST_OPCODE_LOAD, + .type = INST_TYPE_U32, + .dst = ra_dest(state, &intr->dest, &dst_swiz), + .src[0] = get_src(state, &intr->src[1]), + .src[1] = const_src(state, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1), + }); } break; case nir_intrinsic_load_front_face: case nir_intrinsic_load_frag_coord: @@ -1402,6 +1431,8 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts) .shader = shader, .impl = nir_shader_get_entrypoint(shader), }; + bool have_indirect_uniform = false; + unsigned indirect_max = 0; nir_builder b; nir_builder_init(&b, state.impl); @@ -1421,19 +1452,25 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts) } break; case nir_instr_type_intrinsic: { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + /* TODO: load_ubo can also become a constant in some cases + * (at the 
moment it can end up emitting a LOAD with two + * uniform sources, which could be a problem on HALTI2) + */ if (intr->intrinsic != nir_intrinsic_load_uniform) break; nir_const_value *off = nir_src_as_const_value(intr->src[0]); - if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) + if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) { + have_indirect_uniform = true; + indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr); break; + } unsigned base = nir_intrinsic_base(intr); /* pre halti2 uniform offset will be float */ if (c->specs->halti < 2) - base += (unsigned) off[0].f32 / 16; + base += (unsigned) off[0].f32; else - base += off[0].u32 / 16; - + base += off[0].u32; nir_const_value value[4]; for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) { @@ -1455,6 +1492,13 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts) } } + /* TODO: only emit required indirect uniform ranges */ + if (have_indirect_uniform) { + for (unsigned i = 0; i < indirect_max * 4; i++) + c->consts[i] = UNIFORM(i).u64; + state.const_count = indirect_max; + } + /* add mov for any store output using sysval/const */ nir_foreach_block(block, state.impl) { nir_foreach_instr_safe(instr, block) { diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.c b/src/gallium/drivers/etnaviv/etnaviv_context.c index 533b4033adc..c3ea0b0d4af 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_context.c +++ b/src/gallium/drivers/etnaviv/etnaviv_context.c @@ -288,8 +288,10 @@ etna_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } /* Mark constant buffers as being read */ - resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX].buffer); - resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].buffer); + for (unsigned i = 0; i < ETNA_MAX_CONST_BUF; i++) { + resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX][i].buffer); + resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT][i].buffer); + } /* Mark VBOs as 
being read */ foreach_bit(i, ctx->vertex_buffer.enabled_mask) { diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.h b/src/gallium/drivers/etnaviv/etnaviv_context.h index ea4de2e0372..6e3d7d1a347 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_context.h +++ b/src/gallium/drivers/etnaviv/etnaviv_context.h @@ -92,7 +92,7 @@ enum etna_immediate_contents { ETNA_IMMEDIATE_TEXRECT_SCALE_X, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, ETNA_IMMEDIATE_UBO0_ADDR, - ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + 255, + ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + ETNA_MAX_CONST_BUF - 1, }; struct etna_shader_uniform_info { @@ -164,7 +164,7 @@ struct etna_context { uint32_t active_sampler_views; uint32_t dirty_sampler_views; struct pipe_sampler_view *sampler_view[PIPE_MAX_SAMPLERS]; - struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES]; + struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES][ETNA_MAX_CONST_BUF]; struct etna_vertexbuf_state vertex_buffer; struct etna_index_buffer index_buffer; struct etna_shader_state shader; diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c index e60437d8ec9..b92cf770cf4 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c @@ -668,12 +668,12 @@ etna_emit_state(struct etna_context *ctx) if (do_uniform_flush) etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH); - etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]); + etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]); if (do_uniform_flush) etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS); - etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]); + etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]); if (ctx->specs.halti >= 5) { /* HALTI5 needs to be prompted to 
pre-fetch shaders */ @@ -687,14 +687,14 @@ etna_emit_state(struct etna_context *ctx) etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH); if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits)) - etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]); + etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]); /* ideally this cache would only be flushed if there are PS uniform changes */ if (do_uniform_flush) etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS); if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits)) - etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]); + etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]); } /**** End of state update ****/ #undef EMIT_STATE diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h b/src/gallium/drivers/etnaviv/etnaviv_internal.h index c868cafab8d..22f6d2cd9fa 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_internal.h +++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h @@ -38,6 +38,7 @@ #define ETNA_NUM_LOD (14) #define ETNA_NUM_LAYERS (6) #define ETNA_MAX_UNIFORMS (256) +#define ETNA_MAX_CONST_BUF 16 #define ETNA_MAX_PIXELPIPES 2 /* All RS operations must have width%16 = 0 */ diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index f9edf53e2c0..dcceddc4729 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -280,6 +280,10 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_cap param) { struct etna_screen *screen = etna_screen(pscreen); + bool ubo_enable = screen->specs.halti >= 2 && DBG_ENABLED(ETNA_DBG_NIR); + + if (DBG_ENABLED(ETNA_DBG_DEQP)) + ubo_enable = true; switch (shader) { case PIPE_SHADER_FRAGMENT: @@ -315,7 +319,7 @@ 
etna_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_TEMPS: return 64; /* Max native temporaries. */ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return DBG_ENABLED(ETNA_DBG_DEQP) ? 16 : 1; + return ubo_enable ? ETNA_MAX_CONST_BUF : 1; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: @@ -340,6 +344,8 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_PREFERRED_IR: return DBG_ENABLED(ETNA_DBG_NIR) ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + if (ubo_enable) + return 16384; /* 16384 so state tracker enables UBOs */ return shader == PIPE_SHADER_FRAGMENT ? screen->specs.max_ps_uniforms * sizeof(float[4]) : screen->specs.max_vs_uniforms * sizeof(float[4]); @@ -617,9 +623,6 @@ etna_determine_uniform_limits(struct etna_screen *screen) screen->specs.max_vs_uniforms = 168; screen->specs.max_ps_uniforms = 64; } - - if (DBG_ENABLED(ETNA_DBG_DEQP)) - screen->specs.max_ps_uniforms = 1024; } static bool diff --git a/src/gallium/drivers/etnaviv/etnaviv_state.c b/src/gallium/drivers/etnaviv/etnaviv_state.c index 12f2648c566..5de34a88717 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_state.c +++ b/src/gallium/drivers/etnaviv/etnaviv_state.c @@ -83,24 +83,19 @@ etna_set_constant_buffer(struct pipe_context *pctx, { struct etna_context *ctx = etna_context(pctx); - if (unlikely(index > 0)) { - DBG("Unhandled buffer index %i", index); - return; - } - + assert(index < ETNA_MAX_CONST_BUF); - util_copy_constant_buffer(&ctx->constant_buffer[shader], cb); + util_copy_constant_buffer(&ctx->constant_buffer[shader][index], cb); /* Note that the state tracker can unbind constant buffers by * passing NULL here. 
*/ if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) return; - /* there is no support for ARB_uniform_buffer_object */ - assert(cb->buffer == NULL && cb->user_buffer != NULL); + assert(index != 0 || cb->user_buffer != NULL); if (!cb->buffer) { - struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader]; + struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader][index]; u_upload_data(pctx->const_uploader, 0, cb->buffer_size, 16, cb->user_buffer, &cb->buffer_offset, &cb->buffer); } diff --git a/src/gallium/drivers/etnaviv/etnaviv_uniforms.c b/src/gallium/drivers/etnaviv/etnaviv_uniforms.c index 22dbd6dbae1..356a55ba451 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_uniforms.c +++ b/src/gallium/drivers/etnaviv/etnaviv_uniforms.c @@ -67,6 +67,7 @@ etna_uniforms_write(const struct etna_context *ctx, const struct etna_shader_uniform_info *uinfo = &sobj->uniforms; bool frag = (sobj == ctx->shader.fs); uint32_t base = frag ? ctx->specs.ps_uniforms_offset : ctx->specs.vs_uniforms_offset; + unsigned idx; if (!uinfo->imm_count) return; @@ -94,11 +95,11 @@ etna_uniforms_write(const struct etna_context *ctx, break; case ETNA_IMMEDIATE_UBO0_ADDR ... ETNA_IMMEDIATE_UBOMAX_ADDR: - assert(uinfo->imm_contents[i] == ETNA_IMMEDIATE_UBO0_ADDR); + idx = uinfo->imm_contents[i] - ETNA_IMMEDIATE_UBO0_ADDR; etna_cmd_stream_reloc(stream, &(struct etna_reloc) { - .bo = etna_resource(cb->buffer)->bo, + .bo = etna_resource(cb[idx].buffer)->bo, .flags = ETNA_RELOC_READ, - .offset = cb->buffer_offset + val, + .offset = cb[idx].buffer_offset + val, }); break;