nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(ssa));
} break;
case nir_intrinsic_load_uniform: {
- /* multiply by 16 and convert to int */
+ /* convert indirect load_uniform to load_ubo when possible
+  * this is required on HALTI5+ because the address register is not implemented,
+  * and address register loads also aren't done optimally
+  */
+ if (v->shader->specs->halti < 2 || nir_src_is_const(intr->src[0]))
+ break;
+
+ nir_intrinsic_instr *load_ubo =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
+ load_ubo->num_components = intr->num_components;
+ nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
+ load_ubo->num_components, 32, NULL);
+
b.cursor = nir_before_instr(instr);
- nir_ssa_def *ssa = nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16));
- nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
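+ /* load_ubo srcs are the block index and the byte offset; block 0 is
+  * reserved for lowered uniforms, and the vec4-unit index and base are
+  * scaled by 16 to get bytes */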
+ load_ubo->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+ load_ubo->src[1] = nir_src_for_ssa(nir_iadd(&b,
+ nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16)),
+ nir_imm_int(&b, nir_intrinsic_base(intr) * 16)));
+ nir_builder_instr_insert(&b, &load_ubo->instr);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+ nir_src_for_ssa(&load_ubo->dest.ssa));
+ nir_instr_remove(&intr->instr);
+ } break;
+ case nir_intrinsic_load_ubo: {
+ nir_const_value *idx = nir_src_as_const_value(intr->src[0]);
+ assert(idx);
+ /* offset the index by 1; index 0 is reserved for the converted load_uniform */
+ b.cursor = nir_before_instr(instr);
+ nir_instr_rewrite_src(instr, &intr->src[0],
+ nir_src_for_ssa(nir_imm_int(&b, idx[0].u32 + 1)));
} break;
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_instance_id:
}
}
-static void
-etna_emit_load_ubo(struct etna_compile *c, struct etna_inst_dst dst,
- struct etna_inst_src src, struct etna_inst_src base)
-{
- /* convert float offset back to integer */
- if (c->specs->halti < 2) {
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_F2I,
- .type = INST_TYPE_U32,
- .dst = dst,
- .src[0] = src,
- });
-
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_LOAD,
- .type = INST_TYPE_U32,
- .dst = dst,
- .src[0] = {
- .use = 1,
- .rgroup = INST_RGROUP_TEMP,
- .reg = dst.reg,
- .swiz = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1)
- },
- .src[1] = base,
- });
-
- return;
- }
-
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_LOAD,
- .type = INST_TYPE_U32,
- .dst = dst,
- .src[0] = src,
- .src[1] = base,
- });
-}
-
#define OPT(nir, pass, ...) ({ \
bool this_progress = false; \
NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
-#define UNIFORM_BASE(x) CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR, x)
#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
static int
case nir_intrinsic_load_input:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_uniform:
+ case nir_intrinsic_load_ubo:
return ra_src(state, src);
case nir_intrinsic_load_front_face:
return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic == nir_intrinsic_load_uniform ||
+ intr->intrinsic == nir_intrinsic_load_ubo ||
intr->intrinsic == nir_intrinsic_load_input ||
intr->intrinsic == nir_intrinsic_load_instance_id)
dest = &intr->dest;
if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic == nir_intrinsic_load_uniform) {
- /* make sure there isn't any reswizzling */
+ /* can't have dst swizzle or sparse writemask on UBO loads */
+ if (intr->intrinsic == nir_intrinsic_load_ubo) {
assert(dest == &intr->dest);
if (dest->ssa.num_components == 2)
c = REG_CLASS_VIRT_VEC2C;
break;
case nir_intrinsic_load_uniform: {
unsigned dst_swiz;
- hw_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
- /* TODO: might have a problem with dst_swiz .. */
- emit(load_ubo, dst, get_src(state, &intr->src[0]), const_src(state, &UNIFORM_BASE(nir_intrinsic_base(intr) * 16), 1));
+ struct etna_inst_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
+
+ /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
+ emit_inst(state->c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MOVAR,
+ .dst.write_mask = 0x1,
+ .src[2] = get_src(state, &intr->src[0]),
+ });
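+ /* the offset now sits in address register a.x; the MOV source adds it to
+  * the uniform base via the ADD_A_X addressing mode */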
+ emit_inst(state->c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MOV,
+ .dst = dst,
+ .src[2] = {
+ .use = 1,
+ .rgroup = INST_RGROUP_UNIFORM_0,
+ .reg = nir_intrinsic_base(intr),
+ .swiz = dst_swiz,
+ .amode = INST_AMODE_ADD_A_X,
+ },
+ });
+ } break;
+ case nir_intrinsic_load_ubo: {
+ /* TODO: if offset is of the form (x + C) then add C to the base instead */
+ unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
+ unsigned dst_swiz;
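+ /* LOAD fetches from the address formed from its two sources: the byte
+  * offset (src[0]) and the UBO base address (src[1]), a uniform patched at
+  * draw time by etna_uniforms_write() through the ETNA_IMMEDIATE_UBO0_ADDR
+  * reloc */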
+ emit_inst(state->c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_LOAD,
+ .type = INST_TYPE_U32,
+ .dst = ra_dest(state, &intr->dest, &dst_swiz),
+ .src[0] = get_src(state, &intr->src[1]),
+ .src[1] = const_src(state, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
+ });
} break;
case nir_intrinsic_load_front_face:
case nir_intrinsic_load_frag_coord:
.shader = shader,
.impl = nir_shader_get_entrypoint(shader),
};
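+ /* track whether any load_uniform with a non-constant offset remains, and
+  * the highest vec4 such a load can address, so that uniform range can be
+  * emitted below */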
+ bool have_indirect_uniform = false;
+ unsigned indirect_max = 0;
nir_builder b;
nir_builder_init(&b, state.impl);
} break;
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ /* TODO: load_ubo can also become a constant in some cases
+ * (at the moment it can end up emitting a LOAD with two
+ * uniform sources, which could be a problem on HALTI2)
+ */
if (intr->intrinsic != nir_intrinsic_load_uniform)
break;
nir_const_value *off = nir_src_as_const_value(intr->src[0]);
- if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT)
+ if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
+ have_indirect_uniform = true;
+ indirect_max = MAX2(indirect_max, nir_intrinsic_base(intr) + nir_intrinsic_range(intr));
break;
+ }
unsigned base = nir_intrinsic_base(intr);
/* pre halti2 uniform offset will be float */
if (c->specs->halti < 2)
- base += (unsigned) off[0].f32 / 16;
+ base += (unsigned) off[0].f32;
else
- base += off[0].u32 / 16;
-
+ base += off[0].u32;
nir_const_value value[4];
for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
}
}
+ /* TODO: only emit required indirect uniform ranges */
+ if (have_indirect_uniform) {
+ for (unsigned i = 0; i < indirect_max * 4; i++)
+ c->consts[i] = UNIFORM(i).u64;
+ state.const_count = indirect_max;
+ }
+
/* add mov for any store output using sysval/const */
nir_foreach_block(block, state.impl) {
nir_foreach_instr_safe(instr, block) {
}
/* Mark constant buffers as being read */
- resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX].buffer);
- resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].buffer);
+ for (unsigned i = 0; i < ETNA_MAX_CONST_BUF; i++) {
+ resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX][i].buffer);
+ resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT][i].buffer);
+ }
/* Mark VBOs as being read */
foreach_bit(i, ctx->vertex_buffer.enabled_mask) {
ETNA_IMMEDIATE_TEXRECT_SCALE_X,
ETNA_IMMEDIATE_TEXRECT_SCALE_Y,
ETNA_IMMEDIATE_UBO0_ADDR,
- ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + 255,
+ ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + ETNA_MAX_CONST_BUF - 1,
};
struct etna_shader_uniform_info {
uint32_t active_sampler_views;
uint32_t dirty_sampler_views;
struct pipe_sampler_view *sampler_view[PIPE_MAX_SAMPLERS];
- struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
+ struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES][ETNA_MAX_CONST_BUF];
struct etna_vertexbuf_state vertex_buffer;
struct etna_index_buffer index_buffer;
struct etna_shader_state shader;
if (do_uniform_flush)
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
- etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
+ etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]);
if (do_uniform_flush)
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
- etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
+ etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
if (ctx->specs.halti >= 5) {
/* HALTI5 needs to be prompted to pre-fetch shaders */
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
- etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
+ etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]);
/* ideally this cache would only be flushed if there are PS uniform changes */
if (do_uniform_flush)
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
- etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
+ etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
}
/**** End of state update ****/
#undef EMIT_STATE
#define ETNA_NUM_LOD (14)
#define ETNA_NUM_LAYERS (6)
#define ETNA_MAX_UNIFORMS (256)
+#define ETNA_MAX_CONST_BUF 16
#define ETNA_MAX_PIXELPIPES 2
/* All RS operations must have width%16 = 0 */
enum pipe_shader_cap param)
{
struct etna_screen *screen = etna_screen(pscreen);
+ bool ubo_enable = screen->specs.halti >= 2 && DBG_ENABLED(ETNA_DBG_NIR);
+
+ if (DBG_ENABLED(ETNA_DBG_DEQP))
+ ubo_enable = true;
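+ /* the dEQP debug flag advertises UBOs even when NIR/HALTI2 support is
+  * missing */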
switch (shader) {
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_CAP_MAX_TEMPS:
return 64; /* Max native temporaries. */
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return DBG_ENABLED(ETNA_DBG_DEQP) ? 16 : 1;
+ return ubo_enable ? ETNA_MAX_CONST_BUF : 1;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_PREFERRED_IR:
return DBG_ENABLED(ETNA_DBG_NIR) ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+ if (ubo_enable)
+ return 16384; /* large enough that the state tracker enables UBOs */
return shader == PIPE_SHADER_FRAGMENT
? screen->specs.max_ps_uniforms * sizeof(float[4])
: screen->specs.max_vs_uniforms * sizeof(float[4]);
screen->specs.max_vs_uniforms = 168;
screen->specs.max_ps_uniforms = 64;
}
-
- if (DBG_ENABLED(ETNA_DBG_DEQP))
- screen->specs.max_ps_uniforms = 1024;
}
static bool
{
struct etna_context *ctx = etna_context(pctx);
- if (unlikely(index > 0)) {
- DBG("Unhandled buffer index %i", index);
- return;
- }
-
+ assert(index < ETNA_MAX_CONST_BUF);
- util_copy_constant_buffer(&ctx->constant_buffer[shader], cb);
+ util_copy_constant_buffer(&ctx->constant_buffer[shader][index], cb);
/* Note that the state tracker can unbind constant buffers by
* passing NULL here. */
if (unlikely(!cb || (!cb->buffer && !cb->user_buffer)))
return;
- /* there is no support for ARB_uniform_buffer_object */
- assert(cb->buffer == NULL && cb->user_buffer != NULL);
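+ /* buffer index 0 holds the plain uniforms and is always a user buffer;
+  * real UBOs may be backed by resources */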
+ assert(index != 0 || cb->user_buffer != NULL);
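+ /* give user buffers a backing BO so the shader can read them with LOAD */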
if (!cb->buffer) {
- struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader];
+ struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader][index];
u_upload_data(pctx->const_uploader, 0, cb->buffer_size, 16, cb->user_buffer, &cb->buffer_offset, &cb->buffer);
}
const struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
bool frag = (sobj == ctx->shader.fs);
uint32_t base = frag ? ctx->specs.ps_uniforms_offset : ctx->specs.vs_uniforms_offset;
+ unsigned idx;
if (!uinfo->imm_count)
return;
break;
case ETNA_IMMEDIATE_UBO0_ADDR ... ETNA_IMMEDIATE_UBOMAX_ADDR:
- assert(uinfo->imm_contents[i] == ETNA_IMMEDIATE_UBO0_ADDR);
+ idx = uinfo->imm_contents[i] - ETNA_IMMEDIATE_UBO0_ADDR;
etna_cmd_stream_reloc(stream, &(struct etna_reloc) {
- .bo = etna_resource(cb->buffer)->bo,
+ .bo = etna_resource(cb[idx].buffer)->bo,
.flags = ETNA_RELOC_READ,
- .offset = cb->buffer_offset + val,
+ .offset = cb[idx].buffer_offset + val,
});
break;