etnaviv: implement UBOs
author Jonathan Marek <jonathan@marek.ca>
Sat, 4 Jan 2020 19:17:15 +0000 (14:17 -0500)
committer Marge Bot <eric+marge@anholt.net>
Wed, 29 Jan 2020 11:47:34 +0000 (11:47 +0000)
At the same time, make pre-HALTI2 use the address register for indirect
uniform loads, since integers and the LOAD instruction aren't always available.

Passes all dEQP-GLES3.functional.ubo.* on GC7000L. GC3000 with an extra
flush hack passes most of them, but still fails on some of the cases with
many loads.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3389>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3389>

src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
src/gallium/drivers/etnaviv/etnaviv_context.c
src/gallium/drivers/etnaviv/etnaviv_context.h
src/gallium/drivers/etnaviv/etnaviv_emit.c
src/gallium/drivers/etnaviv/etnaviv_internal.h
src/gallium/drivers/etnaviv/etnaviv_screen.c
src/gallium/drivers/etnaviv/etnaviv_state.c
src/gallium/drivers/etnaviv/etnaviv_uniforms.c

index 3aa2b00c822f984464e869f8bb09928d11a88281..8a71f62a07d8ce9d08b1fa1c24918fd5404df5e4 100644 (file)
@@ -115,10 +115,36 @@ etna_lower_io(nir_shader *shader, struct etna_shader_variant *v)
                   nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(ssa));
                } break;
                case nir_intrinsic_load_uniform: {
-                  /* multiply by 16 and convert to int */
+                  /* convert indirect load_uniform to load_ubo when possible
+                   * this is required on HALTI5+ because address register is not implemented
+                   * address register loads also aren't done optimally
+                   */
+                  if (v->shader->specs->halti < 2 || nir_src_is_const(intr->src[0]))
+                     break;
+
+                  nir_intrinsic_instr *load_ubo =
+                     nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
+                  load_ubo->num_components = intr->num_components;
+                  nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
+                                    load_ubo->num_components, 32, NULL);
+
                   b.cursor = nir_before_instr(instr);
-                  nir_ssa_def *ssa = nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16));
-                  nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
+                  load_ubo->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+                  load_ubo->src[1] = nir_src_for_ssa(nir_iadd(&b,
+                     nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16)),
+                     nir_imm_int(&b, nir_intrinsic_base(intr) * 16)));
+                  nir_builder_instr_insert(&b, &load_ubo->instr);
+                  nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+                                             nir_src_for_ssa(&load_ubo->dest.ssa));
+                  nir_instr_remove(&intr->instr);
+               } break;
+               case nir_intrinsic_load_ubo: {
+                  nir_const_value *idx = nir_src_as_const_value(intr->src[0]);
+                  assert(idx);
+                  /* offset index by 1, index 0 is used for converted load_uniform */
+                  b.cursor = nir_before_instr(instr);
+                  nir_instr_rewrite_src(instr, &intr->src[0],
+                                        nir_src_for_ssa(nir_imm_int(&b, idx[0].u32 + 1)));
                } break;
                case nir_intrinsic_load_vertex_id:
                case nir_intrinsic_load_instance_id:
@@ -593,44 +619,6 @@ etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src
    }
 }
 
-static void
-etna_emit_load_ubo(struct etna_compile *c, struct etna_inst_dst dst,
-                   struct etna_inst_src src, struct etna_inst_src base)
-{
-   /* convert float offset back to integer */
-   if (c->specs->halti < 2) {
-      emit_inst(c, &(struct etna_inst) {
-         .opcode = INST_OPCODE_F2I,
-         .type = INST_TYPE_U32,
-         .dst = dst,
-         .src[0] = src,
-      });
-
-      emit_inst(c, &(struct etna_inst) {
-         .opcode = INST_OPCODE_LOAD,
-         .type = INST_TYPE_U32,
-         .dst = dst,
-         .src[0] = {
-            .use = 1,
-            .rgroup = INST_RGROUP_TEMP,
-            .reg = dst.reg,
-            .swiz = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1)
-         },
-         .src[1] = base,
-      });
-
-      return;
-   }
-
-   emit_inst(c, &(struct etna_inst) {
-      .opcode = INST_OPCODE_LOAD,
-      .type = INST_TYPE_U32,
-      .dst = dst,
-      .src[0] = src,
-      .src[1] = base,
-   });
-}
-
 #define OPT(nir, pass, ...) ({                             \
    bool this_progress = false;                             \
    NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
index fc6a8569aa076903952b112bd86698aa349e05fa..564db9052f43bac0e5969e426c0aab210ccc22ee 100644 (file)
@@ -94,7 +94,6 @@ static inline bool is_sysval(nir_instr *instr)
 #define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
 #define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
 #define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
-#define UNIFORM_BASE(x) CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR, x)
 #define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
 
 static int
@@ -388,6 +387,7 @@ get_src(struct state *state, nir_src *src)
       case nir_intrinsic_load_input:
       case nir_intrinsic_load_instance_id:
       case nir_intrinsic_load_uniform:
+      case nir_intrinsic_load_ubo:
          return ra_src(state, src);
       case nir_intrinsic_load_front_face:
          return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
@@ -586,6 +586,7 @@ dest_for_instr(nir_instr *instr)
    case nir_instr_type_intrinsic: {
       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
       if (intr->intrinsic == nir_intrinsic_load_uniform ||
+          intr->intrinsic == nir_intrinsic_load_ubo ||
           intr->intrinsic == nir_intrinsic_load_input ||
           intr->intrinsic == nir_intrinsic_load_instance_id)
          dest = &intr->dest;
@@ -908,8 +909,8 @@ ra_assign(struct state *state, nir_shader *shader)
 
       if (instr->type == nir_instr_type_intrinsic) {
          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-         if (intr->intrinsic == nir_intrinsic_load_uniform) {
-            /* make sure there isn't any reswizzling */
+         /* can't have dst swizzle or sparse writemask on UBO loads */
+         if (intr->intrinsic == nir_intrinsic_load_ubo) {
             assert(dest == &intr->dest);
             if (dest->ssa.num_components == 2)
                c = REG_CLASS_VIRT_VEC2C;
@@ -1102,9 +1103,37 @@ emit_intrinsic(struct state *state, nir_intrinsic_instr * intr)
       break;
    case nir_intrinsic_load_uniform: {
       unsigned dst_swiz;
-      hw_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
-      /* TODO: might have a problem with dst_swiz .. */
-      emit(load_ubo, dst, get_src(state, &intr->src[0]), const_src(state, &UNIFORM_BASE(nir_intrinsic_base(intr) * 16), 1));
+      struct etna_inst_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
+
+      /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
+      emit_inst(state->c, &(struct etna_inst) {
+         .opcode = INST_OPCODE_MOVAR,
+         .dst.write_mask = 0x1,
+         .src[2] = get_src(state, &intr->src[0]),
+      });
+      emit_inst(state->c, &(struct etna_inst) {
+         .opcode = INST_OPCODE_MOV,
+         .dst = dst,
+         .src[2] = {
+            .use = 1,
+            .rgroup = INST_RGROUP_UNIFORM_0,
+            .reg = nir_intrinsic_base(intr),
+            .swiz = dst_swiz,
+            .amode = INST_AMODE_ADD_A_X,
+         },
+      });
+   } break;
+   case nir_intrinsic_load_ubo: {
+      /* TODO: if offset is of the form (x + C) then add C to the base instead */
+      unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
+      unsigned dst_swiz;
+      emit_inst(state->c, &(struct etna_inst) {
+         .opcode = INST_OPCODE_LOAD,
+         .type = INST_TYPE_U32,
+         .dst = ra_dest(state, &intr->dest, &dst_swiz),
+         .src[0] = get_src(state, &intr->src[1]),
+         .src[1] = const_src(state, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
+      });
    } break;
    case nir_intrinsic_load_front_face:
    case nir_intrinsic_load_frag_coord:
@@ -1402,6 +1431,8 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
       .shader = shader,
       .impl = nir_shader_get_entrypoint(shader),
    };
+   bool have_indirect_uniform = false;
+   unsigned indirect_max = 0;
 
    nir_builder b;
    nir_builder_init(&b, state.impl);
@@ -1421,19 +1452,25 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
          } break;
          case nir_instr_type_intrinsic: {
             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            /* TODO: load_ubo can also become a constant in some cases
+             * (at the moment it can end up emitting a LOAD with two
+             *  uniform sources, which could be a problem on HALTI2)
+             */
             if (intr->intrinsic != nir_intrinsic_load_uniform)
                break;
             nir_const_value *off = nir_src_as_const_value(intr->src[0]);
-            if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT)
+            if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
+               have_indirect_uniform = true;
+               indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
                break;
+            }
 
             unsigned base = nir_intrinsic_base(intr);
             /* pre halti2 uniform offset will be float */
             if (c->specs->halti < 2)
-               base += (unsigned) off[0].f32 / 16;
+               base += (unsigned) off[0].f32;
             else
-               base += off[0].u32 / 16;
-
+               base += off[0].u32;
             nir_const_value value[4];
 
             for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
@@ -1455,6 +1492,13 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
       }
    }
 
+   /* TODO: only emit required indirect uniform ranges */
+   if (have_indirect_uniform) {
+      for (unsigned i = 0; i < indirect_max * 4; i++)
+         c->consts[i] = UNIFORM(i).u64;
+      state.const_count = indirect_max;
+   }
+
    /* add mov for any store output using sysval/const  */
    nir_foreach_block(block, state.impl) {
       nir_foreach_instr_safe(instr, block) {
index 533b4033adc1c88f583e4118dfaeaa63119cb17b..c3ea0b0d4af903277495fe8918bbcaeb0c1a99c7 100644 (file)
@@ -288,8 +288,10 @@ etna_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
    }
 
    /* Mark constant buffers as being read */
-   resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX].buffer);
-   resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].buffer);
+   for (unsigned i = 0; i < ETNA_MAX_CONST_BUF; i++) {
+      resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX][i].buffer);
+      resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT][i].buffer);
+   }
 
    /* Mark VBOs as being read */
    foreach_bit(i, ctx->vertex_buffer.enabled_mask) {
index ea4de2e037236b5032135e59c49ecb9750e694fc..6e3d7d1a347d728c473ed68cb20fd326e08b0b58 100644 (file)
@@ -92,7 +92,7 @@ enum etna_immediate_contents {
    ETNA_IMMEDIATE_TEXRECT_SCALE_X,
    ETNA_IMMEDIATE_TEXRECT_SCALE_Y,
    ETNA_IMMEDIATE_UBO0_ADDR,
-   ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + 255,
+   ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + ETNA_MAX_CONST_BUF - 1,
 };
 
 struct etna_shader_uniform_info {
@@ -164,7 +164,7 @@ struct etna_context {
    uint32_t active_sampler_views;
    uint32_t dirty_sampler_views;
    struct pipe_sampler_view *sampler_view[PIPE_MAX_SAMPLERS];
-   struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
+   struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES][ETNA_MAX_CONST_BUF];
    struct etna_vertexbuf_state vertex_buffer;
    struct etna_index_buffer index_buffer;
    struct etna_shader_state shader;
index e60437d8ec926dc0e27b1a1d5443b9aba1a72e07..b92cf770cf4b3c55a856aab0545af859dde1a18e 100644 (file)
@@ -668,12 +668,12 @@ etna_emit_state(struct etna_context *ctx)
       if (do_uniform_flush)
          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
 
-      etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
+      etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]);
 
       if (do_uniform_flush)
          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
 
-      etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
+      etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
 
       if (ctx->specs.halti >= 5) {
          /* HALTI5 needs to be prompted to pre-fetch shaders */
@@ -687,14 +687,14 @@ etna_emit_state(struct etna_context *ctx)
          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
 
       if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
-         etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
+         etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]);
 
       /* ideally this cache would only be flushed if there are PS uniform changes */
       if (do_uniform_flush)
          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
 
       if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
-         etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
+         etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
    }
 /**** End of state update ****/
 #undef EMIT_STATE
index c868cafab8d18b4fe1d212c79ea075499331e370..22f6d2cd9fadb4e1e31be6c85cb8f0c864bc0611 100644 (file)
@@ -38,6 +38,7 @@
 #define ETNA_NUM_LOD (14)
 #define ETNA_NUM_LAYERS (6)
 #define ETNA_MAX_UNIFORMS (256)
+#define ETNA_MAX_CONST_BUF 16
 #define ETNA_MAX_PIXELPIPES 2
 
 /* All RS operations must have width%16 = 0 */
index f9edf53e2c0ad2deddffa594bc7205f751b16e4b..dcceddc4729973ab7a66df43af6dbee58455bec5 100644 (file)
@@ -280,6 +280,10 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
                              enum pipe_shader_cap param)
 {
    struct etna_screen *screen = etna_screen(pscreen);
+   bool ubo_enable = screen->specs.halti >= 2 && DBG_ENABLED(ETNA_DBG_NIR);
+
+   if (DBG_ENABLED(ETNA_DBG_DEQP))
+      ubo_enable = true;
 
    switch (shader) {
    case PIPE_SHADER_FRAGMENT:
@@ -315,7 +319,7 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
    case PIPE_SHADER_CAP_MAX_TEMPS:
       return 64; /* Max native temporaries. */
    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-      return DBG_ENABLED(ETNA_DBG_DEQP) ? 16 : 1;
+      return ubo_enable ? ETNA_MAX_CONST_BUF : 1;
    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
       return 1;
    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
@@ -340,6 +344,8 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
    case PIPE_SHADER_CAP_PREFERRED_IR:
       return DBG_ENABLED(ETNA_DBG_NIR) ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      if (ubo_enable)
+         return 16384; /* 16384 so state tracker enables UBOs */
       return shader == PIPE_SHADER_FRAGMENT
                 ? screen->specs.max_ps_uniforms * sizeof(float[4])
                 : screen->specs.max_vs_uniforms * sizeof(float[4]);
@@ -617,9 +623,6 @@ etna_determine_uniform_limits(struct etna_screen *screen)
       screen->specs.max_vs_uniforms = 168;
       screen->specs.max_ps_uniforms = 64;
    }
-
-   if (DBG_ENABLED(ETNA_DBG_DEQP))
-      screen->specs.max_ps_uniforms = 1024;
 }
 
 static bool
index 12f2648c5667da46b8a3a3b4c8a031efd0e346b0..5de34a887176a68941726982935fb5bd952ac009 100644 (file)
@@ -83,24 +83,19 @@ etna_set_constant_buffer(struct pipe_context *pctx,
 {
    struct etna_context *ctx = etna_context(pctx);
 
-   if (unlikely(index > 0)) {
-      DBG("Unhandled buffer index %i", index);
-      return;
-   }
-
+   assert(index < ETNA_MAX_CONST_BUF);
 
-   util_copy_constant_buffer(&ctx->constant_buffer[shader], cb);
+   util_copy_constant_buffer(&ctx->constant_buffer[shader][index], cb);
 
    /* Note that the state tracker can unbind constant buffers by
     * passing NULL here. */
    if (unlikely(!cb || (!cb->buffer && !cb->user_buffer)))
       return;
 
-   /* there is no support for ARB_uniform_buffer_object */
-   assert(cb->buffer == NULL && cb->user_buffer != NULL);
+   assert(index != 0 || cb->user_buffer != NULL);
 
    if (!cb->buffer) {
-      struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader];
+      struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader][index];
       u_upload_data(pctx->const_uploader, 0, cb->buffer_size, 16, cb->user_buffer, &cb->buffer_offset, &cb->buffer);
    }
 
index 22dbd6dbae1abb690f6016c49f2971261ea18057..356a55ba451c278256fc519f256d5a3cef5f72a5 100644 (file)
@@ -67,6 +67,7 @@ etna_uniforms_write(const struct etna_context *ctx,
    const struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
    bool frag = (sobj == ctx->shader.fs);
    uint32_t base = frag ? ctx->specs.ps_uniforms_offset : ctx->specs.vs_uniforms_offset;
+   unsigned idx;
 
    if (!uinfo->imm_count)
       return;
@@ -94,11 +95,11 @@ etna_uniforms_write(const struct etna_context *ctx,
          break;
 
       case ETNA_IMMEDIATE_UBO0_ADDR ... ETNA_IMMEDIATE_UBOMAX_ADDR:
-         assert(uinfo->imm_contents[i] == ETNA_IMMEDIATE_UBO0_ADDR);
+         idx = uinfo->imm_contents[i] - ETNA_IMMEDIATE_UBO0_ADDR;
          etna_cmd_stream_reloc(stream, &(struct etna_reloc) {
-            .bo = etna_resource(cb->buffer)->bo,
+            .bo = etna_resource(cb[idx].buffer)->bo,
             .flags = ETNA_RELOC_READ,
-            .offset = cb->buffer_offset + val,
+            .offset = cb[idx].buffer_offset + val,
          });
          break;