freedreno/ir3: consolidate const state
author Rob Clark <robdclark@chromium.org>
Mon, 6 May 2019 21:52:27 +0000 (14:52 -0700)
committer Rob Clark <robdclark@chromium.org>
Tue, 7 May 2019 14:26:00 +0000 (07:26 -0700)
Combine the offsets of the different parts of the constant space with (what
was formerly known as) ir3_driver_const_layout.  A bunch of churn, but no
functional change.

Signed-off-by: Rob Clark <robdclark@chromium.org>
src/freedreno/ir3/ir3_a4xx.c
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_context.c
src/freedreno/ir3/ir3_cp.c
src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_nir.h
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/ir3/ir3_gallium.c

index 426a143acfbb3899246837bdd7d368834838edee..5fe15cf8e274361056977f1dea439fe1653c48a9 100644 (file)
@@ -217,10 +217,11 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
        /* to calculate the byte offset (yes, uggg) we need (up to) three
         * const values to know the bytes per pixel, and y and z stride:
         */
-       unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
-               ctx->so->const_layout.image_dims.off[var->data.driver_location];
+       struct ir3_const_state *const_state = &ctx->so->const_state;
+       unsigned cb = regid(const_state->offsets.image_dims, 0) +
+               const_state->image_dims.off[var->data.driver_location];
 
-       debug_assert(ctx->so->const_layout.image_dims.mask &
+       debug_assert(const_state->image_dims.mask &
                        (1 << var->data.driver_location));
 
        /* offset = coords.x * bytes_per_pixel: */
index 7a3b4a19ad7a3ddb8ea225cf331af895e67b6195..3eb34f44b14621692039fe3e79c38fb6f2eb61ae 100644 (file)
@@ -107,7 +107,8 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
 {
        /* first four vec4 sysval's reserved for UBOs: */
        /* NOTE: dp is in scalar, but there can be >4 dp components: */
-       unsigned n = ctx->so->constbase.driver_param;
+       struct ir3_const_state *const_state = &ctx->so->const_state;
+       unsigned n = const_state->offsets.driver_param;
        unsigned r = regid(n + dp / 4, dp % 4);
        return create_uniform(ctx->block, r);
 }
@@ -683,7 +684,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        /* UBO addresses are the first driver params, but subtract 2 here to
         * account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0
         * is the uniforms: */
-       unsigned ubo = regid(ctx->so->constbase.ubo, 0) - 2;
+       struct ir3_const_state *const_state = &ctx->so->const_state;
+       unsigned ubo = regid(const_state->offsets.ubo, 0) - 2;
        const unsigned ptrsz = ir3_pointer_size(ctx->compiler);
 
        int off = 0;
@@ -751,11 +753,12 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                struct ir3_instruction **dst)
 {
        /* SSBO size stored as a const starting at ssbo_sizes: */
+       struct ir3_const_state *const_state = &ctx->so->const_state;
        unsigned blk_idx = nir_src_as_uint(intr->src[0]);
-       unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) +
-               ctx->so->const_layout.ssbo_size.off[blk_idx];
+       unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) +
+               const_state->ssbo_size.off[blk_idx];
 
-       debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx));
+       debug_assert(const_state->ssbo_size.mask & (1 << blk_idx));
 
        dst[0] = create_uniform(ctx->block, idx);
 }
@@ -1006,8 +1009,9 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                 * bytes-per-pixel should have been emitted in 2nd slot of
                 * image_dims. See ir3_shader::emit_image_dims().
                 */
-               unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
-                       ctx->so->const_layout.image_dims.off[var->data.driver_location];
+               struct ir3_const_state *const_state = &ctx->so->const_state;
+               unsigned cb = regid(const_state->offsets.image_dims, 0) +
+                       const_state->image_dims.off[var->data.driver_location];
                struct ir3_instruction *aux = create_uniform(b, cb + 1);
 
                tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0);
@@ -2225,7 +2229,6 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
 static void
 emit_stream_out(struct ir3_context *ctx)
 {
-       struct ir3_shader_variant *v = ctx->so;
        struct ir3 *ir = ctx->ir;
        struct ir3_stream_output_info *strmout =
                        &ctx->so->shader->stream_output;
@@ -2283,10 +2286,11 @@ emit_stream_out(struct ir3_context *ctx)
         * stripped out in the backend.
         */
        for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
+               struct ir3_const_state *const_state = &ctx->so->const_state;
                unsigned stride = strmout->stride[i];
                struct ir3_instruction *base, *off;
 
-               base = create_uniform(ctx->block, regid(v->constbase.tfbo, i));
+               base = create_uniform(ctx->block, regid(const_state->offsets.tfbo, i));
 
                /* 24-bit should be enough: */
                off = ir3_MUL_U(ctx->block, vtxcnt, 0,
index 8c7d9a33f3ad04902ba2b4943b499462681156c8..d2210184a6086d190e004bda71ad5c76275a233c 100644 (file)
@@ -101,51 +101,34 @@ ir3_context_init(struct ir3_compiler *compiler,
                nir_print_shader(ctx->s, stderr);
        }
 
-       ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
+       ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
 
-       so->num_uniforms = ctx->s->num_uniforms;
-       so->num_ubos = ctx->s->info.num_ubos;
+       struct ir3_const_state *const_state = &so->const_state;
+       memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
 
-       ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
+       ir3_nir_scan_driver_consts(ctx->s, const_state);
+
+       const_state->num_uniforms = ctx->s->num_uniforms;
+       const_state->num_ubos = ctx->s->info.num_ubos;
 
-       /* Layout of constant registers, each section aligned to vec4.  Note
-        * that pointer size (ubo, etc) changes depending on generation.
-        *
-        *    user consts
-        *    UBO addresses
-        *    SSBO sizes
-        *    if (vertex shader) {
-        *        driver params (IR3_DP_*)
-        *        if (stream_output.num_outputs > 0)
-        *           stream-out addresses
-        *    }
-        *    immediates
-        *
-        * Immediates go last mostly because they are inserted in the CP pass
-        * after the nir -> ir3 frontend.
-        *
-        * Note UBO size in bytes should be aligned to vec4
-        */
        debug_assert((ctx->so->shader->ubo_state.size % 16) == 0);
        unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4);
        unsigned ptrsz = ir3_pointer_size(ctx->compiler);
 
-       memset(&so->constbase, ~0, sizeof(so->constbase));
-
-       if (so->num_ubos > 0) {
-               so->constbase.ubo = constoff;
+       if (const_state->num_ubos > 0) {
+               const_state->offsets.ubo = constoff;
                constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
        }
 
-       if (so->const_layout.ssbo_size.count > 0) {
-               unsigned cnt = so->const_layout.ssbo_size.count;
-               so->constbase.ssbo_sizes = constoff;
+       if (const_state->ssbo_size.count > 0) {
+               unsigned cnt = const_state->ssbo_size.count;
+               const_state->offsets.ssbo_sizes = constoff;
                constoff += align(cnt, 4) / 4;
        }
 
-       if (so->const_layout.image_dims.count > 0) {
-               unsigned cnt = so->const_layout.image_dims.count;
-               so->constbase.image_dims = constoff;
+       if (const_state->image_dims.count > 0) {
+               unsigned cnt = const_state->image_dims.count;
+               const_state->offsets.image_dims = constoff;
                constoff += align(cnt, 4) / 4;
        }
 
@@ -156,17 +139,17 @@ ir3_context_init(struct ir3_compiler *compiler,
                num_driver_params = IR3_DP_CS_COUNT;
        }
 
-       so->constbase.driver_param = constoff;
+       const_state->offsets.driver_param = constoff;
        constoff += align(num_driver_params, 4) / 4;
 
        if ((so->type == MESA_SHADER_VERTEX) &&
                        (compiler->gpu_id < 500) &&
                        so->shader->stream_output.num_outputs > 0) {
-               so->constbase.tfbo = constoff;
+               const_state->offsets.tfbo = constoff;
                constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
        }
 
-       so->constbase.immediate = constoff;
+       const_state->offsets.immediate = constoff;
 
        return ctx;
 }
index 28ba43f09eede29fea92acd40bb1cbc73eafc3ce..983c5fa61f289712fb43af658391af5d4d594a4c 100644 (file)
@@ -323,10 +323,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
                ctx->immediate_idx++;
        }
 
+       struct ir3_const_state *const_state = &ctx->so->const_state;
+
        new_flags &= ~IR3_REG_IMMED;
        new_flags |= IR3_REG_CONST;
        reg->flags = new_flags;
-       reg->num = i + (4 * ctx->so->constbase.immediate);
+       reg->num = i + (4 * const_state->offsets.immediate);
 
        return reg;
 }
index 744fd958fc66b3fee240789489bdae4b7ea757ea..804196f63e9255baba7708f2bb800a49ff843291 100644 (file)
@@ -278,7 +278,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 
 void
 ir3_nir_scan_driver_consts(nir_shader *shader,
-               struct ir3_driver_const_layout *layout)
+               struct ir3_const_state *layout)
 {
        nir_foreach_function(function, shader) {
                if (!function->impl)
index b60374410bcfe765bf058ffde9c4acb9aeababa8..bc0d496adfba30f31176edd1326cb02aa66764d5 100644 (file)
@@ -33,7 +33,7 @@
 
 #include "ir3_shader.h"
 
-void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout);
+void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_const_state *layout);
 
 bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
 bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
index 92e3e7b251d5d1a91993a950f6e1bc77ecbddb86..63cad3ee4147f0ed769467129a2273e7766d7e7e 100644 (file)
@@ -350,8 +350,9 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
                                (regid >> 2), "xyzw"[regid & 0x3], i);
        }
 
+       struct ir3_const_state *const_state = &so->const_state;
        for (i = 0; i < so->immediates_count; i++) {
-               fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i);
+               fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
                fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
                                so->immediates[i].val[0],
                                so->immediates[i].val[1],
index 7c1dc38de236b29c3565221dc0cad2e5a288beaa..448f60521940dfd76cada9c339fa1270863a1c09 100644 (file)
@@ -71,6 +71,14 @@ enum ir3_driver_param {
 
 
 /**
+ * Describes the layout of shader consts.  This includes:
+ *   + Driver lowered UBO ranges
+ *   + SSBO sizes
+ *   + Image sizes/dimensions
+ *   + Driver params (ie. IR3_DP_*)
+ *   + TFBO addresses (for generations that do not have hardware streamout)
+ *   + Lowered immediates
+ *
  * For consts needed to pass internal values to shader which may or may not
  * be required, rather than allocating worst-case const space, we scan the
  * shader and allocate consts as-needed:
@@ -80,8 +88,46 @@ enum ir3_driver_param {
  *
  *   + Image dimensions: needed to calculate pixel offset, but only for
  *     images that have a image_store intrinsic
+ *
+ * Layout of constant registers, each section aligned to vec4.  Note
+ * that pointer size (ubo, etc) changes depending on generation.
+ *
+ *    user consts
+ *    UBO addresses
+ *    SSBO sizes
+ *    if (vertex shader) {
+ *        driver params (IR3_DP_*)
+ *        if (stream_output.num_outputs > 0)
+ *           stream-out addresses
+ *    } else if (compute_shader) {
+ *        driver params (IR3_DP_*)
+ *    }
+ *    immediates
+ *
+ * Immediates go last mostly because they are inserted in the CP pass
+ * after the nir -> ir3 frontend.
+ *
+ * Note UBO size in bytes should be aligned to vec4
  */
-struct ir3_driver_const_layout {
+struct ir3_const_state {
+       /* number of uniforms (in vec4), not including built-in compiler
+        * constants, etc.
+        */
+       unsigned num_uniforms;
+
+       unsigned num_ubos;
+
+       struct {
+               /* user const start at zero */
+               unsigned ubo;
+               /* NOTE that a3xx might need a section for SSBO addresses too */
+               unsigned ssbo_sizes;
+               unsigned image_dims;
+               unsigned driver_param;
+               unsigned tfbo;
+               unsigned immediate;
+       } offsets;
+
        struct {
                uint32_t mask;  /* bitmask of SSBOs that have get_buffer_size */
                uint32_t count; /* number of consts allocated */
@@ -340,7 +386,7 @@ struct ir3_shader_variant {
        bool binning_pass;
        struct ir3_shader_variant *binning;
 
-       struct ir3_driver_const_layout const_layout;
+       struct ir3_const_state const_state;
        struct ir3_info info;
        struct ir3 *ir;
 
@@ -361,13 +407,6 @@ struct ir3_shader_variant {
         */
        unsigned constlen;
 
-       /* number of uniforms (in vec4), not including built-in compiler
-        * constants, etc.
-        */
-       unsigned num_uniforms;
-
-       unsigned num_ubos;
-
        /* About Linkage:
         *   + Let the frag shader determine the position/compmask for the
         *     varyings, since it is the place where we know if the varying
@@ -451,21 +490,6 @@ struct ir3_shader_variant {
 
        bool per_samp;
 
-       /* Layout of constant registers, each section (in vec4). Pointer size
-        * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the
-        * UBO and stream-out consts.
-        */
-       struct {
-               /* user const start at zero */
-               unsigned ubo;
-               /* NOTE that a3xx might need a section for SSBO addresses too */
-               unsigned ssbo_sizes;
-               unsigned image_dims;
-               unsigned driver_param;
-               unsigned tfbo;
-               unsigned immediate;
-       } constbase;
-
        unsigned immediates_count;
        unsigned immediates_size;
        struct {
index 0f4427f3028d9e6f9e463d2d19d4f90fc8862c67..3bb29daf9b8d10fdfa55c24e725814b9ee21dae1 100644 (file)
@@ -241,7 +241,8 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v,
                 * the user consts early to avoid HLSQ lockup caused by
                 * writing too many consts
                 */
-               uint32_t max_const = MIN2(v->num_uniforms, v->constlen);
+               const struct ir3_const_state *const_state = &v->const_state;
+               uint32_t max_const = MIN2(const_state->num_uniforms, v->constlen);
 
                /* and even if the start of the const buffer is before
                 * first_immediate, the end may not be:
@@ -280,9 +281,10 @@ static void
 emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
 {
-       uint32_t offset = v->constbase.ubo;
+       const struct ir3_const_state *const_state = &v->const_state;
+       uint32_t offset = const_state->offsets.ubo;
        if (v->constlen > offset) {
-               uint32_t params = v->num_ubos;
+               uint32_t params = const_state->num_ubos;
                uint32_t offsets[params];
                struct pipe_resource *prscs[params];
 
@@ -309,14 +311,15 @@ static void
 emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
 {
-       uint32_t offset = v->constbase.ssbo_sizes;
+       const struct ir3_const_state *const_state = &v->const_state;
+       uint32_t offset = const_state->offsets.ssbo_sizes;
        if (v->constlen > offset) {
-               uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)];
-               unsigned mask = v->const_layout.ssbo_size.mask;
+               uint32_t sizes[align(const_state->ssbo_size.count, 4)];
+               unsigned mask = const_state->ssbo_size.mask;
 
                while (mask) {
                        unsigned index = u_bit_scan(&mask);
-                       unsigned off = v->const_layout.ssbo_size.off[index];
+                       unsigned off = const_state->ssbo_size.off[index];
                        sizes[off] = sb->sb[index].buffer_size;
                }
 
@@ -330,16 +333,17 @@ static void
 emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si)
 {
-       uint32_t offset = v->constbase.image_dims;
+       const struct ir3_const_state *const_state = &v->const_state;
+       uint32_t offset = const_state->offsets.image_dims;
        if (v->constlen > offset) {
-               uint32_t dims[align(v->const_layout.image_dims.count, 4)];
-               unsigned mask = v->const_layout.image_dims.mask;
+               uint32_t dims[align(const_state->image_dims.count, 4)];
+               unsigned mask = const_state->image_dims.mask;
 
                while (mask) {
                        struct pipe_image_view *img;
                        struct fd_resource *rsc;
                        unsigned index = u_bit_scan(&mask);
-                       unsigned off = v->const_layout.image_dims.off[index];
+                       unsigned off = const_state->image_dims.off[index];
 
                        img = &si->si[index];
                        rsc = fd_resource(img->resource);
@@ -382,8 +386,9 @@ static void
 emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring)
 {
+       const struct ir3_const_state *const_state = &v->const_state;
+       uint32_t base = const_state->offsets.immediate;
        int size = v->immediates_count;
-       uint32_t base = v->constbase.immediate;
 
        /* truncate size to avoid writing constants that shader
         * does not use:
@@ -407,7 +412,8 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring)
 {
        /* streamout addresses after driver-params: */
-       uint32_t offset = v->constbase.tfbo;
+       const struct ir3_const_state *const_state = &v->const_state;
+       uint32_t offset = const_state->offsets.tfbo;
        if (v->constlen > offset) {
                struct fd_streamout_stateobj *so = &ctx->streamout;
                struct ir3_stream_output_info *info = &v->shader->stream_output;
@@ -534,7 +540,8 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
        /* emit driver params every time: */
        /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
        if (info) {
-               uint32_t offset = v->constbase.driver_param;
+               const struct ir3_const_state *const_state = &v->const_state;
+               uint32_t offset = const_state->offsets.driver_param;
                if (v->constlen > offset) {
                        uint32_t vertex_params[IR3_DP_VS_COUNT] = {
                                [IR3_DP_VTXID_BASE] = info->index_size ?
@@ -628,7 +635,8 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
        emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
 
        /* emit compute-shader driver-params: */
-       uint32_t offset = v->constbase.driver_param;
+       const struct ir3_const_state *const_state = &v->const_state;
+       uint32_t offset = const_state->offsets.driver_param;
        if (v->constlen > offset) {
                ring_wfi(ctx->batch, ring);