From 23e7a34466c448c4c7c9a2c2e4d200dedf2584f7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 6 May 2019 14:52:27 -0700 Subject: [PATCH] freedreno/ir3: consolidate const state Combine the offsets of different parts of the constant space with (what was formerly known as) ir3_driver_const_layout. Bunch of churn, but no functional change. Signed-off-by: Rob Clark --- src/freedreno/ir3/ir3_a4xx.c | 7 +- src/freedreno/ir3/ir3_compiler_nir.c | 22 +++--- src/freedreno/ir3/ir3_context.c | 53 +++++--------- src/freedreno/ir3/ir3_cp.c | 4 +- src/freedreno/ir3/ir3_nir.c | 2 +- src/freedreno/ir3/ir3_nir.h | 2 +- src/freedreno/ir3/ir3_shader.c | 3 +- src/freedreno/ir3/ir3_shader.h | 72 ++++++++++++------- .../drivers/freedreno/ir3/ir3_gallium.c | 38 ++++++---- 9 files changed, 113 insertions(+), 90 deletions(-) diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 426a143acfb..5fe15cf8e27 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -217,10 +217,11 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var, /* to calculate the byte offset (yes, uggg) we need (up to) three * const values to know the bytes per pixel, and y and z stride: */ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; + struct ir3_const_state *const_state = &ctx->so->const_state; + unsigned cb = regid(const_state->offsets.image_dims, 0) + + const_state->image_dims.off[var->data.driver_location]; - debug_assert(ctx->so->const_layout.image_dims.mask & + debug_assert(const_state->image_dims.mask & (1 << var->data.driver_location)); /* offset = coords.x * bytes_per_pixel: */ diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 7a3b4a19ad7..3eb34f44b14 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -107,7 +107,8 @@ create_driver_param(struct ir3_context *ctx, enum
ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ /* NOTE: dp is in scalar, but there can be >4 dp components: */ - unsigned n = ctx->so->constbase.driver_param; + struct ir3_const_state *const_state = &ctx->so->const_state; + unsigned n = const_state->offsets.driver_param; unsigned r = regid(n + dp / 4, dp % 4); return create_uniform(ctx->block, r); } @@ -683,7 +684,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, /* UBO addresses are the first driver params, but subtract 2 here to * account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0 * is the uniforms: */ - unsigned ubo = regid(ctx->so->constbase.ubo, 0) - 2; + struct ir3_const_state *const_state = &ctx->so->const_state; + unsigned ubo = regid(const_state->offsets.ubo, 0) - 2; const unsigned ptrsz = ir3_pointer_size(ctx->compiler); int off = 0; @@ -751,11 +753,12 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { /* SSBO size stored as a const starting at ssbo_sizes: */ + struct ir3_const_state *const_state = &ctx->so->const_state; unsigned blk_idx = nir_src_as_uint(intr->src[0]); - unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) + - ctx->so->const_layout.ssbo_size.off[blk_idx]; + unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) + + const_state->ssbo_size.off[blk_idx]; - debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx)); + debug_assert(const_state->ssbo_size.mask & (1 << blk_idx)); dst[0] = create_uniform(ctx->block, idx); } @@ -1006,8 +1009,9 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, * bytes-per-pixel should have been emitted in 2nd slot of * image_dims. See ir3_shader::emit_image_dims(). 
*/ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; + struct ir3_const_state *const_state = &ctx->so->const_state; + unsigned cb = regid(const_state->offsets.image_dims, 0) + + const_state->image_dims.off[var->data.driver_location]; struct ir3_instruction *aux = create_uniform(b, cb + 1); tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0); @@ -2225,7 +2229,6 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list) static void emit_stream_out(struct ir3_context *ctx) { - struct ir3_shader_variant *v = ctx->so; struct ir3 *ir = ctx->ir; struct ir3_stream_output_info *strmout = &ctx->so->shader->stream_output; @@ -2283,10 +2286,11 @@ emit_stream_out(struct ir3_context *ctx) * stripped out in the backend. */ for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) { + struct ir3_const_state *const_state = &ctx->so->const_state; unsigned stride = strmout->stride[i]; struct ir3_instruction *base, *off; - base = create_uniform(ctx->block, regid(v->constbase.tfbo, i)); + base = create_uniform(ctx->block, regid(const_state->offsets.tfbo, i)); /* 24-bit should be enough: */ off = ir3_MUL_U(ctx->block, vtxcnt, 0, diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 8c7d9a33f3a..d2210184a60 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -101,51 +101,34 @@ ir3_context_init(struct ir3_compiler *compiler, nir_print_shader(ctx->s, stderr); } - ir3_nir_scan_driver_consts(ctx->s, &so->const_layout); + ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); - so->num_uniforms = ctx->s->num_uniforms; - so->num_ubos = ctx->s->info.num_ubos; + struct ir3_const_state *const_state = &so->const_state; + memset(&const_state->offsets, ~0, sizeof(const_state->offsets)); - ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); + ir3_nir_scan_driver_consts(ctx->s, const_state); + + const_state->num_uniforms = ctx->s->num_uniforms; + 
const_state->num_ubos = ctx->s->info.num_ubos; - /* Layout of constant registers, each section aligned to vec4. Note - * that pointer size (ubo, etc) changes depending on generation. - * - * user consts - * UBO addresses - * SSBO sizes - * if (vertex shader) { - * driver params (IR3_DP_*) - * if (stream_output.num_outputs > 0) - * stream-out addresses - * } - * immediates - * - * Immediates go last mostly because they are inserted in the CP pass - * after the nir -> ir3 frontend. - * - * Note UBO size in bytes should be aligned to vec4 - */ debug_assert((ctx->so->shader->ubo_state.size % 16) == 0); unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4); unsigned ptrsz = ir3_pointer_size(ctx->compiler); - memset(&so->constbase, ~0, sizeof(so->constbase)); - - if (so->num_ubos > 0) { - so->constbase.ubo = constoff; + if (const_state->num_ubos > 0) { + const_state->offsets.ubo = constoff; constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; } - if (so->const_layout.ssbo_size.count > 0) { - unsigned cnt = so->const_layout.ssbo_size.count; - so->constbase.ssbo_sizes = constoff; + if (const_state->ssbo_size.count > 0) { + unsigned cnt = const_state->ssbo_size.count; + const_state->offsets.ssbo_sizes = constoff; constoff += align(cnt, 4) / 4; } - if (so->const_layout.image_dims.count > 0) { - unsigned cnt = so->const_layout.image_dims.count; - so->constbase.image_dims = constoff; + if (const_state->image_dims.count > 0) { + unsigned cnt = const_state->image_dims.count; + const_state->offsets.image_dims = constoff; constoff += align(cnt, 4) / 4; } @@ -156,17 +139,17 @@ ir3_context_init(struct ir3_compiler *compiler, num_driver_params = IR3_DP_CS_COUNT; } - so->constbase.driver_param = constoff; + const_state->offsets.driver_param = constoff; constoff += align(num_driver_params, 4) / 4; if ((so->type == MESA_SHADER_VERTEX) && (compiler->gpu_id < 500) && so->shader->stream_output.num_outputs > 0) { - so->constbase.tfbo = constoff; + const_state->offsets.tfbo 
= constoff; constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4; } - so->constbase.immediate = constoff; + const_state->offsets.immediate = constoff; return ctx; } diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 28ba43f09ee..983c5fa61f2 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -323,10 +323,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags ctx->immediate_idx++; } + struct ir3_const_state *const_state = &ctx->so->const_state; + new_flags &= ~IR3_REG_IMMED; new_flags |= IR3_REG_CONST; reg->flags = new_flags; - reg->num = i + (4 * ctx->so->constbase.immediate); + reg->num = i + (4 * const_state->offsets.immediate); return reg; } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 744fd958fc6..804196f63e9 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -278,7 +278,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, void ir3_nir_scan_driver_consts(nir_shader *shader, - struct ir3_driver_const_layout *layout) + struct ir3_const_state *layout) { nir_foreach_function(function, shader) { if (!function->impl) diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index b60374410bc..bc0d496adfb 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -33,7 +33,7 @@ #include "ir3_shader.h" -void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout); +void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_const_state *layout); bool ir3_nir_apply_trig_workarounds(nir_shader *shader); bool ir3_nir_lower_tg4_to_tex(nir_shader *shader); diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 92e3e7b251d..63cad3ee414 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -350,8 +350,9 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) (regid >> 2), "xyzw"[regid 
& 0x3], i); } + struct ir3_const_state *const_state = &so->const_state; for (i = 0; i < so->immediates_count; i++) { - fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i); + fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i); fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", so->immediates[i].val[0], so->immediates[i].val[1], diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 7c1dc38de23..448f6052194 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -71,6 +71,14 @@ enum ir3_driver_param { /** + * Describes the layout of shader consts. This includes: + * + Driver lowered UBO ranges + * + SSBO sizes + * + Image sizes/dimensions + * + Driver params (ie. IR3_DP_*) + * + TFBO addresses (for generations that do not have hardware streamout) + * + Lowered immediates + * * For consts needed to pass internal values to shader which may or may not * be required, rather than allocating worst-case const space, we scan the * shader and allocate consts as-needed: @@ -80,8 +88,46 @@ enum ir3_driver_param { * * + Image dimensions: needed to calculate pixel offset, but only for * images that have a image_store intrinsic + * + * Layout of constant registers, each section aligned to vec4. Note + * that pointer size (ubo, etc) changes depending on generation. + * + * user consts + * UBO addresses + * SSBO sizes + * if (vertex shader) { + * driver params (IR3_DP_*) + * if (stream_output.num_outputs > 0) + * stream-out addresses + * } else if (compute_shader) { + * driver params (IR3_DP_*) + * } + * immediates + * + * Immediates go last mostly because they are inserted in the CP pass + * after the nir -> ir3 frontend. + * + * Note UBO size in bytes should be aligned to vec4 */ -struct ir3_driver_const_layout { +struct ir3_const_state { + /* number of uniforms (in vec4), not including built-in compiler + * constants, etc. 
+ */ + unsigned num_uniforms; + + unsigned num_ubos; + + struct { + /* user const start at zero */ + unsigned ubo; + /* NOTE that a3xx might need a section for SSBO addresses too */ + unsigned ssbo_sizes; + unsigned image_dims; + unsigned driver_param; + unsigned tfbo; + unsigned immediate; + } offsets; + struct { uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */ uint32_t count; /* number of consts allocated */ @@ -340,7 +386,7 @@ struct ir3_shader_variant { bool binning_pass; struct ir3_shader_variant *binning; - struct ir3_driver_const_layout const_layout; + struct ir3_const_state const_state; struct ir3_info info; struct ir3 *ir; @@ -361,13 +407,6 @@ struct ir3_shader_variant { */ unsigned constlen; - /* number of uniforms (in vec4), not including built-in compiler - * constants, etc. - */ - unsigned num_uniforms; - - unsigned num_ubos; - /* About Linkage: * + Let the frag shader determine the position/compmask for the * varyings, since it is the place where we know if the varying @@ -451,21 +490,6 @@ struct ir3_shader_variant { bool per_samp; - /* Layout of constant registers, each section (in vec4). Pointer size - * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the - * UBO and stream-out consts. 
- */ - struct { - /* user const start at zero */ - unsigned ubo; - /* NOTE that a3xx might need a section for SSBO addresses too */ - unsigned ssbo_sizes; - unsigned image_dims; - unsigned driver_param; - unsigned tfbo; - unsigned immediate; - } constbase; - unsigned immediates_count; unsigned immediates_size; struct { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 0f4427f3028..3bb29daf9b8 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -241,7 +241,8 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, * the user consts early to avoid HLSQ lockup caused by * writing too many consts */ - uint32_t max_const = MIN2(v->num_uniforms, v->constlen); + const struct ir3_const_state *const_state = &v->const_state; + uint32_t max_const = MIN2(const_state->num_uniforms, v->constlen); /* and even if the start of the const buffer is before * first_immediate, the end may not be: @@ -280,9 +281,10 @@ static void emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) { - uint32_t offset = v->constbase.ubo; + const struct ir3_const_state *const_state = &v->const_state; + uint32_t offset = const_state->offsets.ubo; if (v->constlen > offset) { - uint32_t params = v->num_ubos; + uint32_t params = const_state->num_ubos; uint32_t offsets[params]; struct pipe_resource *prscs[params]; @@ -309,14 +311,15 @@ static void emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) { - uint32_t offset = v->constbase.ssbo_sizes; + const struct ir3_const_state *const_state = &v->const_state; + uint32_t offset = const_state->offsets.ssbo_sizes; if (v->constlen > offset) { - uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)]; - unsigned mask = 
v->const_layout.ssbo_size.mask; + uint32_t sizes[align(const_state->ssbo_size.count, 4)]; + unsigned mask = const_state->ssbo_size.mask; while (mask) { unsigned index = u_bit_scan(&mask); - unsigned off = v->const_layout.ssbo_size.off[index]; + unsigned off = const_state->ssbo_size.off[index]; sizes[off] = sb->sb[index].buffer_size; } @@ -330,16 +333,17 @@ static void emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si) { - uint32_t offset = v->constbase.image_dims; + const struct ir3_const_state *const_state = &v->const_state; + uint32_t offset = const_state->offsets.image_dims; if (v->constlen > offset) { - uint32_t dims[align(v->const_layout.image_dims.count, 4)]; - unsigned mask = v->const_layout.image_dims.mask; + uint32_t dims[align(const_state->image_dims.count, 4)]; + unsigned mask = const_state->image_dims.mask; while (mask) { struct pipe_image_view *img; struct fd_resource *rsc; unsigned index = u_bit_scan(&mask); - unsigned off = v->const_layout.image_dims.off[index]; + unsigned off = const_state->image_dims.off[index]; img = &si->si[index]; rsc = fd_resource(img->resource); @@ -382,8 +386,9 @@ static void emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { + const struct ir3_const_state *const_state = &v->const_state; + uint32_t base = const_state->offsets.immediate; int size = v->immediates_count; - uint32_t base = v->constbase.immediate; /* truncate size to avoid writing constants that shader * does not use: @@ -407,7 +412,8 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { /* streamout addresses after driver-params: */ - uint32_t offset = v->constbase.tfbo; + const struct ir3_const_state *const_state = &v->const_state; + uint32_t offset = const_state->offsets.tfbo; if (v->constlen > offset) { struct fd_streamout_stateobj *so = &ctx->streamout; struct 
ir3_stream_output_info *info = &v->shader->stream_output; @@ -534,7 +540,8 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin /* emit driver params every time: */ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ if (info) { - uint32_t offset = v->constbase.driver_param; + const struct ir3_const_state *const_state = &v->const_state; + uint32_t offset = const_state->offsets.driver_param; if (v->constlen > offset) { uint32_t vertex_params[IR3_DP_VS_COUNT] = { [IR3_DP_VTXID_BASE] = info->index_size ? @@ -628,7 +635,8 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); /* emit compute-shader driver-params: */ - uint32_t offset = v->constbase.driver_param; + const struct ir3_const_state *const_state = &v->const_state; + uint32_t offset = const_state->offsets.driver_param; if (v->constlen > offset) { ring_wfi(ctx->batch, ring); -- 2.30.2