/* to calculate the byte offset (yes, uggg) we need (up to) three
* const values to know the bytes per pixel, and y and z stride:
*/
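+ /* Roughly (a sketch, assuming cb + 0/1/2 hold the bytes-per-pixel and
+  * the y/z strides in bytes):
+  *
+  *   offset  = coords.x * bpp
+  *   offset += coords.y * y_stride    (2D and up)
+  *   offset += coords.z * z_stride    (3D/array)
+  */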
- unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
- ctx->so->const_layout.image_dims.off[var->data.driver_location];
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+ unsigned cb = regid(const_state->offsets.image_dims, 0) +
+ const_state->image_dims.off[var->data.driver_location];
- debug_assert(ctx->so->const_layout.image_dims.mask &
+ debug_assert(const_state->image_dims.mask &
(1 << var->data.driver_location));
/* offset = coords.x * bytes_per_pixel: */
{
/* first four vec4 sysval's reserved for UBOs: */
/* NOTE: dp is in scalar, but there can be >4 dp components: */
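+ /* e.g. (sketch): dp == 6 ends up in component .z of the second
+  * driver-param vec4, i.e. regid(n + 1, 2) below:
+  */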
- unsigned n = ctx->so->constbase.driver_param;
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+ unsigned n = const_state->offsets.driver_param;
unsigned r = regid(n + dp / 4, dp % 4);
return create_uniform(ctx->block, r);
}
/* UBO addresses are the first driver params, but subtract 2 here to
* account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0
* is the uniforms: */
- unsigned ubo = regid(ctx->so->constbase.ubo, 0) - 2;
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+ unsigned ubo = regid(const_state->offsets.ubo, 0) - 2;
const unsigned ptrsz = ir3_pointer_size(ctx->compiler);
int off = 0;
struct ir3_instruction **dst)
{
/* SSBO size stored as a const starting at ssbo_sizes: */
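+ /* e.g. (sketch): a get_buffer_size on SSBO block 2 reads the scalar
+  * const at regid(ssbo_sizes, 0) + ssbo_size.off[2], which the driver
+  * fills with that block's buffer_size (see emit_ssbo_sizes() below).
+  */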
+ struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned blk_idx = nir_src_as_uint(intr->src[0]);
- unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) +
- ctx->so->const_layout.ssbo_size.off[blk_idx];
+ unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) +
+ const_state->ssbo_size.off[blk_idx];
- debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx));
+ debug_assert(const_state->ssbo_size.mask & (1 << blk_idx));
dst[0] = create_uniform(ctx->block, idx);
}
* bytes-per-pixel should have been emitted in 2nd slot of
* image_dims. See ir3_shader::emit_image_dims().
*/
- unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
- ctx->so->const_layout.image_dims.off[var->data.driver_location];
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+ unsigned cb = regid(const_state->offsets.image_dims, 0) +
+ const_state->image_dims.off[var->data.driver_location];
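+ /* (Sketch, assuming the slot holds log2 of the pixel size so the divide
+  * can be done as a shift: e.g. a 16-byte-per-texel buffer image would
+  * have 4 stored at cb + 1.)
+  */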
struct ir3_instruction *aux = create_uniform(b, cb + 1);
tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0);
static void
emit_stream_out(struct ir3_context *ctx)
{
- struct ir3_shader_variant *v = ctx->so;
struct ir3 *ir = ctx->ir;
struct ir3_stream_output_info *strmout =
&ctx->so->shader->stream_output;
* stripped out in the backend.
*/
for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
+ struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned stride = strmout->stride[i];
struct ir3_instruction *base, *off;
- base = create_uniform(ctx->block, regid(v->constbase.tfbo, i));
+ base = create_uniform(ctx->block, regid(const_state->offsets.tfbo, i));
/* 24-bit should be enough: */
off = ir3_MUL_U(ctx->block, vtxcnt, 0,
nir_print_shader(ctx->s, stderr);
}
- ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
+ ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
- so->num_uniforms = ctx->s->num_uniforms;
- so->num_ubos = ctx->s->info.num_ubos;
+ struct ir3_const_state *const_state = &so->const_state;
+ memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
- ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
+ ir3_nir_scan_driver_consts(ctx->s, const_state);
+
+ const_state->num_uniforms = ctx->s->num_uniforms;
+ const_state->num_ubos = ctx->s->info.num_ubos;
- /* Layout of constant registers, each section aligned to vec4. Note
- * that pointer size (ubo, etc) changes depending on generation.
- *
- * user consts
- * UBO addresses
- * SSBO sizes
- * if (vertex shader) {
- * driver params (IR3_DP_*)
- * if (stream_output.num_outputs > 0)
- * stream-out addresses
- * }
- * immediates
- *
- * Immediates go last mostly because they are inserted in the CP pass
- * after the nir -> ir3 frontend.
- *
- * Note UBO size in bytes should be aligned to vec4
- */
debug_assert((ctx->so->shader->ubo_state.size % 16) == 0);
unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4);
unsigned ptrsz = ir3_pointer_size(ctx->compiler);
- memset(&so->constbase, ~0, sizeof(so->constbase));
-
- if (so->num_ubos > 0) {
- so->constbase.ubo = constoff;
+ if (const_state->num_ubos > 0) {
+ const_state->offsets.ubo = constoff;
constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
}
- if (so->const_layout.ssbo_size.count > 0) {
- unsigned cnt = so->const_layout.ssbo_size.count;
- so->constbase.ssbo_sizes = constoff;
+ if (const_state->ssbo_size.count > 0) {
+ unsigned cnt = const_state->ssbo_size.count;
+ const_state->offsets.ssbo_sizes = constoff;
constoff += align(cnt, 4) / 4;
}
- if (so->const_layout.image_dims.count > 0) {
- unsigned cnt = so->const_layout.image_dims.count;
- so->constbase.image_dims = constoff;
+ if (const_state->image_dims.count > 0) {
+ unsigned cnt = const_state->image_dims.count;
+ const_state->offsets.image_dims = constoff;
constoff += align(cnt, 4) / 4;
}
num_driver_params = IR3_DP_CS_COUNT;
}
- so->constbase.driver_param = constoff;
+ const_state->offsets.driver_param = constoff;
constoff += align(num_driver_params, 4) / 4;
if ((so->type == MESA_SHADER_VERTEX) &&
(compiler->gpu_id < 500) &&
so->shader->stream_output.num_outputs > 0) {
- so->constbase.tfbo = constoff;
+ const_state->offsets.tfbo = constoff;
constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
}
- so->constbase.immediate = constoff;
+ const_state->offsets.immediate = constoff;
return ctx;
}
ctx->immediate_idx++;
}
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+
new_flags &= ~IR3_REG_IMMED;
new_flags |= IR3_REG_CONST;
reg->flags = new_flags;
- reg->num = i + (4 * ctx->so->constbase.immediate);
+ reg->num = i + (4 * const_state->offsets.immediate);
return reg;
}
void
ir3_nir_scan_driver_consts(nir_shader *shader,
- struct ir3_driver_const_layout *layout)
+ struct ir3_const_state *layout)
{
nir_foreach_function(function, shader) {
if (!function->impl)
#include "ir3_shader.h"
-void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout);
+void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_const_state *layout);
bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
(regid >> 2), "xyzw"[regid & 0x3], i);
}
+ struct ir3_const_state *const_state = &so->const_state;
for (i = 0; i < so->immediates_count; i++) {
- fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i);
+ fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
so->immediates[i].val[0],
so->immediates[i].val[1],
/**
+ * Describes the layout of shader consts. This includes:
+ * + Driver lowered UBO ranges
+ * + SSBO sizes
+ * + Image sizes/dimensions
+ * + Driver params (ie. IR3_DP_*)
+ * + TFBO addresses (for generations that do not have hardware streamout)
+ * + Lowered immediates
+ *
* For consts needed to pass internal values to shader which may or may not
* be required, rather than allocating worst-case const space, we scan the
* shader and allocate consts as-needed:
*
* + Image dimensions: needed to calculate pixel offset, but only for
* images that have a image_store intrinsic
+ *
+ * Layout of constant registers, each section aligned to vec4. Note
+ * that pointer size (ubo, etc) changes depending on generation.
+ *
+ * user consts
+ * UBO addresses
+ * SSBO sizes
+ * if (vertex shader) {
+ * driver params (IR3_DP_*)
+ * if (stream_output.num_outputs > 0)
+ * stream-out addresses
+ * } else if (compute_shader) {
+ * driver params (IR3_DP_*)
+ * }
+ * immediates
+ *
+ * Immediates go last mostly because they are inserted in the CP pass
+ * after the nir -> ir3 frontend.
+ *
+ * Note UBO size in bytes should be aligned to vec4
*/
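+/* A hypothetical example of the layout arithmetic (purely illustrative):
+ * a vertex shader on a5xx+ (ptrsz == 2) with 64 bytes of user uniforms,
+ * two UBOs, and no SSBOs/images would end up with:
+ *
+ *   user consts:    c0 .. c3    (64 bytes == 4 vec4)
+ *   UBO addresses:  c4          (align(2 * 2, 4) / 4 == 1 vec4)
+ *   driver params:  c5 ..       (IR3_DP_VS_COUNT dwords, vec4 aligned)
+ *   immediates:     appended last by the CP pass
+ */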
-struct ir3_driver_const_layout {
+struct ir3_const_state {
+ /* number of uniforms (in vec4), not including built-in compiler
+ * constants, etc.
+ */
+ unsigned num_uniforms;
+
+ unsigned num_ubos;
+
+ struct {
+ /* user consts start at zero */
+ unsigned ubo;
+ /* NOTE that a3xx might need a section for SSBO addresses too */
+ unsigned ssbo_sizes;
+ unsigned image_dims;
+ unsigned driver_param;
+ unsigned tfbo;
+ unsigned immediate;
+ } offsets;
+
struct {
uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */
uint32_t count; /* number of consts allocated */
bool binning_pass;
struct ir3_shader_variant *binning;
- struct ir3_driver_const_layout const_layout;
+ struct ir3_const_state const_state;
struct ir3_info info;
struct ir3 *ir;
*/
unsigned constlen;
- /* number of uniforms (in vec4), not including built-in compiler
- * constants, etc.
- */
- unsigned num_uniforms;
-
- unsigned num_ubos;
-
/* About Linkage:
* + Let the frag shader determine the position/compmask for the
* varyings, since it is the place where we know if the varying
bool per_samp;
- /* Layout of constant registers, each section (in vec4). Pointer size
- * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the
- * UBO and stream-out consts.
- */
- struct {
- /* user const start at zero */
- unsigned ubo;
- /* NOTE that a3xx might need a section for SSBO addresses too */
- unsigned ssbo_sizes;
- unsigned image_dims;
- unsigned driver_param;
- unsigned tfbo;
- unsigned immediate;
- } constbase;
-
unsigned immediates_count;
unsigned immediates_size;
struct {
* the user consts early to avoid HLSQ lockup caused by
* writing too many consts
*/
- uint32_t max_const = MIN2(v->num_uniforms, v->constlen);
+ const struct ir3_const_state *const_state = &v->const_state;
+ uint32_t max_const = MIN2(const_state->num_uniforms, v->constlen);
/* and even if the start of the const buffer is before
* first_immediate, the end may not be:
emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
- uint32_t offset = v->constbase.ubo;
+ const struct ir3_const_state *const_state = &v->const_state;
+ uint32_t offset = const_state->offsets.ubo;
if (v->constlen > offset) {
- uint32_t params = v->num_ubos;
+ uint32_t params = const_state->num_ubos;
uint32_t offsets[params];
struct pipe_resource *prscs[params];
emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
{
- uint32_t offset = v->constbase.ssbo_sizes;
+ const struct ir3_const_state *const_state = &v->const_state;
+ uint32_t offset = const_state->offsets.ssbo_sizes;
if (v->constlen > offset) {
- uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)];
- unsigned mask = v->const_layout.ssbo_size.mask;
+ uint32_t sizes[align(const_state->ssbo_size.count, 4)];
+ unsigned mask = const_state->ssbo_size.mask;
while (mask) {
unsigned index = u_bit_scan(&mask);
- unsigned off = v->const_layout.ssbo_size.off[index];
+ unsigned off = const_state->ssbo_size.off[index];
sizes[off] = sb->sb[index].buffer_size;
}
emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si)
{
- uint32_t offset = v->constbase.image_dims;
+ const struct ir3_const_state *const_state = &v->const_state;
+ uint32_t offset = const_state->offsets.image_dims;
if (v->constlen > offset) {
- uint32_t dims[align(v->const_layout.image_dims.count, 4)];
- unsigned mask = v->const_layout.image_dims.mask;
+ uint32_t dims[align(const_state->image_dims.count, 4)];
+ unsigned mask = const_state->image_dims.mask;
while (mask) {
struct pipe_image_view *img;
struct fd_resource *rsc;
unsigned index = u_bit_scan(&mask);
- unsigned off = v->const_layout.image_dims.off[index];
+ unsigned off = const_state->image_dims.off[index];
img = &si->si[index];
rsc = fd_resource(img->resource);
emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
+ const struct ir3_const_state *const_state = &v->const_state;
+ uint32_t base = const_state->offsets.immediate;
int size = v->immediates_count;
- uint32_t base = v->constbase.immediate;
/* truncate size to avoid writing constants that shader
* does not use:
struct fd_ringbuffer *ring)
{
/* streamout addresses after driver-params: */
- uint32_t offset = v->constbase.tfbo;
+ const struct ir3_const_state *const_state = &v->const_state;
+ uint32_t offset = const_state->offsets.tfbo;
if (v->constlen > offset) {
struct fd_streamout_stateobj *so = &ctx->streamout;
struct ir3_stream_output_info *info = &v->shader->stream_output;
/* emit driver params every time: */
/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
if (info) {
- uint32_t offset = v->constbase.driver_param;
+ const struct ir3_const_state *const_state = &v->const_state;
+ uint32_t offset = const_state->offsets.driver_param;
if (v->constlen > offset) {
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
[IR3_DP_VTXID_BASE] = info->index_size ?
emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
/* emit compute-shader driver-params: */
- uint32_t offset = v->constbase.driver_param;
+ const struct ir3_const_state *const_state = &v->const_state;
+ uint32_t offset = const_state->offsets.driver_param;
if (v->constlen > offset) {
ring_wfi(ctx->batch, ring);