const_offset = nir_src_as_const_value(intr->src[0]);
compile_assert(ctx, const_offset);
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+
offset = ir3_get_src(ctx, &intr->src[1])[0];
/* src0 is uvec2(offset*4, 0), src1 is offset.. nir already *= 4: */
}, 2);
src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
- ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0,
+ ldgb = ir3_LDGB(b, create_immed(b, ibo_idx), 0,
src0, 0, src1, 0);
ldgb->regs[0]->wrmask = MASK(intr->num_components);
ldgb->cat6.iim_val = intr->num_components;
const_offset = nir_src_as_const_value(intr->src[1]);
compile_assert(ctx, const_offset);
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+
offset = ir3_get_src(ctx, &intr->src[2])[0];
/* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0)..
create_immed(b, 0),
}, 2);
- stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0,
- src0, 0, src1, 0, src2, 0);
+ stgb = ir3_STGB(b, create_immed(b, ibo_idx), 0, src0, 0, src1, 0, src2, 0);
stgb->cat6.iim_val = ncomp;
stgb->cat6.d = 4;
stgb->cat6.type = TYPE_U32;
/* can this be non-const buffer_index? how do we handle that? */
const_offset = nir_src_as_const_value(intr->src[0]);
compile_assert(ctx, const_offset);
- ssbo = create_immed(b, const_offset->u32[0]);
+
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+ ssbo = create_immed(b, ibo_idx);
offset = ir3_get_src(ctx, &intr->src[1])[0];
struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]);
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);
unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format);
/* src0 is value
* one over the other in various cases.
*/
- stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
+ stib = ir3_STIB(b, create_immed(b, ibo_idx), 0,
ir3_create_collect(ctx, value, ncomp), 0,
ir3_create_collect(ctx, coords, ncoords), 0,
offset, 0);
struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL);
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);
- image = create_immed(b, ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0])));
+ image = create_immed(b, ibo_idx);
/* src0 is value (or uvec2(value, compare))
* src1 is coords
struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]);
struct ir3_instruction *coords[4];
unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
type_t type = ir3_get_image_type(var);
/* hmm, this seems a bit odd, but it is what blob does and (at least
{
struct ir3_block *b = ctx->block;
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
struct ir3_instruction *sam, *lod;
unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
#include "ir3_compiler.h"
#include "ir3_context.h"
+#include "ir3_image.h"
#include "ir3_shader.h"
#include "ir3_nir.h"
so->num_uniforms = ctx->s->num_uniforms;
so->num_ubos = ctx->s->info.num_ubos;
+ ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
+
/* Layout of constant registers, each section aligned to vec4. Note
* that pointer size (ubo, etc) changes depending on generation.
*
#include "ir3_image.h"
-/* Images get mapped into SSBO/image state (for store/atomic) and texture
- * state block (for load). To simplify things, invert the image id and
- * map it from end of state block, ie. image 0 becomes num-1, image 1
- * becomes num-2, etc. This potentially avoids needing to re-emit texture
- * state when switching shaders.
- *
- * TODO is max # of samplers and SSBOs the same. This shouldn't be hard-
- * coded. Also, since all the gl shader stages (ie. everything but CS)
- * share the same SSBO/image state block, this might require some more
- * logic if we supported images in anything other than FS..
+
+/*
+ * SSBO/Image to/from IBO/tex hw mapping table:
+ */
+
+void
+ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures)
+{
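+	/* IBO_INVALID is 0xff, so this marks every table entry as unused: */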
+ memset(mapping, IBO_INVALID, sizeof(*mapping));
+ mapping->num_ibo = 0;
+ mapping->num_tex = 0;
+ mapping->tex_base = num_textures;
+}
+
+unsigned
+ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo)
+{
+ if (mapping->ssbo_to_ibo[ssbo] == IBO_INVALID) {
+ unsigned ibo = mapping->num_ibo++;
+ mapping->ssbo_to_ibo[ssbo] = ibo;
+ mapping->ibo_to_image[ibo] = IBO_SSBO | ssbo;
+ }
+ return mapping->ssbo_to_ibo[ssbo];
+}
+
+unsigned
+ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo)
+{
+ if (mapping->ssbo_to_tex[ssbo] == IBO_INVALID) {
+ unsigned tex = mapping->num_tex++;
+ mapping->ssbo_to_tex[ssbo] = tex;
+ mapping->tex_to_image[tex] = IBO_SSBO | ssbo;
+ }
+ return mapping->ssbo_to_tex[ssbo] + mapping->tex_base;
+}
+
+unsigned
+ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image)
+{
+ if (mapping->image_to_ibo[image] == IBO_INVALID) {
+ unsigned ibo = mapping->num_ibo++;
+ mapping->image_to_ibo[image] = ibo;
+ mapping->ibo_to_image[ibo] = image;
+ }
+ return mapping->image_to_ibo[image];
+}
+
+unsigned
+ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image)
+{
+ if (mapping->image_to_tex[image] == IBO_INVALID) {
+ unsigned tex = mapping->num_tex++;
+ mapping->image_to_tex[image] = tex;
+ mapping->tex_to_image[tex] = image;
+ }
+ return mapping->image_to_tex[image] + mapping->tex_base;
+}
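
For reference, a minimal sketch (illustration only, not part of the patch) of how the lazy-allocation helpers above behave, assuming a shader that binds 4 real textures:

	#include <assert.h>
	#include "ir3_image.h"

	static void example_mapping(void)
	{
		struct ir3_ibo_mapping m;
		ir3_ibo_mapping_init(&m, 4);           /* 4 real textures -> tex_base == 4 */

		assert(ir3_ssbo_to_ibo(&m, 2) == 0);   /* first use allocates the next IBO slot */
		assert(ir3_image_to_ibo(&m, 0) == 1);  /* images share the same IBO space */
		assert(ir3_ssbo_to_ibo(&m, 2) == 0);   /* repeated lookups are stable */
		assert(ir3_image_to_tex(&m, 0) == 4);  /* tex slots start after the real textures */
	}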
+
+/* Helper to parse the deref for an image to get the image slot, which should
+ * then be mapped to a tex or ibo idx using ir3_image_to_tex() or ir3_image_to_ibo().
*/
unsigned
-ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref)
+ir3_get_image_slot(nir_deref_instr *deref)
{
unsigned int loc = 0;
unsigned inner_size = 1;
loc += deref->var->data.driver_location;
- /* TODO figure out real limit per generation, and don't hardcode: */
- const unsigned max_samplers = 16;
- return max_samplers - loc - 1;
+ return loc;
}
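
Putting the helpers together, the intrinsic handlers updated above all follow the same pattern: parse the deref to a logical slot, then remap it through the texture table for loads or the IBO table for stores and atomics:

	unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
	unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);  /* isam loads */
	unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);  /* stib/atomics */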
/* see tex_info() for equiv logic for texture instructions.. it would be
#include "ir3_context.h"
-unsigned ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref);
+
+void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures);
+unsigned ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo);
+unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo);
+unsigned ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image);
+unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image);
+
+unsigned ir3_get_image_slot(nir_deref_instr *deref);
unsigned ir3_get_image_coords(const nir_variable *var, unsigned *flagsp);
type_t ir3_get_image_type(const nir_variable *var);
unsigned ir3_get_num_components_for_glformat(GLuint format);
/* TODO */
break;
}
-
}
+/**
+ * On a4xx+a5xx, Images share state with textures and SSBOs:
+ *
+ * + Uses texture (cat5) state/instruction (isam) to read
+ * + Uses SSBO state and instructions (cat6) to write and for atomics
+ *
+ * Starting with a6xx, Images and SSBOs are basically the same thing,
+ * with texture state and isam also used for SSBO reads.
+ *
+ * On top of that, gallium makes the SSBO (shader_buffers) state
+ * semi-sparse, with the first half of the state space used for atomic
+ * counters lowered to atomic buffers. We could ignore this, but I
+ * don't think we could *really* handle the case of a single shader
+ * that used the max # of textures + images + SSBOs. And once we are
+ * offsetting images by num_ssbos (or vice versa) to map them into
+ * the same hardware state, the hardware state has become coupled to
+ * the shader state, so at this point we might as well just use a
+ * mapping table to remap things from image/SSBO idx to hw idx.
+ *
+ * To make things less (more?) confusing, I'll use the name "IBO" for
+ * the hw "SSBO" state, since it is really both SSBO and Image state.
+ */
+struct ir3_ibo_mapping {
+#define IBO_INVALID 0xff
+ /* Maps logical SSBO state to hw state: */
+ uint8_t ssbo_to_ibo[IR3_MAX_SHADER_BUFFERS];
+ uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS];
+
+ /* Maps logical Image state to hw state: */
+ uint8_t image_to_ibo[IR3_MAX_SHADER_IMAGES];
+ uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES];
+
+ /* Maps hw state back to logical SSBO or Image state:
+ *
+	 * note that IBO_SSBO is OR'd into values to indicate that the
+ * hw slot is used for SSBO state vs Image state.
+ */
+#define IBO_SSBO 0x80
+ uint8_t ibo_to_image[32];
+ uint8_t tex_to_image[32];
+
+ uint8_t num_ibo;
+ uint8_t num_tex; /* including real textures */
+ uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */
+};
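
As an illustration of the reverse maps, state-emit code could decode a hw IBO slot back to its logical index like so (hypothetical helper, not part of the patch):

	#include <stdbool.h>
	#include <stdint.h>

	/* Hypothetical decode helper, illustration only: returns true if the hw
	 * IBO slot holds SSBO state, and writes back the logical SSBO/Image index.
	 */
	static inline bool
	ibo_slot_is_ssbo(const struct ir3_ibo_mapping *m, unsigned ibo, unsigned *idx)
	{
		uint8_t v = m->ibo_to_image[ibo];
		*idx = v & ~IBO_SSBO;      /* strip the flag bit to recover the index */
		return !!(v & IBO_SSBO);   /* flag set -> SSBO state, clear -> Image */
	}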
+
struct ir3_shader_variant {
struct fd_bo *bo;
*/
unsigned varying_in;
+ /* Remapping table to map Image and SSBO to hw state: */
+ struct ir3_ibo_mapping image_mapping;
+
/* number of samplers/textures (which are currently 1:1): */
int num_samp;
static void
emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+ enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so,
+ const struct ir3_shader_variant *v)
{
unsigned count = util_last_bit(so->enabled_mask);
+ const struct ir3_ibo_mapping *m = &v->image_mapping;
- if (count == 0)
- return;
-
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(count));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
for (unsigned i = 0; i < count; i++) {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
+ unsigned slot = m->ssbo_to_ibo[i];
+
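+		/* emit a single unit at the remapped IBO slot, rather than rewriting slots 0..count-1: */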
+ OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(1));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(count));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- for (unsigned i = 0; i < count; i++) {
struct pipe_shader_buffer *buf = &so->sb[i];
unsigned sz = buf->buffer_size;
OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
- }
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(count));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- for (unsigned i = 0; i < count; i++) {
- struct pipe_shader_buffer *buf = &so->sb[i];
+ OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(1));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
if (buf->buffer) {
struct fd_resource *rsc = fd_resource(buf->buffer);
OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0);
emit_border_color(ctx, ring);
if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
- emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
+ emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT], fp);
if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
- fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT);
+ fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT, fp);
}
void
~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
if (dirty & FD_DIRTY_SHADER_SSBO)
- emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);
+ emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE], cp);
if (dirty & FD_DIRTY_SHADER_IMAGE)
- fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
+ fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp);
}
/* emit setup at begin of new cmdstream buffer (don't rely on previous
}
}
-/* Note that to avoid conflicts with textures and non-image "SSBO"s, images
- * are placedd, in reverse order, at the end of the state block, so for
- * example the sampler state:
- *
- * 0: first texture
- * 1: second texture
- * ....
- * N-1: second image
- * N: first image
- */
-static unsigned
-get_image_slot(unsigned index)
-{
- /* TODO figure out real limit per generation, and don't hardcode.
- * This needs to match get_image_slot() in ir3_compiler_nir.
- * Possibly should be factored out into shared helper?
- */
- const unsigned max_samplers = 16;
- return max_samplers - index - 1;
-}
-
/* Emit required "SSBO" and sampler state. The sampler state is used by the
 * hw for imageLoad(), and "SSBO" state for imageStore().
 */
void
fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum pipe_shader_type shader)
+ enum pipe_shader_type shader, const struct ir3_shader_variant *v)
{
struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
unsigned enabled_mask = so->enabled_mask;
+ const struct ir3_ibo_mapping *m = &v->image_mapping;
while (enabled_mask) {
unsigned index = u_bit_scan(&enabled_mask);
- unsigned slot = get_image_slot(index);
struct fd5_image img;
translate_image(&img, &so->si[index]);
- emit_image_tex(ring, slot, &img, shader);
- emit_image_ssbo(ring, slot, &img, shader);
+ emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
+ emit_image_ssbo(ring, m->image_to_ibo[index], &img, shader);
}
}
#include "freedreno_context.h"
+struct ir3_shader_variant;
void fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum pipe_shader_type shader);
+ enum pipe_shader_type shader, const struct ir3_shader_variant *v);
#endif /* FD5_IMAGE_H_ */