freedreno/a6xx: image/ssbo state emit
author Rob Clark <robdclark@gmail.com>
Mon, 4 Feb 2019 18:30:34 +0000 (13:30 -0500)
committer Rob Clark <robdclark@gmail.com>
Sat, 16 Feb 2019 21:28:00 +0000 (16:28 -0500)
Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_image.c
src/gallium/drivers/freedreno/a6xx/fd6_image.h
src/gallium/drivers/freedreno/a6xx/fd6_program.c
src/gallium/drivers/freedreno/a6xx/fd6_texture.c

index 1d2077048a4baa665f151b5adc473e27726e23e6..397c04390c7a896e6e28f4e34ab7bc6cbf690f75 100644 (file)
@@ -329,7 +329,10 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
 bool
 fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
                enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
-               unsigned bcolor_offset)
+               unsigned bcolor_offset,
+               /* can be NULL if no image/SSBO state to merge in: */
+               const struct ir3_shader_variant *v, struct fd_shaderbuf_stateobj *buf,
+               struct fd_shaderimg_stateobj *img)
 {
        bool needs_border = false;
        unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
@@ -357,7 +360,6 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
                unreachable("bad state block");
        }
 
-
        if (tex->num_samplers > 0) {
                struct fd_ringbuffer *state =
                        fd_ringbuffer_new_object(pipe, tex->num_samplers * 4 * 4);
@@ -388,10 +390,24 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
                fd_ringbuffer_del(state);
        }
 
-       if (tex->num_textures > 0) {
+       unsigned num_merged_textures = tex->num_textures;
+       unsigned num_textures = tex->num_textures;
+       if (v) {
+               num_merged_textures += v->image_mapping.num_tex;
+
+               /* There could be more bound textures than the shader uses,
+                * which isn't known at shader compile time.  So when we are
+                * merging tex state, only emit the textures that the shader
+                * uses (since the image/SSBO related tex state comes
+                * immediately after)
+                */
+               num_textures = v->image_mapping.tex_base;
+       }
+
+       if (num_merged_textures > 0) {
                struct fd_ringbuffer *state =
-                       fd_ringbuffer_new_object(pipe, tex->num_textures * 16 * 4);
-               for (unsigned i = 0; i < tex->num_textures; i++) {
+                       fd_ringbuffer_new_object(pipe, num_merged_textures * 16 * 4);
+               for (unsigned i = 0; i < num_textures; i++) {
                        static const struct fd6_pipe_sampler_view dummy_view = {};
                        const struct fd6_pipe_sampler_view *view = tex->textures[i] ?
                                fd6_pipe_sampler_view(tex->textures[i]) : &dummy_view;
@@ -424,13 +440,26 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
                        OUT_RING(state, 0);
                }
 
+               if (v) {
+                       const struct ir3_ibo_mapping *mapping = &v->image_mapping;
+
+                       for (unsigned i = 0; i < mapping->num_tex; i++) {
+                               unsigned idx = mapping->tex_to_image[i];
+                               if (idx & IBO_SSBO) {
+                                       fd6_emit_ssbo_tex(state, &buf->sb[idx & ~IBO_SSBO]);
+                               } else {
+                                       fd6_emit_image_tex(state, &img->si[idx]);
+                               }
+                       }
+               }
+
                /* emit texture state: */
                OUT_PKT7(ring, opcode, 3);
                OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                        CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                        CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                        CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE6_0_NUM_UNIT(tex->num_textures));
+                       CP_LOAD_STATE6_0_NUM_UNIT(num_merged_textures));
                OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
 
                OUT_PKT4(ring, tex_const_reg, 2);
@@ -441,85 +470,81 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
 
        if (tex_count_reg) {
                OUT_PKT4(ring, tex_count_reg, 1);
-               OUT_RING(ring, tex->num_textures);
+               OUT_RING(ring, num_merged_textures);
        }
 
        return needs_border;
 }
 
-static void
-emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
-               enum a6xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+/* Emits combined texture state, which also includes any Image/SSBO
+ * related texture state merged in (because we must have all texture
+ * state for a given stage in a single buffer).  In the fast-path, if
+ * we don't need to merge in any image/ssbo related texture state, we
+ * just use cached texture stateobj.  Otherwise we generate a single-
+ * use stateobj.
+ *
+ * TODO Is there some sane way we can still use cached texture stateobj
+ * with image/ssbo in use?
+ *
+ * returns whether border_color is required:
+ */
+static bool
+fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit,
+               enum pipe_shader_type type, const struct ir3_shader_variant *v)
 {
-       unsigned count = util_last_bit(so->enabled_mask);
-       unsigned opcode;
-
-       if (count == 0)
-               return;
-
-       switch (sb) {
-       case SB6_IBO:
-       case SB6_CS_IBO:
-               opcode = CP_LOAD_STATE6_GEOM;
-               break;
-       default:
-               unreachable("bad state block");
-       }
+       struct fd_context *ctx = emit->ctx;
+       bool needs_border = false;
 
-       OUT_PKT7(ring, opcode, 3 + (4 * count));
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
-                       CP_LOAD_STATE6_0_STATE_TYPE(0) |
-                       CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-                       CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE6_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-       for (unsigned i = 0; i < count; i++) {
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-       }
+       static const struct {
+               enum a6xx_state_block sb;
+               enum fd6_state_id state_id;
+       } s[PIPE_SHADER_TYPES] = {
+               [PIPE_SHADER_VERTEX]    = { SB6_VS_TEX, FD6_GROUP_VS_TEX },
+               [PIPE_SHADER_FRAGMENT]  = { SB6_FS_TEX, FD6_GROUP_FS_TEX },
+       };
 
-#if 0
-       OUT_PKT7(ring, opcode, 3 + (2 * count));
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
-                       CP_LOAD_STATE6_0_STATE_TYPE(1) |
-                       CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-                       CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE6_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-       for (unsigned i = 0; i < count; i++) {
-               struct pipe_shader_buffer *buf = &so->sb[i];
-               unsigned sz = buf->buffer_size;
+       debug_assert(s[type].state_id);
 
-               /* width is in dwords, overflows into height: */
-               sz /= 4;
+       if (!v->image_mapping.num_tex) {
+               /* in the fast-path, when we don't have to mix in any image/SSBO
+                * related texture state, we can just lookup the stateobj and
+                * re-emit that:
+                */
+               if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
+                               ctx->tex[type].num_textures > 0) {
+                       struct fd6_texture_state *tex = fd6_texture_state(ctx,
+                                       s[type].sb, &ctx->tex[type]);
 
-               OUT_RING(ring, A6XX_SSBO_1_0_WIDTH(sz));
-               OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(sz >> 16));
-       }
-#endif
+                       needs_border |= tex->needs_border;
 
-       OUT_PKT7(ring, opcode, 3 + (2 * count));
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
-                       CP_LOAD_STATE6_0_STATE_TYPE(2) |
-                       CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-                       CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE6_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-       for (unsigned i = 0; i < count; i++) {
-               struct pipe_shader_buffer *buf = &so->sb[i];
-               if (buf->buffer) {
-                       struct fd_resource *rsc = fd_resource(buf->buffer);
-                       OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0);
-               } else {
-                       OUT_RING(ring, 0x00000000);
-                       OUT_RING(ring, 0x00000000);
+                       fd6_emit_add_group(emit, tex->stateobj, s[type].state_id, 0x7);
+               }
+       } else {
+               /* In the slow-path, create a one-shot texture state object
+                * if any of TEX|PROG|SSBO|IMAGE state is dirty:
+                */
+               if (ctx->dirty_shader[type] &
+                               (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
+                                FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
+                       struct fd_texture_stateobj *tex = &ctx->tex[type];
+                       struct fd_shaderbuf_stateobj *buf = &ctx->shaderbuf[type];
+                       struct fd_shaderimg_stateobj *img = &ctx->shaderimg[type];
+                       struct fd_ringbuffer *stateobj =
+                               fd_submit_new_ringbuffer(ctx->batch->submit,
+                                       0x1000, FD_RINGBUFFER_STREAMING);
+                       unsigned bcolor_offset =
+                               fd6_border_color_offset(ctx, s[type].sb, tex);
+
+                       needs_border |= fd6_emit_textures(ctx->pipe, stateobj, s[type].sb, tex,
+                                       bcolor_offset, v, buf, img);
+
+                       fd6_emit_add_group(emit, stateobj, s[type].state_id, 0x7);
+
+                       fd_ringbuffer_del(stateobj);
                }
        }
+
+       return needs_border;
 }
 
 static struct fd_ringbuffer *
@@ -906,34 +931,38 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
                OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
        }
 
-       if ((ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) &&
-                       ctx->tex[PIPE_SHADER_VERTEX].num_textures > 0) {
-               struct fd6_texture_state *tex = fd6_texture_state(ctx,
-                               SB6_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
-
-               needs_border |= tex->needs_border;
-
-               fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_VS_TEX, 0x7);
-       }
+       needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vp);
+       needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fp);
 
-       if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) &&
-                       ctx->tex[PIPE_SHADER_FRAGMENT].num_textures > 0) {
-               struct fd6_texture_state *tex = fd6_texture_state(ctx,
-                               SB6_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
+       if (needs_border)
+               emit_border_color(ctx, ring);
 
-               needs_border |= tex->needs_border;
+       if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] &
+                       (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
+               struct fd_ringbuffer *state =
+                       fd6_build_ibo_state(ctx, fp, PIPE_SHADER_FRAGMENT);
+               struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(
+                       ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);
+               const struct ir3_ibo_mapping *mapping = &fp->image_mapping;
 
-               fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_FS_TEX, 0x7);
-       }
+               OUT_PKT7(obj, CP_LOAD_STATE6, 3);
+               OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) |
+                       CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+                       CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+                       CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
+                       CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo));
+               OUT_RB(obj, state);
 
-       if (needs_border)
-               emit_border_color(ctx, ring);
+               OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2);
+               OUT_RB(obj, state);
 
-       if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
-               emit_ssbos(ctx, ring, SB6_IBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
+               OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1);
+               OUT_RING(obj, mapping->num_ibo);
 
-       if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
-               fd6_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT);
+               fd6_emit_add_group(emit, obj, FD6_GROUP_IBO, 0x7);
+               fd_ringbuffer_del(obj);
+               fd_ringbuffer_del(state);
+       }
 
        if (emit->num_groups > 0) {
                OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
@@ -970,7 +999,7 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
        if (dirty & FD_DIRTY_SHADER_TEX) {
                bool needs_border = false;
                needs_border |= fd6_emit_textures(ctx->pipe, ring, SB6_CS_TEX,
-                               &ctx->tex[PIPE_SHADER_COMPUTE], 0);
+                               &ctx->tex[PIPE_SHADER_COMPUTE], 0, NULL, NULL, NULL);
 
                if (needs_border)
                        emit_border_color(ctx, ring);
@@ -999,11 +1028,11 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        ~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
 #endif
 
-       if (dirty & FD_DIRTY_SHADER_SSBO)
-               emit_ssbos(ctx, ring, SB6_CS_IBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);
-
-       if (dirty & FD_DIRTY_SHADER_IMAGE)
-               fd6_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
+//     if (dirty & FD_DIRTY_SHADER_SSBO)
+//             fd6_emit_ssbos(ctx, ring, PIPE_SHADER_COMPUTE);
+//
+//     if (dirty & FD_DIRTY_SHADER_IMAGE)
+//             fd6_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
 }
 
 
index 9e578844741e4228137f157782be62055c099adf..ab7ace4f26f6d515d09728bb8f34bb82841bdbcc 100644 (file)
@@ -53,6 +53,7 @@ enum fd6_state_id {
        FD6_GROUP_FS_CONST,
        FD6_GROUP_VS_TEX,
        FD6_GROUP_FS_TEX,
+       FD6_GROUP_IBO,
        FD6_GROUP_RASTERIZER,
        FD6_GROUP_ZSA,
 };
@@ -173,7 +174,9 @@ fd6_stage2shadersb(gl_shader_stage type)
 
 bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
                enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
-               unsigned bcolor_offset);
+               unsigned bcolor_offset,
+               const struct ir3_shader_variant *v, struct fd_shaderbuf_stateobj *buf,
+               struct fd_shaderimg_stateobj *img);
 
 void fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit);
 
index f4e3492154dccaad6dfaed8e24e1e10187d20244..153c00435f60599ef67774f72529c8efa46026b6 100644 (file)
 #include "fd6_format.h"
 #include "fd6_texture.h"
 
-static enum a6xx_state_block texsb[] = {
-       [PIPE_SHADER_COMPUTE] = SB6_CS_TEX,
-       [PIPE_SHADER_FRAGMENT] = SB6_FS_TEX,
-};
-
-static enum a6xx_state_block imgsb[] = {
-       [PIPE_SHADER_COMPUTE] = SB6_CS_IBO,
-       [PIPE_SHADER_FRAGMENT] = SB6_IBO,
-};
-
 struct fd6_image {
        struct pipe_resource *prsc;
        enum pipe_format pfmt;
@@ -57,16 +47,16 @@ struct fd6_image {
        uint32_t array_pitch;
        struct fd_bo *bo;
        uint32_t offset;
+       bool buffer;
 };
 
-static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg)
+static void translate_image(struct fd6_image *img, const struct pipe_image_view *pimg)
 {
        enum pipe_format format = pimg->format;
        struct pipe_resource *prsc = pimg->resource;
        struct fd_resource *rsc = fd_resource(prsc);
-       unsigned lvl;
 
-       if (!pimg->resource) {
+       if (!prsc) {
                memset(img, 0, sizeof(*img));
                return;
        }
@@ -81,45 +71,76 @@ static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg)
        img->bo        = rsc->bo;
 
        if (prsc->target == PIPE_BUFFER) {
-               lvl = 0;
+               img->buffer = true;
                img->offset = pimg->u.buf.offset;
-               img->pitch  = pimg->u.buf.size;
+               img->pitch  = 0;
                img->array_pitch = 0;
+
+               /* size is encoded with low 15b in WIDTH and high bits in
+                * HEIGHT, in units of elements:
+                */
+               unsigned sz = prsc->width0;
+               img->width  = sz & MASK(15);
+               img->height = sz >> 15;
+               img->depth  = 0;
        } else {
-               lvl = pimg->u.tex.level;
+               img->buffer = false;
+               unsigned lvl = pimg->u.tex.level;
                img->offset = rsc->slices[lvl].offset;
                img->pitch  = rsc->slices[lvl].pitch * rsc->cpp;
                img->array_pitch = rsc->layer_size;
-       }
 
-       img->width     = u_minify(prsc->width0, lvl);
-       img->height    = u_minify(prsc->height0, lvl);
-       img->depth     = u_minify(prsc->depth0, lvl);
+               img->width  = u_minify(prsc->width0, lvl);
+               img->height = u_minify(prsc->height0, lvl);
+               img->depth  = u_minify(prsc->depth0, lvl);
+       }
 }
 
-static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
-               struct fd6_image *img, enum pipe_shader_type shader)
+static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg)
 {
-       unsigned opcode = CP_LOAD_STATE6_FRAG;
+       enum pipe_format format = PIPE_FORMAT_R32_UINT;
+       struct pipe_resource *prsc = pimg->buffer;
+       struct fd_resource *rsc = fd_resource(prsc);
 
-       assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
+       if (!prsc) {
+               memset(img, 0, sizeof(*img));
+               return;
+       }
+
+       img->prsc      = prsc;
+       img->pfmt      = format;
+       img->fmt       = fd6_pipe2tex(format);
+       img->fetchsize = fd6_pipe2fetchsize(format);
+       img->type      = fd6_tex_type(prsc->target);
+       img->srgb      = util_format_is_srgb(format);
+       img->cpp       = rsc->cpp;
+       img->bo        = rsc->bo;
+       img->buffer    = true;
 
-       OUT_PKT7(ring, opcode, 3 + 12);
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
-               CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
-               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-               CP_LOAD_STATE6_0_STATE_BLOCK(texsb[shader]) |
-               CP_LOAD_STATE6_0_NUM_UNIT(1));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+       img->offset = pimg->buffer_offset;
+       img->pitch  = 0;
+       img->array_pitch = 0;
 
+       /* size is encoded with low 15b in WIDTH and high bits in HEIGHT,
+        * in units of elements:
+        */
+       unsigned sz = pimg->buffer_size / 4;
+       img->width  = sz & MASK(15);
+       img->height = sz >> 15;
+       img->depth  = 0;
+}
+
+static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
+{
        OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) |
+               A6XX_TEX_CONST_0_TILE_MODE(fd_resource(img->prsc)->tile_mode) |
                fd6_tex_swiz(img->prsc, img->fmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                        PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
                COND(img->srgb, A6XX_TEX_CONST_0_SRGB));
        OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) |
                A6XX_TEX_CONST_1_HEIGHT(img->height));
        OUT_RING(ring, A6XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) |
+               COND(img->buffer, A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31) |
                A6XX_TEX_CONST_2_TYPE(img->type) |
                A6XX_TEX_CONST_2_PITCH(img->pitch));
        OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
@@ -136,101 +157,85 @@ static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
 }
 
-static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
-               struct fd6_image *img, enum pipe_shader_type shader)
+void
+fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg)
 {
-       unsigned opcode = CP_LOAD_STATE6_FRAG;
+       struct fd6_image img;
+       translate_image(&img, pimg);
+       emit_image_tex(ring, &img);
+}
 
-       assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
+void
+fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf)
+{
+       struct fd6_image img;
+       translate_buf(&img, pbuf);
+       emit_image_tex(ring, &img);
+}
 
-#if 0
-       OUT_PKT7(ring, opcode, 3 + 4);
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
-               CP_LOAD_STATE6_0_STATE_TYPE(0) |
-               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-               CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
-               CP_LOAD_STATE6_0_NUM_UNIT(1));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-       OUT_RING(ring, A6XX_SSBO_0_0_BASE_LO(0));
-       OUT_RING(ring, A6XX_SSBO_0_1_PITCH(img->pitch));
-       OUT_RING(ring, A6XX_SSBO_0_2_ARRAY_PITCH(img->array_pitch));
-       OUT_RING(ring, A6XX_SSBO_0_3_CPP(img->cpp));
-#endif
-
-#if 0
-       OUT_PKT7(ring, opcode, 3 + 2);
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
-               CP_LOAD_STATE6_0_STATE_TYPE(1) |
-               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-               CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
-               CP_LOAD_STATE6_0_NUM_UNIT(1));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-       OUT_RING(ring, A6XX_SSBO_1_0_FMT(img->fmt) |
-               A6XX_SSBO_1_0_WIDTH(img->width));
-       OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(img->height) |
-               A6XX_SSBO_1_1_DEPTH(img->depth));
-#endif
-
-       OUT_PKT7(ring, opcode, 3 + 2);
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
-               CP_LOAD_STATE6_0_STATE_TYPE(2) |
-               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-               CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
-               CP_LOAD_STATE6_0_NUM_UNIT(1));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
+{
+       debug_assert(fd_resource(img->prsc)->tile_mode == 0);
+
+       OUT_RING(ring, A6XX_IBO_0_FMT(img->fmt));
+       OUT_RING(ring, A6XX_IBO_1_WIDTH(img->width) |
+               A6XX_IBO_1_HEIGHT(img->height));
+       OUT_RING(ring, A6XX_IBO_2_PITCH(img->pitch) |
+               COND(img->buffer, A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31) |
+               A6XX_IBO_2_TYPE(img->type));
+       OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch));
        if (img->bo) {
-               OUT_RELOCW(ring, img->bo, img->offset, 0, 0);
+               OUT_RELOCW(ring, img->bo, img->offset,
+                       (uint64_t)A6XX_IBO_5_DEPTH(img->depth) << 32, 0);
        } else {
                OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, A6XX_IBO_5_DEPTH(img->depth));
        }
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
 }
 
-/* Note that to avoid conflicts with textures and non-image "SSBO"s, images
- * are placedd, in reverse order, at the end of the state block, so for
- * example the sampler state:
- *
- *   0:   first texture
- *   1:   second texture
- *   ....
- *   N-1: second image
- *   N:   first image
- */
-static unsigned
-get_image_slot(unsigned index)
-{
-       /* TODO figure out real limit per generation, and don't hardcode.
-        * This needs to match get_image_slot() in ir3_compiler_nir.
-        * Possibly should be factored out into shared helper?
-        */
-       const unsigned max_samplers = 16;
-       return max_samplers - index - 1;
-}
-
-/* Emit required "SSBO" and sampler state.  The sampler state is used by the
- * hw for imageLoad(), and "SSBO" state for imageStore().  Returns max sampler
- * used.
- */
-void
-fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
+/* Build combined image/SSBO "IBO" state; the caller owns the returned state reference */
+struct fd_ringbuffer *
+fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
                enum pipe_shader_type shader)
 {
-       struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
-       unsigned enabled_mask = so->enabled_mask;
+       struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
+       struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
+       const struct ir3_ibo_mapping *mapping = &v->image_mapping;
+
+       struct fd_ringbuffer *state =
+               fd_submit_new_ringbuffer(ctx->batch->submit,
+                       mapping->num_ibo * 16 * 4, FD_RINGBUFFER_STREAMING);
 
-       while (enabled_mask) {
-               unsigned index = u_bit_scan(&enabled_mask);
-               unsigned slot = get_image_slot(index);
+       assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
+
+       for (unsigned i = 0; i < mapping->num_ibo; i++) {
                struct fd6_image img;
+               unsigned idx = mapping->ibo_to_image[i];
 
-               translate_image(&img, &so->si[index]);
+               if (idx & IBO_SSBO) {
+                       translate_buf(&img, &bufso->sb[idx & ~IBO_SSBO]);
+               } else {
+                       translate_image(&img, &imgso->si[idx]);
+               }
 
-               emit_image_tex(ring, slot, &img, shader);
-               emit_image_ssbo(ring, slot, &img, shader);
+               emit_image_ssbo(state, &img);
        }
+
+       return state;
 }
index 0ee539327375a3c1db1c3011fd21d1944ba92e51..a2dbfd3c1a8b41c231ffef3feed918e9ab52d839 100644 (file)
 
 #include "freedreno_context.h"
 
-void fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
-               enum pipe_shader_type shader);
+void fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg);
+void fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf);
+
+struct ir3_shader_variant;
+struct fd_ringbuffer * fd6_build_ibo_state(struct fd_context *ctx,
+               const struct ir3_shader_variant *v, enum pipe_shader_type shader);
 
 #endif /* FD6_IMAGE_H_ */
index 9180154ed68e8befba073e820f6625b435b3ccaa..a9d8384aff71f99b6650429bcea8e789e9cecc0e 100644 (file)
@@ -349,6 +349,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
 
        OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 2);
        OUT_RING(ring, COND(s[VS].v, A6XX_SP_VS_CONFIG_ENABLED) |
+                        A6XX_SP_VS_CONFIG_NIBO(s[VS].v->image_mapping.num_ibo) |
                         A6XX_SP_VS_CONFIG_NTEX(s[VS].v->num_samp) |
                         A6XX_SP_VS_CONFIG_NSAMP(s[VS].v->num_samp));     /* SP_VS_CONFIG */
        OUT_RING(ring, s[VS].instrlen);                                                   /* SP_VS_INSTRLEN */
@@ -382,6 +383,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
 
        OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 2);
        OUT_RING(ring, COND(s[FS].v, A6XX_SP_FS_CONFIG_ENABLED) |
+                        A6XX_SP_FS_CONFIG_NIBO(s[FS].v->image_mapping.num_ibo) |
                         A6XX_SP_FS_CONFIG_NTEX(s[FS].v->num_samp) |
                         A6XX_SP_FS_CONFIG_NSAMP(s[FS].v->num_samp));     /* SP_FS_CONFIG */
        OUT_RING(ring, s[FS].instrlen);                                                   /* SP_FS_INSTRLEN */
index f4bad031e6b53864edab6d4b84b76198a852b5e9..171a016d985e384908f80968c6299308d4ada2d8 100644 (file)
@@ -448,7 +448,8 @@ fd6_texture_state(struct fd_context *ctx, enum a6xx_state_block sb,
        state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
        state->needs_border = needs_border;
 
-       fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset);
+       fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset,
+                       NULL, NULL, NULL);
 
        /* NOTE: uses copy of key in state obj, because pointer passed by caller
         * is probably on the stack