freedreno/a6xx: use single format enum
[mesa.git] / src / gallium / drivers / freedreno / a6xx / fd6_image.c
index f7419d8d9ac234ce5f0eadf941d6ea74eeced0c0..6ac3cc5c4c9dc111b5a6c706d8b39e7bffad632e 100644 (file)
 #include "pipe/p_state.h"
 
 #include "freedreno_resource.h"
+#include "freedreno_state.h"
+
 #include "fd6_image.h"
 #include "fd6_format.h"
+#include "fd6_resource.h"
 #include "fd6_texture.h"
 
-static enum a6xx_state_block texsb[] = {
-       [PIPE_SHADER_COMPUTE] = SB6_CS_TEX,
-       [PIPE_SHADER_FRAGMENT] = SB6_FS_TEX,
-};
-
-static enum a6xx_state_block imgsb[] = {
-       [PIPE_SHADER_COMPUTE] = SB6_CS_SSBO,
-       [PIPE_SHADER_FRAGMENT] = SB6_SSBO,
-};
-
 struct fd6_image {
        struct pipe_resource *prsc;
        enum pipe_format pfmt;
-       enum a6xx_tex_fmt fmt;
+       enum a6xx_format fmt;
        enum a6xx_tex_fetchsize fetchsize;
        enum a6xx_tex_type type;
        bool srgb;
        uint32_t cpp;
+       uint32_t level;
        uint32_t width;
        uint32_t height;
        uint32_t depth;
        uint32_t pitch;
        uint32_t array_pitch;
        struct fd_bo *bo;
+       uint32_t ubwc_offset;
        uint32_t offset;
+       bool buffer;
 };
 
-static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg)
+static void translate_image(struct fd6_image *img, const struct pipe_image_view *pimg)
 {
        enum pipe_format format = pimg->format;
        struct pipe_resource *prsc = pimg->resource;
        struct fd_resource *rsc = fd_resource(prsc);
-       unsigned lvl;
 
-       if (!pimg->resource) {
+       if (!prsc) {
                memset(img, 0, sizeof(*img));
                return;
        }
@@ -77,52 +72,119 @@ static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg)
        img->fetchsize = fd6_pipe2fetchsize(format);
        img->type      = fd6_tex_type(prsc->target);
        img->srgb      = util_format_is_srgb(format);
-       img->cpp       = rsc->cpp;
+       img->cpp       = rsc->layout.cpp;
        img->bo        = rsc->bo;
 
+       /* Treat cube textures as 2d-array: */
+       if (img->type == A6XX_TEX_CUBE)
+               img->type = A6XX_TEX_2D;
+
        if (prsc->target == PIPE_BUFFER) {
-               lvl = 0;
+               img->buffer = true;
+               img->ubwc_offset = 0;    /* not valid for buffers */
                img->offset = pimg->u.buf.offset;
-               img->pitch  = pimg->u.buf.size;
+               img->pitch  = 0;
                img->array_pitch = 0;
+
+               /* size is encoded with low 15b in WIDTH and high bits in
+                * HEIGHT, in units of elements:
+                */
+               unsigned sz = prsc->width0;
+               img->width  = sz & MASK(15);
+               img->height = sz >> 15;
+               img->depth  = 0;
        } else {
-               lvl = pimg->u.tex.level;
-               img->offset = rsc->slices[lvl].offset;
-               img->pitch  = rsc->slices[lvl].pitch * rsc->cpp;
-               img->array_pitch = rsc->layer_size;
-       }
+               img->buffer = false;
+
+               unsigned lvl = pimg->u.tex.level;
+               struct fdl_slice *slice = fd_resource_slice(rsc, lvl);
+               unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
 
-       img->width     = u_minify(prsc->width0, lvl);
-       img->height    = u_minify(prsc->height0, lvl);
-       img->depth     = u_minify(prsc->depth0, lvl);
+               img->ubwc_offset = fd_resource_ubwc_offset(rsc, lvl, pimg->u.tex.first_layer);
+               img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
+               img->pitch  = slice->pitch * rsc->layout.cpp;
+
+               switch (prsc->target) {
+               case PIPE_TEXTURE_RECT:
+               case PIPE_TEXTURE_1D:
+               case PIPE_TEXTURE_2D:
+                       img->array_pitch = rsc->layout.layer_size;
+                       img->depth = 1;
+                       break;
+               case PIPE_TEXTURE_1D_ARRAY:
+               case PIPE_TEXTURE_2D_ARRAY:
+               case PIPE_TEXTURE_CUBE:
+               case PIPE_TEXTURE_CUBE_ARRAY:
+                       img->array_pitch = rsc->layout.layer_size;
+                       // TODO the CUBE/CUBE_ARRAY might need to be layers/6 for tex state,
+                       // but empirically for ibo state it shouldn't be divided.
+                       img->depth = layers;
+                       break;
+               case PIPE_TEXTURE_3D:
+                       img->array_pitch = slice->size0;
+                       img->depth  = u_minify(prsc->depth0, lvl);
+                       break;
+               default:
+                       break;
+               }
+
+               img->level  = lvl;
+               img->width  = u_minify(prsc->width0, lvl);
+               img->height = u_minify(prsc->height0, lvl);
+       }
 }
 
-static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
-               struct fd6_image *img, enum pipe_shader_type shader)
+static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg)
 {
-       unsigned opcode = CP_LOAD_STATE6_FRAG;
+       enum pipe_format format = PIPE_FORMAT_R32_UINT;
+       struct pipe_resource *prsc = pimg->buffer;
+       struct fd_resource *rsc = fd_resource(prsc);
 
-       assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
+       if (!prsc) {
+               memset(img, 0, sizeof(*img));
+               return;
+       }
+
+       img->prsc      = prsc;
+       img->pfmt      = format;
+       img->fmt       = fd6_pipe2tex(format);
+       img->fetchsize = fd6_pipe2fetchsize(format);
+       img->type      = fd6_tex_type(prsc->target);
+       img->srgb      = util_format_is_srgb(format);
+       img->cpp       = rsc->layout.cpp;
+       img->bo        = rsc->bo;
+       img->buffer    = true;
+
+       img->ubwc_offset = 0;    /* not valid for buffers */
+       img->offset = pimg->buffer_offset;
+       img->pitch  = 0;
+       img->array_pitch = 0;
+
+       /* size is encoded with low 15b in WIDTH and high bits in HEIGHT,
+        * in units of elements:
+        */
+       unsigned sz = pimg->buffer_size / 4;
+       img->width  = sz & MASK(15);
+       img->height = sz >> 15;
+       img->depth  = 0;
+}
 
-       OUT_PKT7(ring, opcode, 3 + 12);
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
-               CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
-               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-               CP_LOAD_STATE6_0_STATE_BLOCK(texsb[shader]) |
-               CP_LOAD_STATE6_0_NUM_UNIT(1));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-
-       OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) |
-               fd6_tex_swiz(img->prsc, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
-                       PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
-               COND(img->srgb, A6XX_TEX_CONST_0_SRGB));
+static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
+{
+       struct fd_resource *rsc = fd_resource(img->prsc);
+       bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level);
+
+       OUT_RING(ring, fd6_tex_const_0(img->prsc, img->level, img->pfmt,
+                       PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+                       PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
        OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) |
                A6XX_TEX_CONST_1_HEIGHT(img->height));
        OUT_RING(ring, A6XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) |
+               COND(img->buffer, A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31) |
                A6XX_TEX_CONST_2_TYPE(img->type) |
                A6XX_TEX_CONST_2_PITCH(img->pitch));
-       OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
+       OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch) |
+               COND(ubwc_enabled, A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL));
        if (img->bo) {
                OUT_RELOC(ring, img->bo, img->offset,
                                (uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
@@ -130,7 +192,96 @@ static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth));
        }
+
+       OUT_RING(ring, 0x00000000);   /* texconst6 */
+
+       if (ubwc_enabled) {
+               struct fdl_slice *ubwc_slice = &rsc->layout.ubwc_slices[img->level];
+
+               uint32_t block_width, block_height;
+               fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
+
+               OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0);
+               OUT_RING(ring, A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2));
+               OUT_RING(ring,
+                               A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_slice->pitch) |
+                               A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(img->width, block_width))) |
+                               A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(img->height, block_height))));
+       } else {
+               OUT_RING(ring, 0x00000000);   /* texconst7 */
+               OUT_RING(ring, 0x00000000);   /* texconst8 */
+               OUT_RING(ring, 0x00000000);   /* texconst9 */
+               OUT_RING(ring, 0x00000000);   /* texconst10 */
+       }
+
+       OUT_RING(ring, 0x00000000);   /* texconst11 */
+       OUT_RING(ring, 0x00000000);   /* texconst12 */
+       OUT_RING(ring, 0x00000000);   /* texconst13 */
+       OUT_RING(ring, 0x00000000);   /* texconst14 */
+       OUT_RING(ring, 0x00000000);   /* texconst15 */
+}
+
+void
+fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg)
+{
+       struct fd6_image img;
+       translate_image(&img, pimg);
+       emit_image_tex(ring, &img);
+}
+
+void
+fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf)
+{
+       struct fd6_image img;
+       translate_buf(&img, pbuf);
+       emit_image_tex(ring, &img);
+}
+
+static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
+{
+       /* If the SSBO isn't present (because gallium doesn't pack atomic
+        * counters), zero-fill the slot.
+        */
+       if (!img->prsc) {
+               for (int i = 0; i < 16; i++)
+                       OUT_RING(ring, 0);
+               return;
+       }
+
+       struct fd_resource *rsc = fd_resource(img->prsc);
+       enum a6xx_tile_mode tile_mode = fd_resource_tile_mode(img->prsc, img->level);
+       bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level);
+
+       OUT_RING(ring, A6XX_IBO_0_FMT(img->fmt) |
+               A6XX_IBO_0_TILE_MODE(tile_mode));
+       OUT_RING(ring, A6XX_IBO_1_WIDTH(img->width) |
+               A6XX_IBO_1_HEIGHT(img->height));
+       OUT_RING(ring, A6XX_IBO_2_PITCH(img->pitch) |
+               COND(img->buffer, A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31) |
+               A6XX_IBO_2_TYPE(img->type));
+       OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch) |
+               COND(ubwc_enabled, A6XX_IBO_3_FLAG | A6XX_IBO_3_UNK27));
+       if (img->bo) {
+               OUT_RELOCW(ring, img->bo, img->offset,
+                       (uint64_t)A6XX_IBO_5_DEPTH(img->depth) << 32, 0);
+       } else {
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, A6XX_IBO_5_DEPTH(img->depth));
+       }
        OUT_RING(ring, 0x00000000);
+
+       if (ubwc_enabled) {
+               struct fdl_slice *ubwc_slice = &rsc->layout.ubwc_slices[img->level];
+               OUT_RELOCW(ring, rsc->bo, img->ubwc_offset, 0, 0);
+               OUT_RING(ring, A6XX_IBO_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2));
+               OUT_RING(ring, A6XX_IBO_10_FLAG_BUFFER_PITCH(ubwc_slice->pitch));
+       } else {
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+       }
+
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
@@ -138,99 +289,63 @@ static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
        OUT_RING(ring, 0x00000000);
 }
 
-static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
-               struct fd6_image *img, enum pipe_shader_type shader)
+/* Build combined image/SSBO "IBO" state, returns ownership of state reference */
+struct fd_ringbuffer *
+fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
+               enum pipe_shader_type shader)
 {
-       unsigned opcode = CP_LOAD_STATE6_FRAG;
+       struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
+       struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
+
+       struct fd_ringbuffer *state =
+               fd_submit_new_ringbuffer(ctx->batch->submit,
+                               (v->shader->nir->info.num_ssbos +
+                                v->shader->nir->info.num_images) * 16 * 4,
+                               FD_RINGBUFFER_STREAMING);
 
        assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
 
-#if 0
-       OUT_PKT7(ring, opcode, 3 + 4);
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
-               CP_LOAD_STATE6_0_STATE_TYPE(0) |
-               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-               CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
-               CP_LOAD_STATE6_0_NUM_UNIT(1));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-       OUT_RING(ring, A6XX_SSBO_0_0_BASE_LO(0));
-       OUT_RING(ring, A6XX_SSBO_0_1_PITCH(img->pitch));
-       OUT_RING(ring, A6XX_SSBO_0_2_ARRAY_PITCH(img->array_pitch));
-       OUT_RING(ring, A6XX_SSBO_0_3_CPP(img->cpp));
-#endif
-
-#if 0
-       OUT_PKT7(ring, opcode, 3 + 2);
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
-               CP_LOAD_STATE6_0_STATE_TYPE(1) |
-               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-               CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
-               CP_LOAD_STATE6_0_NUM_UNIT(1));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-       OUT_RING(ring, A6XX_SSBO_1_0_FMT(img->fmt) |
-               A6XX_SSBO_1_0_WIDTH(img->width));
-       OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(img->height) |
-               A6XX_SSBO_1_1_DEPTH(img->depth));
-#endif
-
-       OUT_PKT7(ring, opcode, 3 + 2);
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
-               CP_LOAD_STATE6_0_STATE_TYPE(2) |
-               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-               CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
-               CP_LOAD_STATE6_0_NUM_UNIT(1));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-       if (img->bo) {
-               OUT_RELOCW(ring, img->bo, img->offset, 0, 0);
-       } else {
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
+       for (unsigned i = 0; i < v->shader->nir->info.num_ssbos; i++) {
+               struct fd6_image img;
+               translate_buf(&img, &bufso->sb[i]);
+               emit_image_ssbo(state, &img);
        }
-}
 
-/* Note that to avoid conflicts with textures and non-image "SSBO"s, images
- * are placedd, in reverse order, at the end of the state block, so for
- * example the sampler state:
- *
- *   0:   first texture
- *   1:   second texture
- *   ....
- *   N-1: second image
- *   N:   first image
- */
-static unsigned
-get_image_slot(unsigned index)
-{
-       /* TODO figure out real limit per generation, and don't hardcode.
-        * This needs to match get_image_slot() in ir3_compiler_nir.
-        * Possibly should be factored out into shared helper?
-        */
-       const unsigned max_samplers = 16;
-       return max_samplers - index - 1;
+       for (unsigned i = 0; i < v->shader->nir->info.num_images; i++) {
+               struct fd6_image img;
+               translate_image(&img, &imgso->si[i]);
+               emit_image_ssbo(state, &img);
+       }
+
+       return state;
 }
 
-/* Emit required "SSBO" and sampler state.  The sampler state is used by the
- * hw for imageLoad(), and "SSBO" state for imageStore().  Returns max sampler
- * used.
- */
-void
-fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
-               enum pipe_shader_type shader)
+static void fd6_set_shader_images(struct pipe_context *pctx,
+               enum pipe_shader_type shader,
+               unsigned start, unsigned count,
+               const struct pipe_image_view *images)
 {
+       struct fd_context *ctx = fd_context(pctx);
        struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
-       unsigned enabled_mask = so->enabled_mask;
 
-       while (enabled_mask) {
-               unsigned index = u_bit_scan(&enabled_mask);
-               unsigned slot = get_image_slot(index);
-               struct fd6_image img;
+       fd_set_shader_images(pctx, shader, start, count, images);
+
+       if (!images)
+               return;
 
-               translate_image(&img, &so->si[index]);
+       for (unsigned i = 0; i < count; i++) {
+               unsigned n = i + start;
+               struct pipe_image_view *buf = &so->si[n];
 
-               emit_image_tex(ring, slot, &img, shader);
-               emit_image_ssbo(ring, slot, &img, shader);
+               if (!buf->resource)
+                       continue;
+
+               fd6_validate_format(ctx, fd_resource(buf->resource), buf->format);
        }
 }
+
+void
+fd6_image_init(struct pipe_context *pctx)
+{
+       pctx->set_shader_images = fd6_set_shader_images;
+}