freedreno/a5xx: image support
author Rob Clark <robdclark@gmail.com>
Sat, 4 Nov 2017 16:52:43 +0000 (12:52 -0400)
committer Rob Clark <robdclark@gmail.com>
Sun, 12 Nov 2017 17:28:59 +0000 (12:28 -0500)
src/gallium/drivers/freedreno/Makefile.sources
src/gallium/drivers/freedreno/a5xx/fd5_emit.c
src/gallium/drivers/freedreno/a5xx/fd5_image.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_image.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_texture.c
src/gallium/drivers/freedreno/a5xx/fd5_texture.h
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/freedreno/meson.build

index db716f39d3bce28126d5661b126118b7c2942b28..b109a5a7a21ff3c3cb0eaed28b3543b6c96231d2 100644 (file)
@@ -138,6 +138,8 @@ a5xx_SOURCES := \
        a5xx/fd5_format.h \
        a5xx/fd5_gmem.c \
        a5xx/fd5_gmem.h \
+       a5xx/fd5_image.c \
+       a5xx/fd5_image.h \
        a5xx/fd5_program.c \
        a5xx/fd5_program.h \
        a5xx/fd5_query.c \
index 3d8e43ad3e4d6e76c29d98229a3754c977f088f9..39cc77d40a35569b3574a12b058a6fe7d2c5b5ae 100644 (file)
@@ -37,6 +37,7 @@
 #include "fd5_emit.h"
 #include "fd5_blend.h"
 #include "fd5_context.h"
+#include "fd5_image.h"
 #include "fd5_program.h"
 #include "fd5_rasterizer.h"
 #include "fd5_texture.h"
@@ -764,10 +765,12 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
                needs_border |= emit_textures(ctx, ring, SB4_FS_TEX,
                                &ctx->tex[PIPE_SHADER_FRAGMENT]);
-               OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
-               OUT_RING(ring, ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
        }
 
+       OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
+       OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ?
+                       ~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
+
        OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
        OUT_RING(ring, 0);
 
@@ -776,6 +779,9 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
                emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
+
+       if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
+               fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT);
 }
 
 void
@@ -806,13 +812,17 @@ fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
                OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
                OUT_RING(ring, 0);
-
-               OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
-               OUT_RING(ring, ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
        }
 
+       OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
+       OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask ?
+                       ~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
+
        if (dirty & FD_DIRTY_SHADER_SSBO)
                emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);
+
+       if (dirty & FD_DIRTY_SHADER_IMAGE)
+               fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
 }
 
 /* emit setup at begin of new cmdstream buffer (don't rely on previous
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c
new file mode 100644 (file)
index 0000000..a945e7e
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+
+#include "freedreno_resource.h"
+#include "fd5_image.h"
+#include "fd5_format.h"
+#include "fd5_texture.h"
+
+static enum a4xx_state_block texsb[] = {
+       [PIPE_SHADER_COMPUTE] = SB4_CS_TEX,
+       [PIPE_SHADER_FRAGMENT] = SB4_FS_TEX,
+};
+
+static enum a4xx_state_block imgsb[] = {
+       [PIPE_SHADER_COMPUTE] = SB4_CS_SSBO,
+       [PIPE_SHADER_FRAGMENT] = SB4_SSBO,
+};
+
+struct fd5_image {
+       enum pipe_format pfmt;
+       enum a5xx_tex_fmt fmt;
+       enum a5xx_tex_fetchsize fetchsize;
+       enum a5xx_tex_type type;
+       bool srgb;
+       uint32_t cpp;
+       uint32_t width;
+       uint32_t height;
+       uint32_t depth;
+       uint32_t pitch;
+       uint32_t array_pitch;
+       struct fd_bo *bo;
+       uint32_t offset;
+};
+
+static void translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
+{
+       enum pipe_format format = pimg->format;
+       struct pipe_resource *prsc = pimg->resource;
+       struct fd_resource *rsc = fd_resource(prsc);
+       unsigned lvl;
+
+       if (!pimg->resource) {
+               memset(img, 0, sizeof(*img));
+               return;
+       }
+
+       img->pfmt      = format;
+       img->fmt       = fd5_pipe2tex(format);
+       img->fetchsize = fd5_pipe2fetchsize(format);
+       img->type      = fd5_tex_type(prsc->target);
+       img->srgb      = util_format_is_srgb(format);
+       img->cpp       = rsc->cpp;
+       img->bo        = rsc->bo;
+
+       if (prsc->target == PIPE_BUFFER) {
+               lvl = 0;
+               img->offset = pimg->u.buf.offset;
+               img->pitch  = pimg->u.buf.size;
+               img->array_pitch = 0;
+       } else {
+               lvl = pimg->u.tex.level;
+               img->offset = rsc->slices[lvl].offset;
+               img->pitch  = rsc->slices[lvl].pitch * rsc->cpp;
+               img->array_pitch = rsc->layer_size;
+       }
+
+       img->width     = u_minify(prsc->width0, lvl);
+       img->height    = u_minify(prsc->height0, lvl);
+       img->depth     = u_minify(prsc->depth0, lvl);
+}
+
+static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
+               struct fd5_image *img, enum pipe_shader_type shader)
+{
+       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 12);
+       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+               CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+               CP_LOAD_STATE4_0_STATE_BLOCK(texsb[shader]) |
+               CP_LOAD_STATE4_0_NUM_UNIT(1));
+       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
+               CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
+       OUT_RING(ring, A5XX_TEX_CONST_0_FMT(img->fmt) |
+               fd5_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+                       PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
+               COND(img->srgb, A5XX_TEX_CONST_0_SRGB));
+       OUT_RING(ring, A5XX_TEX_CONST_1_WIDTH(img->width) |
+               A5XX_TEX_CONST_1_HEIGHT(img->height));
+       OUT_RING(ring, A5XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) |
+               A5XX_TEX_CONST_2_TYPE(img->type) |
+               A5XX_TEX_CONST_2_PITCH(img->pitch));
+       OUT_RING(ring, A5XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
+       if (img->bo) {
+               OUT_RELOC(ring, img->bo, img->offset,
+                               (uint64_t)A5XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
+       } else {
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, A5XX_TEX_CONST_5_DEPTH(img->depth));
+       }
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+}
+
+static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
+               struct fd5_image *img, enum pipe_shader_type shader)
+{
+       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 4);
+       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+               CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+               CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
+               CP_LOAD_STATE4_0_NUM_UNIT(1));
+       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
+               CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+       OUT_RING(ring, A5XX_SSBO_0_0_BASE_LO(0));
+       OUT_RING(ring, A5XX_SSBO_0_1_PITCH(img->pitch));
+       OUT_RING(ring, A5XX_SSBO_0_2_ARRAY_PITCH(img->array_pitch));
+       OUT_RING(ring, A5XX_SSBO_0_3_CPP(img->cpp));
+
+       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
+       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+               CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+               CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
+               CP_LOAD_STATE4_0_NUM_UNIT(1));
+       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+               CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+       OUT_RING(ring, A5XX_SSBO_1_0_FMT(img->fmt) |
+               A5XX_SSBO_1_0_WIDTH(img->width));
+       OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(img->height) |
+               A5XX_SSBO_1_1_DEPTH(img->depth));
+
+       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
+       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+               CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+               CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
+               CP_LOAD_STATE4_0_NUM_UNIT(1));
+       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+               CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+       if (img->bo) {
+               OUT_RELOCW(ring, img->bo, img->offset, 0, 0);
+       } else {
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+       }
+}
+
+/* Note that to avoid conflicts with textures and non-image "SSBO"s, images
+ * are placed, in reverse order, at the end of the state block, so for
+ * example the sampler state:
+ *
+ *   0:   first texture
+ *   1:   second texture
+ *   ....
+ *   N-1: second image
+ *   N:   first image
+ */
+static unsigned
+get_image_slot(unsigned index)
+{
+       /* TODO figure out real limit per generation, and don't hardcode.
+        * This needs to match get_image_slot() in ir3_compiler_nir.
+        * Possibly should be factored out into shared helper?
+        */
+       const unsigned max_samplers = 16;
+       return max_samplers - index - 1;
+}
+
+/* Emit required "SSBO" and sampler state.  The sampler state is used by the
+ * hw for imageLoad(), and "SSBO" state for imageStore().  Only images that
+ * are both dirty and enabled are (re)emitted; nothing is returned.
+ */
+void
+fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               enum pipe_shader_type shader)
+{
+       struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
+
+       so->dirty_mask &= so->enabled_mask;
+
+       while (so->dirty_mask) {
+               unsigned index = u_bit_scan(&so->dirty_mask);
+               unsigned slot = get_image_slot(index);
+               struct fd5_image img;
+
+               translate_image(&img, &so->si[index]);
+
+               emit_image_tex(ring, slot, &img, shader);
+               emit_image_ssbo(ring, slot, &img, shader);
+       }
+}
+
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.h b/src/gallium/drivers/freedreno/a5xx/fd5_image.h
new file mode 100644 (file)
index 0000000..98c7faf
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_IMAGE_H_
+#define FD5_IMAGE_H_
+
+#include "freedreno_context.h"
+
+void fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               enum pipe_shader_type shader);
+
+#endif /* FD5_IMAGE_H_ */
index 87b69ea1c112a6402a1696214c2bcc0ad6bc9958..73ba86f0c3a3cf85633afb66150d4cb6fd6490f9 100644 (file)
@@ -186,28 +186,6 @@ fd5_sampler_states_bind(struct pipe_context *pctx,
        }
 }
 
-static enum a5xx_tex_type
-tex_type(unsigned target)
-{
-       switch (target) {
-       default:
-               assert(0);
-       case PIPE_BUFFER:
-       case PIPE_TEXTURE_1D:
-       case PIPE_TEXTURE_1D_ARRAY:
-               return A5XX_TEX_1D;
-       case PIPE_TEXTURE_RECT:
-       case PIPE_TEXTURE_2D:
-       case PIPE_TEXTURE_2D_ARRAY:
-               return A5XX_TEX_2D;
-       case PIPE_TEXTURE_3D:
-               return A5XX_TEX_3D;
-       case PIPE_TEXTURE_CUBE:
-       case PIPE_TEXTURE_CUBE_ARRAY:
-               return A5XX_TEX_CUBE;
-       }
-}
-
 static bool
 use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
 {
@@ -272,7 +250,7 @@ fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
                so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
        }
 
-       so->texconst2 |= A5XX_TEX_CONST_2_TYPE(tex_type(cso->target));
+       so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target));
 
        switch (cso->target) {
        case PIPE_TEXTURE_1D:
index c4d109376e176eb78f4b5049f5402544b0cd119a..11a3b2ee1e847e95d4018f6e911342dea3b7c847 100644 (file)
@@ -67,4 +67,27 @@ unsigned fd5_get_const_idx(struct fd_context *ctx,
 
 void fd5_texture_init(struct pipe_context *pctx);
 
+
+static inline enum a5xx_tex_type
+fd5_tex_type(unsigned target)
+{
+       switch (target) {
+       default:
+               assert(0);
+       case PIPE_BUFFER:
+       case PIPE_TEXTURE_1D:
+       case PIPE_TEXTURE_1D_ARRAY:
+               return A5XX_TEX_1D;
+       case PIPE_TEXTURE_RECT:
+       case PIPE_TEXTURE_2D:
+       case PIPE_TEXTURE_2D_ARRAY:
+               return A5XX_TEX_2D;
+       case PIPE_TEXTURE_3D:
+               return A5XX_TEX_3D;
+       case PIPE_TEXTURE_CUBE:
+       case PIPE_TEXTURE_CUBE_ARRAY:
+               return A5XX_TEX_CUBE;
+       }
+}
+
 #endif /* FD5_TEXTURE_H_ */
index 673d1e5b030cb4f29799bace078e563cf5ab9fc0..7910bbbdcd511e3d31e0f6a0c49dd90f79e272da 100644 (file)
@@ -558,6 +558,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
        case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
                return 0;
        case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+       case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
                if (is_a5xx(screen)) {
                        /* a5xx (and a4xx for that matter) has one state-block
                         * for compute-shader SSBO's and another that is shared
@@ -576,6 +577,10 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
                         *
                         * I think that way we could avoid having to patch shaders
                         * for actual SSBO indexes by using a static partitioning.
+                        *
+                        * Note same state block is used for images and buffers,
+                        * but images also need texture state for read access
+                        * (isam/isam.3d)
                         */
                        switch(shader)
                        {
@@ -587,9 +592,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
                        }
                }
                return 0;
-       case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
-               /* probably should be same as MAX_SHADRER_BUFFERS but not implemented yet */
-               return 0;
        }
        debug_printf("unknown shader param %d\n", param);
        return 0;
index 3fb94ed8cff64bb47f543813bf40416ded933291..fe1a902e9e527e7fd0915db29511872b48e47c7e 100644 (file)
@@ -160,6 +160,8 @@ files_libfreedreno = files(
   'a5xx/fd5_format.h',
   'a5xx/fd5_gmem.c',
   'a5xx/fd5_gmem.h',
+  'a5xx/fd5_image.c',
+  'a5xx/fd5_image.h',
   'a5xx/fd5_program.c',
   'a5xx/fd5_program.h',
   'a5xx/fd5_query.c',