From bedbe7f90c811dcf47975008efbd1b54dbdf2756 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 4 Nov 2017 12:52:43 -0400 Subject: [PATCH] freedreno/a5xx: image support --- .../drivers/freedreno/Makefile.sources | 2 + src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 20 +- .../drivers/freedreno/a5xx/fd5_image.c | 223 ++++++++++++++++++ .../drivers/freedreno/a5xx/fd5_image.h | 35 +++ .../drivers/freedreno/a5xx/fd5_texture.c | 24 +- .../drivers/freedreno/a5xx/fd5_texture.h | 23 ++ .../drivers/freedreno/freedreno_screen.c | 8 +- src/gallium/drivers/freedreno/meson.build | 2 + 8 files changed, 306 insertions(+), 31 deletions(-) create mode 100644 src/gallium/drivers/freedreno/a5xx/fd5_image.c create mode 100644 src/gallium/drivers/freedreno/a5xx/fd5_image.h diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index db716f39d3b..b109a5a7a21 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -138,6 +138,8 @@ a5xx_SOURCES := \ a5xx/fd5_format.h \ a5xx/fd5_gmem.c \ a5xx/fd5_gmem.h \ + a5xx/fd5_image.c \ + a5xx/fd5_image.h \ a5xx/fd5_program.c \ a5xx/fd5_program.h \ a5xx/fd5_query.c \ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 3d8e43ad3e4..39cc77d40a3 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -37,6 +37,7 @@ #include "fd5_emit.h" #include "fd5_blend.h" #include "fd5_context.h" +#include "fd5_image.h" #include "fd5_program.h" #include "fd5_rasterizer.h" #include "fd5_texture.h" @@ -764,10 +765,12 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) { needs_border |= emit_textures(ctx, ring, SB4_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]); - OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1); - OUT_RING(ring, 
ctx->tex[PIPE_SHADER_FRAGMENT].num_textures); } + OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1); + OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ? + ~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures); + OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1); OUT_RING(ring, 0); @@ -776,6 +779,9 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); + + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) + fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT); } void @@ -806,13 +812,17 @@ fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1); OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1); - OUT_RING(ring, ctx->tex[PIPE_SHADER_COMPUTE].num_textures); } + OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1); + OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask ? 
+ ~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures); + if (dirty & FD_DIRTY_SHADER_SSBO) emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]); + + if (dirty & FD_DIRTY_SHADER_IMAGE) + fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE); } /* emit setup at begin of new cmdstream buffer (don't rely on previous diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c new file mode 100644 index 00000000000..a945e7e5efa --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2017 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" + +#include "freedreno_resource.h" +#include "fd5_image.h" +#include "fd5_format.h" +#include "fd5_texture.h" + +static enum a4xx_state_block texsb[] = { + [PIPE_SHADER_COMPUTE] = SB4_CS_TEX, + [PIPE_SHADER_FRAGMENT] = SB4_FS_TEX, +}; + +static enum a4xx_state_block imgsb[] = { + [PIPE_SHADER_COMPUTE] = SB4_CS_SSBO, + [PIPE_SHADER_FRAGMENT] = SB4_SSBO, +}; + +struct fd5_image { /* per-image values filled by translate_image() and consumed by the emit helpers */ + enum pipe_format pfmt; + enum a5xx_tex_fmt fmt; + enum a5xx_tex_fetchsize fetchsize; + enum a5xx_tex_type type; + bool srgb; + uint32_t cpp; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t pitch; + uint32_t array_pitch; + struct fd_bo *bo; + uint32_t offset; +}; + +static void translate_image(struct fd5_image *img, struct pipe_image_view *pimg) /* fills img from pimg; img is zeroed when pimg has no resource */ +{ + enum pipe_format format = pimg->format; + struct pipe_resource *prsc = pimg->resource; + struct fd_resource *rsc = fd_resource(prsc); + unsigned lvl; + + if (!pimg->resource) { + memset(img, 0, sizeof(*img)); + return; + } + + img->pfmt = format; + img->fmt = fd5_pipe2tex(format); + img->fetchsize = fd5_pipe2fetchsize(format); + img->type = fd5_tex_type(prsc->target); + img->srgb = util_format_is_srgb(format); + img->cpp = rsc->cpp; + img->bo = rsc->bo; + + if (prsc->target == PIPE_BUFFER) { + lvl = 0; + img->offset = pimg->u.buf.offset; + img->pitch = pimg->u.buf.size; /* buffer views: the view size is stored in the pitch field */ + img->array_pitch = 0; + } else { + lvl = pimg->u.tex.level; + img->offset = rsc->slices[lvl].offset; /* NOTE(review): pimg->u.tex.first_layer is not applied to this offset - confirm layered views are handled elsewhere */ + img->pitch = rsc->slices[lvl].pitch * rsc->cpp; + img->array_pitch = rsc->layer_size; + } + + img->width = u_minify(prsc->width0, lvl); + img->height = u_minify(prsc->height0, lvl); + img->depth = u_minify(prsc->depth0, lvl); +} + +static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot, + struct fd5_image *img, enum pipe_shader_type shader) +{ + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 12); /* 3 header dwords + 12 dwords of texture state */ + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + 
CP_LOAD_STATE4_0_STATE_BLOCK(texsb[shader]) | + CP_LOAD_STATE4_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + + OUT_RING(ring, A5XX_TEX_CONST_0_FMT(img->fmt) | + fd5_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) | + COND(img->srgb, A5XX_TEX_CONST_0_SRGB)); + OUT_RING(ring, A5XX_TEX_CONST_1_WIDTH(img->width) | + A5XX_TEX_CONST_1_HEIGHT(img->height)); + OUT_RING(ring, A5XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) | + A5XX_TEX_CONST_2_TYPE(img->type) | + A5XX_TEX_CONST_2_PITCH(img->pitch)); + OUT_RING(ring, A5XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch)); + if (img->bo) { + OUT_RELOC(ring, img->bo, img->offset, + (uint64_t)A5XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0); + } else { /* no bo: zero address dword, depth still programmed */ + OUT_RING(ring, 0x00000000); + OUT_RING(ring, A5XX_TEX_CONST_5_DEPTH(img->depth)); + } + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); +} + +static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot, + struct fd5_image *img, enum pipe_shader_type shader) /* issues three CP_LOAD_STATE4 packets for the same slot, with STATE_TYPE 0, 1 and 2 */ +{ + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 4); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) | + CP_LOAD_STATE4_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + OUT_RING(ring, A5XX_SSBO_0_0_BASE_LO(0)); + OUT_RING(ring, A5XX_SSBO_0_1_PITCH(img->pitch)); + OUT_RING(ring, A5XX_SSBO_0_2_ARRAY_PITCH(img->array_pitch)); + OUT_RING(ring, A5XX_SSBO_0_3_CPP(img->cpp)); + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) | + 
CP_LOAD_STATE4_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + OUT_RING(ring, A5XX_SSBO_1_0_FMT(img->fmt) | + A5XX_SSBO_1_0_WIDTH(img->width)); + OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(img->height) | + A5XX_SSBO_1_1_DEPTH(img->depth)); + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) | + CP_LOAD_STATE4_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + if (img->bo) { + OUT_RELOCW(ring, img->bo, img->offset, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } +} + +/* Note that to avoid conflicts with textures and non-image "SSBO"s, images + * are placed, in reverse order, at the end of the state block, so for + * example the sampler state: + * + * 0: first texture + * 1: second texture + * .... + * N-1: second image + * N: first image + */ +static unsigned +get_image_slot(unsigned index) +{ + /* TODO figure out real limit per generation, and don't hardcode. + * This needs to match get_image_slot() in ir3_compiler_nir. + * Possibly should be factored out into shared helper? + */ + const unsigned max_samplers = 16; + return max_samplers - index - 1; +} + +/* Emit required "SSBO" and sampler state. The sampler state is used by the + * hw for imageLoad(), and "SSBO" state for imageStore(). Only images + * that are both dirty and enabled are emitted; nothing is returned. 
+ */ +void +fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum pipe_shader_type shader) +{ + struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader]; + + so->dirty_mask &= so->enabled_mask; /* skip images that are dirty but no longer enabled */ + + while (so->dirty_mask) { + unsigned index = u_bit_scan(&so->dirty_mask); + unsigned slot = get_image_slot(index); + struct fd5_image img; + + translate_image(&img, &so->si[index]); + + emit_image_tex(ring, slot, &img, shader); + emit_image_ssbo(ring, slot, &img, shader); + } +} + diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.h b/src/gallium/drivers/freedreno/a5xx/fd5_image.h new file mode 100644 index 00000000000..98c7faf6154 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2017 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef FD5_IMAGE_H_ +#define FD5_IMAGE_H_ + +#include "freedreno_context.h" + +void fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum pipe_shader_type shader); + +#endif /* FD5_IMAGE_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_texture.c b/src/gallium/drivers/freedreno/a5xx/fd5_texture.c index 87b69ea1c11..73ba86f0c3a 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_texture.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_texture.c @@ -186,28 +186,6 @@ fd5_sampler_states_bind(struct pipe_context *pctx, } } -static enum a5xx_tex_type -tex_type(unsigned target) -{ - switch (target) { - default: - assert(0); - case PIPE_BUFFER: - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_1D_ARRAY: - return A5XX_TEX_1D; - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_2D_ARRAY: - return A5XX_TEX_2D; - case PIPE_TEXTURE_3D: - return A5XX_TEX_3D; - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - return A5XX_TEX_CUBE; - } -} - static bool use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format) { @@ -272,7 +250,7 @@ fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); } - so->texconst2 |= A5XX_TEX_CONST_2_TYPE(tex_type(cso->target)); + so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target)); switch (cso->target) { case PIPE_TEXTURE_1D: diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_texture.h b/src/gallium/drivers/freedreno/a5xx/fd5_texture.h index c4d109376e1..11a3b2ee1e8 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_texture.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_texture.h @@ -67,4 +67,27 @@ unsigned fd5_get_const_idx(struct fd_context *ctx, void fd5_texture_init(struct pipe_context *pctx); + +static inline enum a5xx_tex_type +fd5_tex_type(unsigned target) +{ + switch (target) { + default: + assert(0); /* fallthrough: an unknown target asserts, and when assert() is compiled out it is handled like the 1D cases below */ + case PIPE_BUFFER: + case 
PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return A5XX_TEX_1D; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + return A5XX_TEX_2D; + case PIPE_TEXTURE_3D: + return A5XX_TEX_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return A5XX_TEX_CUBE; + } +} + #endif /* FD5_TEXTURE_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 673d1e5b030..7910bbbdcd5 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -558,6 +558,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: return 0; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: if (is_a5xx(screen)) { /* a5xx (and a4xx for that matter) has one state-block * for compute-shader SSBO's and another that is shared @@ -576,6 +577,10 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, * * I think that way we could avoid having to patch shaders * for actual SSBO indexes by using a static partitioning. 
+ * + * Note same state block is used for images and buffers, + * but images also need texture state for read access + * (isam/isam.3d) */ switch(shader) { @@ -587,9 +592,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, } } return 0; - case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: - /* probably should be same as MAX_SHADRER_BUFFERS but not implemented yet */ - return 0; } debug_printf("unknown shader param %d\n", param); return 0; diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 3fb94ed8cff..fe1a902e9e5 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -160,6 +160,8 @@ files_libfreedreno = files( 'a5xx/fd5_format.h', 'a5xx/fd5_gmem.c', 'a5xx/fd5_gmem.h', + 'a5xx/fd5_image.c', + 'a5xx/fd5_image.h', 'a5xx/fd5_program.c', 'a5xx/fd5_program.h', 'a5xx/fd5_query.c', -- 2.30.2