From 39c5a46a7a7e46fa84b21e749aa9547733f0c813 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 17 Apr 2017 11:25:29 -0400 Subject: [PATCH] freedreno/a5xx: SSBO support To simplify things for now, since all the gfx shader stages share a single SSBO state block, only advertise SSBO support for fragment shader (and compute when we have that). We could possibly use a fixed-partitioning of the SSBO index space to support SSBOs on other stages without having to resort to shader variants. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 69 +++++++++++++++++++ .../drivers/freedreno/a5xx/fd5_program.c | 15 ++-- .../drivers/freedreno/freedreno_screen.c | 37 +++++++++- 3 files changed, 114 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 5b25257167b..a51401b4cfe 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -345,6 +345,72 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, return needs_border; } +static void +emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so) +{ + unsigned count = util_last_bit(so->enabled_mask); + + if (count == 0) + return; + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; + if (buf->buffer) { + struct fd_resource *rsc = fd_resource(buf->buffer); + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + OUT_RING(ring, 0x00000000); + 
OUT_RING(ring, 0x00000000); + } + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; + + // TODO maybe offset encoded somewhere here?? + OUT_RING(ring, (buf->buffer_size << 16)); + OUT_RING(ring, 0x00000000); + } + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; + if (buf->buffer) { + struct fd_resource *rsc = fd_resource(buf->buffer); + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + } +} + void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit) { @@ -663,6 +729,9 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (needs_border) emit_border_color(ctx, ring); + + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) + emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); } /* emit setup at begin of new cmdstream buffer (don't rely on previous diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 232b3fb8775..54b5d8063c6 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -389,11 +389,16 @@ fd5_program_emit(struct 
fd_ringbuffer *ring, struct fd5_emit *emit) OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5); - OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen)); - OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen)); - OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen)); - OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen)); - OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen)); + OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) | + COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE)); + OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) | + COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE)); + OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) | + COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE)); + OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) | + COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE)); + OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) | + COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE)); OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5); OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) | diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 93b434b0bac..15293b1b3be 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -250,6 +250,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 120; return is_ir3(screen) ? 140 : 120; + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + if (is_a5xx(screen)) + return 4; + return 0; + /* Unsupported features. 
*/ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: @@ -282,7 +287,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_PACK_HALF_FLOAT: case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: - case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: @@ -439,7 +443,7 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) static int fd_screen_get_shader_param(struct pipe_screen *pscreen, - enum pipe_shader_type shader, + enum pipe_shader_type shader, enum pipe_shader_cap param) { struct fd_screen *screen = fd_screen(pscreen); @@ -518,6 +522,35 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + if (is_a5xx(screen)) { + /* a5xx (and a4xx for that matter) has one state-block + * for compute-shader SSBO's and another that is shared + * by VS/HS/DS/GS/FS.. so to simplify things for now + * just advertise SSBOs for FS and CS. We could possibly + * do what blob does, and partition the space for + * VS/HS/DS/GS/FS. The blob advertises: + * + * GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24 + * GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24 + * + * I think that way we could avoid having to patch shaders + * for actual SSBO indexes by using a static partitioning. 
+ */ + switch(shader) + { + case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_COMPUTE: + return 24; + default: + return 0; + } + } + return 0; case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: -- 2.30.2