From adf795432f788b33822d3a94b704be4ca536c8f1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 19 Apr 2016 09:02:23 -0400 Subject: [PATCH] freedreno/a4xx: better workaround for astc+srgb This *seems* like a hw bug, and maybe only applies to certain a4xx variants/revisions. But setting the SRGB bit in sampler view state (texconst0) causes invalid alpha for ASTC textures. Work around this by setting up a second texture state and using that to sample alpha separately. This way, srgb->linear conversion happens in hw *prior* to interpolation. This fixes 546 dEQP tests: dEQP-GLES3.functional.texture.*astc*srgb* Signed-off-by: Rob Clark --- .../drivers/freedreno/a4xx/fd4_context.h | 3 + src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 11 ++- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 41 ++++++++- .../drivers/freedreno/a4xx/fd4_texture.c | 42 ++++++++- .../drivers/freedreno/a4xx/fd4_texture.h | 1 + src/gallium/drivers/freedreno/ir3/ir3.h | 6 ++ .../drivers/freedreno/ir3/ir3_cmdline.c | 8 ++ .../drivers/freedreno/ir3/ir3_compiler_nir.c | 92 ++++++++++++++++--- .../drivers/freedreno/ir3/ir3_shader.c | 2 + .../drivers/freedreno/ir3/ir3_shader.h | 11 +++ 10 files changed, 195 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h index 8996de932b8..9467fc529bf 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h @@ -85,6 +85,9 @@ struct fd4_context { */ uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + /* bitmask of samplers which need astc srgb workaround: */ + uint16_t vastc_srgb, fastc_srgb; + /* some state changes require a different shader variant. Keep * track of this so we know when we need to re-emit shader state * due to variant change. 
See fixup_shader_state() diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index e874d223187..68e1f53e2dd 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -93,12 +93,14 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) if (last_key->has_per_samp || key->has_per_samp) { if ((last_key->vsaturate_s != key->vsaturate_s) || (last_key->vsaturate_t != key->vsaturate_t) || - (last_key->vsaturate_r != key->vsaturate_r)) + (last_key->vsaturate_r != key->vsaturate_r) || + (last_key->vastc_srgb != key->vastc_srgb)) ctx->prog.dirty |= FD_SHADER_DIRTY_VP; if ((last_key->fsaturate_s != key->fsaturate_s) || (last_key->fsaturate_t != key->fsaturate_t) || - (last_key->fsaturate_r != key->fsaturate_r)) + (last_key->fsaturate_r != key->fsaturate_r) || + (last_key->fastc_srgb != key->fastc_srgb)) ctx->prog.dirty |= FD_SHADER_DIRTY_FP; } @@ -132,13 +134,16 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) // ie. float16 and smaller use half, float32 use full.. 
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), .ucp_enables = ctx->rasterizer->clip_plane_enable, - .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate), + .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate || + fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb), .vsaturate_s = fd4_ctx->vsaturate_s, .vsaturate_t = fd4_ctx->vsaturate_t, .vsaturate_r = fd4_ctx->vsaturate_r, .fsaturate_s = fd4_ctx->fsaturate_s, .fsaturate_t = fd4_ctx->fsaturate_t, .fsaturate_r = fd4_ctx->fsaturate_r, + .vastc_srgb = fd4_ctx->vastc_srgb, + .fastc_srgb = fd4_ctx->fastc_srgb, }, .rasterflat = ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index ba5d48909fa..27614f07de5 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -123,7 +123,8 @@ fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, static void emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, - enum adreno_state_block sb, struct fd_texture_stateobj *tex) + enum adreno_state_block sb, struct fd_texture_stateobj *tex, + const struct ir3_shader_variant *v) { static const uint32_t bcolor_reg[] = { [SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, @@ -174,12 +175,14 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (tex->num_textures > 0) { + unsigned num_textures = tex->num_textures + v->astc_srgb.count; + /* emit texture state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * tex->num_textures)); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * num_textures)); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures)); + CP_LOAD_STATE_0_NUM_UNIT(num_textures)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | 
CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_textures; i++) { @@ -202,6 +205,34 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); } + + for (i = 0; i < v->astc_srgb.count; i++) { + static const struct fd4_pipe_sampler_view dummy_view = {}; + const struct fd4_pipe_sampler_view *view; + unsigned idx = v->astc_srgb.orig_idx[i]; + + view = tex->textures[idx] ? + fd4_pipe_sampler_view(tex->textures[idx]) : + &dummy_view; + + debug_assert(view->texconst0 & A4XX_TEX_CONST_0_SRGB); + + OUT_RING(ring, view->texconst0 & ~A4XX_TEX_CONST_0_SRGB); + OUT_RING(ring, view->texconst1); + OUT_RING(ring, view->texconst2); + OUT_RING(ring, view->texconst3); + if (view->base.texture) { + struct fd_resource *rsc = fd_resource(view->base.texture); + OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0); + } else { + OUT_RING(ring, 0x00000000); + } + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + } else { + debug_assert(v->astc_srgb.count == 0); } OUT_PKT0(ring, bcolor_reg[sb], 1); @@ -681,14 +712,14 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & FD_DIRTY_VERTTEX) { if (vp->has_samp) - emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex); + emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex, vp); else dirty &= ~FD_DIRTY_VERTTEX; } if (dirty & FD_DIRTY_FRAGTEX) { if (fp->has_samp) - emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex); + emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex, fp); else dirty &= ~FD_DIRTY_FRAGTEX; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index 38348580e21..6d9ecb7da24 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -209,6 +209,13 @@ tex_type(unsigned target) } } +static bool +use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format 
format) +{ + return (fd_screen(pctx->screen)->gpu_id == 420) && + (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC); +} + static struct pipe_sampler_view * fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) @@ -233,8 +240,11 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); - if (util_format_is_srgb(cso->format)) + if (util_format_is_srgb(cso->format)) { + if (use_astc_srgb_workaround(pctx, cso->format)) + so->astc_srgb = true; so->texconst0 |= A4XX_TEX_CONST_0_SRGB; + } if (cso->target == PIPE_BUFFER) { unsigned elements = cso->u.buf.last_element - @@ -296,11 +306,39 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, return &so->base; } +static void +fd4_set_sampler_views(struct pipe_context *pctx, unsigned shader, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd4_context *fd4_ctx = fd4_context(ctx); + uint16_t astc_srgb = 0; + unsigned i; + + for (i = 0; i < nr; i++) { + if (views[i]) { + struct fd4_pipe_sampler_view *view = + fd4_pipe_sampler_view(views[i]); + if (view->astc_srgb) + astc_srgb |= (1 << i); + } + } + + fd_set_sampler_views(pctx, shader, start, nr, views); + + if (shader == PIPE_SHADER_FRAGMENT) { + fd4_ctx->fastc_srgb = astc_srgb; + } else if (shader == PIPE_SHADER_VERTEX) { + fd4_ctx->vastc_srgb = astc_srgb; + } +} + void fd4_texture_init(struct pipe_context *pctx) { pctx->create_sampler_state = fd4_sampler_state_create; pctx->bind_sampler_states = fd4_sampler_states_bind; pctx->create_sampler_view = fd4_sampler_view_create; - pctx->set_sampler_views = fd_set_sampler_views; + pctx->set_sampler_views = fd4_set_sampler_views; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h 
index 6ca34ade60d..21ceadd8c63 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h @@ -53,6 +53,7 @@ struct fd4_pipe_sampler_view { struct pipe_sampler_view base; uint32_t texconst0, texconst1, texconst2, texconst3, texconst4; uint32_t offset; + bool astc_srgb; }; static inline struct fd4_pipe_sampler_view * diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f68275e568c..a40d3aa3b40 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -377,6 +377,12 @@ struct ir3 { unsigned keeps_count, keeps_sz; struct ir3_instruction **keeps; + /* Track texture sample instructions which need texture state + * patched in (for astc-srgb workaround): + */ + unsigned astc_srgb_count, astc_srgb_sz; + struct ir3_instruction **astc_srgb; + /* List of blocks: */ struct list_head block_list; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 027673afe1c..b8b9e4a0518 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -94,6 +94,7 @@ static void print_usage(void) printf(" --saturate-s MASK - bitmask of samplers to saturate S coord\n"); printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n"); printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n"); + printf(" --astc-srgb MASK - bitmask of samplers to enable astc-srgb workaround\n"); printf(" --stream-out - enable stream-out (aka transform feedback)\n"); printf(" --ucp MASK - bitmask of enabled user-clip-planes\n"); printf(" --gpu GPU_ID - specify gpu-id (default 320)\n"); @@ -174,6 +175,13 @@ int main(int argc, char **argv) continue; } + if (!strcmp(argv[n], "--astc-srgb")) { + debug_printf(" %s %s", argv[n], argv[n+1]); + key.vastc_srgb = key.fastc_srgb = strtol(argv[n+1], NULL, 0); + n += 2; + continue; + } + if 
(!strcmp(argv[n], "--stream-out")) { struct pipe_stream_output_info *so = &s.stream_output; debug_printf(" %s", argv[n]); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 940ca7744a2..abdb1c27c91 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -108,8 +108,10 @@ struct ir3_compile { */ bool array_index_add_half; - /* for looking up which system value is which */ - unsigned sysval_semantics[8]; + /* on a4xx, bitmask of samplers which need astc+srgb workaround: */ + unsigned astc_srgb; + + unsigned max_texture_index; /* set if we encounter something we can't handle yet, so we * can bail cleanly and fallback to TGSI compiler f/e @@ -134,6 +136,12 @@ compile_init(struct ir3_compiler *compiler, ctx->levels_add_one = false; ctx->unminify_coords = false; ctx->array_index_add_half = true; + + if (so->type == SHADER_VERTEX) + ctx->astc_srgb = so->key.vastc_srgb; + else if (so->type == SHADER_FRAGMENT) + ctx->astc_srgb = so->key.fastc_srgb; + } else { /* no special handling for "flat" */ ctx->flat_bypass = false; @@ -620,14 +628,14 @@ create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp) */ static void split_dest(struct ir3_block *block, struct ir3_instruction **dst, - struct ir3_instruction *src, unsigned n) + struct ir3_instruction *src, unsigned base, unsigned n) { struct ir3_instruction *prev = NULL; for (int i = 0, j = 0; i < n; i++) { struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); ir3_reg_create(split, 0, IR3_REG_SSA); ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; - split->fo.off = i; + split->fo.off = i + base; if (prev) { split->cp.left = prev; @@ -637,7 +645,7 @@ split_dest(struct ir3_block *block, struct ir3_instruction **dst, } prev = split; - if (src->regs[0]->wrmask & (1 << i)) + if (src->regs[0]->wrmask & (1 << (i + base))) dst[j++] = split; } } @@ -1543,12 
+1551,35 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) if (opc == OPC_GETLOD) type = TYPE_U32; - sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, - flags, tex->texture_index, tex->texture_index, - create_collect(b, src0, nsrc0), - create_collect(b, src1, nsrc1)); + unsigned tex_idx = tex->texture_index; + + ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx); + + struct ir3_instruction *col0 = create_collect(b, src0, nsrc0); + struct ir3_instruction *col1 = create_collect(b, src1, nsrc1); + + sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, flags, + tex_idx, tex_idx, col0, col1); + + if ((ctx->astc_srgb & (1 << tex_idx)) && !nir_tex_instr_is_query(tex)) { + /* only need first 3 components: */ + sam->regs[0]->wrmask = 0x7; + split_dest(b, dst, sam, 0, 3); - split_dest(b, dst, sam, 4); + /* we need to sample the alpha separately with a non-ASTC + * texture state: + */ + sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_W, flags, + tex_idx, tex_idx, col0, col1); + + array_insert(ctx->ir->astc_srgb, sam); + + /* fixup .w component: */ + split_dest(b, &dst[3], sam, 3, 1); + } else { + /* normal (non-workaround) case: */ + split_dest(b, dst, sam, 0, 4); + } /* GETLOD returns results in 4.8 fixed point */ if (opc == OPC_GETLOD) { @@ -1576,7 +1607,7 @@ emit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex) /* even though there is only one component, since it ends * up in .z rather than .x, we need a split_dest() */ - split_dest(b, dst, sam, 3); + split_dest(b, dst, sam, 0, 3); /* The # of levels comes from getinfo.z. We need to add 1 to it, since * the value in TEX_CONST_0 is zero-based. @@ -1610,7 +1641,7 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex) sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, tex->texture_index, tex->texture_index, lod, NULL); - split_dest(b, dst, sam, 4); + split_dest(b, dst, sam, 0, 4); /* Array size actually ends up in .w rather than .z. 
This doesn't * matter for miplevel 0, but for higher mips the value in z is @@ -2268,6 +2299,40 @@ fixup_frag_inputs(struct ir3_compile *ctx) ir->inputs = inputs; } +/* Fixup tex sampler state for astc/srgb workaround instructions. We + * need to assign the tex state indexes for these after we know the + * max tex index. + */ +static void +fixup_astc_srgb(struct ir3_compile *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + /* indexed by original tex idx, value is newly assigned alpha sampler + * state tex idx. Zero is invalid since there is at least one sampler + * if we get here. + */ + unsigned alt_tex_state[16] = {0}; + unsigned tex_idx = ctx->max_texture_index + 1; + unsigned idx = 0; + + so->astc_srgb.base = tex_idx; + + for (unsigned i = 0; i < ctx->ir->astc_srgb_count; i++) { + struct ir3_instruction *sam = ctx->ir->astc_srgb[i]; + + compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state)); + + if (alt_tex_state[sam->cat5.tex] == 0) { + /* assign new alternate/alpha tex state slot: */ + alt_tex_state[sam->cat5.tex] = tex_idx++; + so->astc_srgb.orig_idx[idx++] = sam->cat5.tex; + so->astc_srgb.count++; + } + + sam->cat5.tex = alt_tex_state[sam->cat5.tex]; + } +} + int ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_shader_variant *so) @@ -2433,6 +2498,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, so->inputs[i].compmask = compmask; } + if (ctx->astc_srgb) + fixup_astc_srgb(ctx); + /* We need to do legalize after (for frag shader's) the "bary.f" * offsets (inloc) have been assigned. 
*/ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index c05b52e7a5e..435a565e61e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -223,6 +223,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) key.vsaturate_s = 0; key.vsaturate_t = 0; key.vsaturate_r = 0; + key.vastc_srgb = 0; } break; case SHADER_VERTEX: @@ -233,6 +234,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) key.fsaturate_s = 0; key.fsaturate_t = 0; key.fsaturate_r = 0; + key.fastc_srgb = 0; } break; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index c89dc29ff08..e81e80d328f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -104,6 +104,9 @@ struct ir3_shader_key { * shader: */ uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + + /* bitmask of samplers which need astc srgb workaround: */ + uint16_t vastc_srgb, fastc_srgb; }; static inline bool @@ -222,6 +225,14 @@ struct ir3_shader_variant { uint32_t val[4]; } immediates[64]; + /* for astc srgb workaround, the number/base of additional + * alpha tex states we need, and index of original tex states + */ + struct { + unsigned base, count; + unsigned orig_idx[16]; + } astc_srgb; + /* shader variants form a linked list: */ struct ir3_shader_variant *next; -- 2.30.2