From: Hyunjun Ko Date: Thu, 7 May 2020 06:06:59 +0000 (+0000) Subject: freedreno,tu: Don't request fragcoord components not being read. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=094c7646a3ae4980f76605a922572fe2ed78f6f1;p=mesa.git freedreno,tu: Don't request fragcoord components not being read. v1. Replace the existing bool with a new bitfield and edit the register files to take a mask instead of duplicating code to do the masking. v2. Use fragcoord_compmask != 0 instead of fragcoord_compmask > 0 since it represents a bitfield. Tested with dEQP-VK.glsl.builtin_var.simple.fragcoord_xyz/w dEQP-GLES2.functional.shaders.builtin_variable.fragcoord_xyz/w Closes: #2680 Signed-off-by: Hyunjun Ko Part-of: --- diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index b3d0556a03e..d40978f574b 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1411,7 +1411,7 @@ get_barycentric_pixel(struct ir3_context *ctx) } static struct ir3_instruction * -get_frag_coord(struct ir3_context *ctx) +get_frag_coord(struct ir3_context *ctx, nir_intrinsic_instr *intr) { if (!ctx->frag_coord) { struct ir3_block *b = ctx->in_block; @@ -1436,9 +1436,11 @@ get_frag_coord(struct ir3_context *ctx) } ctx->frag_coord = ir3_create_collect(ctx, xyzw, 4); - ctx->so->frag_coord = true; } + ctx->so->fragcoord_compmask |= + nir_ssa_def_components_read(&intr->dest.ssa); + return ctx->frag_coord; } @@ -1599,7 +1601,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) emit_intrinsic_load_ubo_ldc(ctx, intr, dst); break; case nir_intrinsic_load_frag_coord: - ir3_split_dest(b, dst, get_frag_coord(ctx), 0, 4); + ir3_split_dest(b, dst, get_frag_coord(ctx, intr), 0, 4); break; case nir_intrinsic_load_sample_pos_from_id: { /* NOTE: blob seems to always use TYPE_F16 and then cov.f16f32, diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index ecb39481927..e401498612f 100644 --- 
a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -456,7 +456,8 @@ struct ir3_shader_variant { * + From the vert shader, we only need the output regid */ - bool frag_coord, frag_face, color0_mrt; + bool frag_face, color0_mrt; + uint8_t fragcoord_compmask; /* NOTE: for input/outputs, slot is: * gl_vert_attrib - for VS inputs diff --git a/src/freedreno/registers/a3xx.xml b/src/freedreno/registers/a3xx.xml index 93b14e139ff..3605e3ae409 100644 --- a/src/freedreno/registers/a3xx.xml +++ b/src/freedreno/registers/a3xx.xml @@ -870,10 +870,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> controlling blend or readback from GMEM?? --> - - - - + diff --git a/src/freedreno/registers/a4xx.xml b/src/freedreno/registers/a4xx.xml index 0fa914847f3..1c2c48c5e50 100644 --- a/src/freedreno/registers/a4xx.xml +++ b/src/freedreno/registers/a4xx.xml @@ -911,11 +911,7 @@ perhaps they should be taken with a grain of salt - - - - - + diff --git a/src/freedreno/registers/a5xx.xml b/src/freedreno/registers/a5xx.xml index 42726fcebac..945e09e5b36 100644 --- a/src/freedreno/registers/a5xx.xml +++ b/src/freedreno/registers/a5xx.xml @@ -1828,10 +1828,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> Also, when that happens, VARYING bits are turned on as well. --> - - - - + @@ -1982,10 +1979,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set Also, when that happens, VARYING bits are turned on as well. --> - - - - + diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml index fdb091c5b56..a831ccc845d 100644 --- a/src/freedreno/registers/a6xx.xml +++ b/src/freedreno/registers/a6xx.xml @@ -1855,10 +1855,7 @@ to upconvert to 32b float internally? - - - - + @@ -2083,10 +2080,7 @@ to upconvert to 32b float internally? 
- - - - + diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 33629457289..31ec5ee18a5 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -1271,12 +1271,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) CONDREG(ij_samp_regid, A6XX_GRAS_CNTL_PERSAMP_VARYING) | COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_GRAS_CNTL_SIZE) | COND(VALIDREG(ij_size_regid) && sample_shading, A6XX_GRAS_CNTL_SIZE_PERSAMP) | - COND(fs->frag_coord, - A6XX_GRAS_CNTL_SIZE | - A6XX_GRAS_CNTL_XCOORD | - A6XX_GRAS_CNTL_YCOORD | - A6XX_GRAS_CNTL_ZCOORD | - A6XX_GRAS_CNTL_WCOORD) | + COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CNTL_SIZE | + A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask)) | COND(fs->frag_face, A6XX_GRAS_CNTL_SIZE)); tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CONTROL0, 2); @@ -1287,12 +1283,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) | COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE) | COND(VALIDREG(ij_size_regid) && sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE_PERSAMP) | - COND(fs->frag_coord, - A6XX_RB_RENDER_CONTROL0_SIZE | - A6XX_RB_RENDER_CONTROL0_XCOORD | - A6XX_RB_RENDER_CONTROL0_YCOORD | - A6XX_RB_RENDER_CONTROL0_ZCOORD | - A6XX_RB_RENDER_CONTROL0_WCOORD) | + COND(fs->fragcoord_compmask != 0, A6XX_RB_RENDER_CONTROL0_SIZE | + A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask)) | COND(fs->frag_face, A6XX_RB_RENDER_CONTROL0_SIZE)); tu_cs_emit(cs, CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 6c533690a0c..a1d5e6eadc4 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -541,10 +541,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, 
fd3_blend_stateobj(ctx->blend)->rb_render_control; val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS); - val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD | - A3XX_RB_RENDER_CONTROL_YCOORD | - A3XX_RB_RENDER_CONTROL_ZCOORD | - A3XX_RB_RENDER_CONTROL_WCOORD); + val |= COND(fp->fragcoord_compmask != 0, + A3XX_RB_RENDER_CONTROL_COORD_MASK(fp->fragcoord_compmask)); /* I suppose if we needed to (which I don't *think* we need * to), we could emit this for binning pass too. But we @@ -610,7 +608,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, ->gras_cl_clip_cntl; uint8_t planes = ctx->rasterizer->clip_plane_enable; val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE); - val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD | + val |= COND(fp->fragcoord_compmask != 0, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD | A3XX_GRAS_CL_CLIP_CNTL_WCOORD); if (!emit->key.ucp_enables) val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES( diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 1654397d4dc..4e1003dd952 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -592,7 +592,8 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, zsa->rb_depth_control | COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) | COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) | - COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS)); + COND(fragz && fp->fragcoord_compmask != 0, + A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS)); /* maybe this register/bitfield needs a better name.. 
this * appears to be just disabling early-z @@ -600,7 +601,8 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, zsa->gras_alpha_control | COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) | - COND(fragz && fp->frag_coord, A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS)); + COND(fragz && fp->fragcoord_compmask != 0, + A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS)); } if (dirty & FD_DIRTY_RASTERIZER) { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index c6e12a08e70..4c48c617386 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -361,7 +361,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, 0x80000000 | /* XXX */ COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) | COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) | - COND(s[FS].v->frag_coord, A4XX_SP_FS_CTRL_REG1_FRAGCOORD)); + COND(s[FS].v->fragcoord_compmask != 0, A4XX_SP_FS_CTRL_REG1_FRAGCOORD)); OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | @@ -385,10 +385,8 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) | COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) | COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) | - COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD | - A4XX_RB_RENDER_CONTROL2_YCOORD | - A4XX_RB_RENDER_CONTROL2_ZCOORD | - A4XX_RB_RENDER_CONTROL2_WCOORD)); + COND(s[FS].v->fragcoord_compmask != 0, + A4XX_RB_RENDER_CONTROL2_COORD_MASK(s[FS].v->fragcoord_compmask))); OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(nr) | diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 
140c7c5f510..1f23d8ae225 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -613,11 +613,13 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A5XX_RB_DEPTH_PLANE_CNTL, 1); OUT_RING(ring, COND(fragz, A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) | - COND(fragz && fp->frag_coord, A5XX_RB_DEPTH_PLANE_CNTL_UNK1)); + COND(fragz && fp->fragcoord_compmask != 0, + A5XX_RB_DEPTH_PLANE_CNTL_UNK1)); OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); OUT_RING(ring, COND(fragz, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) | - COND(fragz && fp->frag_coord, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1)); + COND(fragz && fp->fragcoord_compmask != 0, + A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1)); } /* NOTE: scissor enabled bit is part of rasterizer state: */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 72f9f99335f..21d94dc72e2 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -487,7 +487,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1); OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) | COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) | - COND(s[FS].v->frag_coord, A5XX_VPC_CNTL_0_VARYING) | + COND(s[FS].v->fragcoord_compmask != 0, A5XX_VPC_CNTL_0_VARYING) | 0x10000); // XXX fd5_context(ctx)->max_loc = l.max_loc; @@ -518,7 +518,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) | - COND(s[FS].v->frag_coord, A5XX_SP_FS_CTRL_REG0_VARYING) | + COND(s[FS].v->fragcoord_compmask != 0, A5XX_SP_FS_CTRL_REG0_VARYING) | 0x40006 | /* XXX set pretty much everywhere */ A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | 
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | @@ -537,19 +537,15 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING) | - COND(s[FS].v->frag_coord, A5XX_GRAS_CNTL_XCOORD | - A5XX_GRAS_CNTL_YCOORD | - A5XX_GRAS_CNTL_ZCOORD | - A5XX_GRAS_CNTL_WCOORD | + COND(s[FS].v->fragcoord_compmask != 0, + A5XX_GRAS_CNTL_COORD_MASK(s[FS].v->fragcoord_compmask) | A5XX_GRAS_CNTL_UNK3) | COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_UNK3)); OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 2); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) | - COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD | - A5XX_RB_RENDER_CONTROL0_YCOORD | - A5XX_RB_RENDER_CONTROL0_ZCOORD | - A5XX_RB_RENDER_CONTROL0_WCOORD | + COND(s[FS].v->fragcoord_compmask != 0, + A5XX_RB_RENDER_CONTROL0_COORD_MASK(s[FS].v->fragcoord_compmask) | A5XX_RB_RENDER_CONTROL0_UNK3) | COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_UNK3)); OUT_RING(ring, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 92da43e5c3c..651d9da781d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -643,12 +643,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, CONDREG(ij_samp_regid, A6XX_GRAS_CNTL_PERSAMP_VARYING) | COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_GRAS_CNTL_SIZE) | COND(VALIDREG(ij_size_regid) && sample_shading, A6XX_GRAS_CNTL_SIZE_PERSAMP) | - COND(fs->frag_coord, - A6XX_GRAS_CNTL_SIZE | - A6XX_GRAS_CNTL_XCOORD | - A6XX_GRAS_CNTL_YCOORD | - A6XX_GRAS_CNTL_ZCOORD | - A6XX_GRAS_CNTL_WCOORD) | + COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CNTL_SIZE | + A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask)) | COND(fs->frag_face, A6XX_GRAS_CNTL_SIZE)); OUT_PKT4(ring, REG_A6XX_RB_RENDER_CONTROL0, 2); @@ -659,12 
+655,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) | COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE) | COND(VALIDREG(ij_size_regid) && sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE_PERSAMP) | - COND(fs->frag_coord, - A6XX_RB_RENDER_CONTROL0_SIZE | - A6XX_RB_RENDER_CONTROL0_XCOORD | - A6XX_RB_RENDER_CONTROL0_YCOORD | - A6XX_RB_RENDER_CONTROL0_ZCOORD | - A6XX_RB_RENDER_CONTROL0_WCOORD) | + COND(fs->fragcoord_compmask != 0, A6XX_RB_RENDER_CONTROL0_SIZE | + A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask)) | COND(fs->frag_face, A6XX_RB_RENDER_CONTROL0_SIZE)); OUT_RING(ring,