From: Rob Clark Date: Fri, 26 Sep 2014 14:33:11 +0000 (-0400) Subject: freedreno/a3xx: add flat interpolation mode X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ed48f91275f52f26b513fc2970233063bfa023af;p=mesa.git freedreno/a3xx: add flat interpolation mode Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 73a28dda5e6..624155b0ade 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -525,7 +525,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & FD_DIRTY_PROG) { - fd3_program_emit(ring, prog, key); + bool flat = ctx->rasterizer && ctx->rasterizer->flatshade; + fd3_program_emit(ring, prog, key, flat); } /* TODO we should not need this or fd_wfi() before emit_constants(): diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index ec5f28e5664..c43121993c0 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -183,7 +183,7 @@ emit_binning_workaround(struct fd_context *ctx) A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); - fd3_program_emit(ring, &ctx->solid_prog, key); + fd3_program_emit(ring, &ctx->solid_prog, key, false); fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), (struct fd3_vertex_buf[]) {{ .prsc = fd3_ctx->solid_vbuf, @@ -408,7 +408,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &ctx->solid_prog, key); + fd3_program_emit(ring, &ctx->solid_prog, key, false); fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), (struct fd3_vertex_buf[]) {{ .prsc = fd3_ctx->solid_vbuf, @@ -552,7 +552,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, 
struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &ctx->blit_prog, key); + fd3_program_emit(ring, &ctx->blit_prog, key, false); fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key), (struct fd3_vertex_buf[]) {{ .prsc = fd3_ctx->blit_texcoord_vbuf, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 1cf95a722a6..d7fe42ec4df 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -173,7 +173,9 @@ find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic) void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, struct ir3_shader_key key) + struct fd_program_stateobj *prog, + struct ir3_shader_key key, + boolean rasterflat) { const struct ir3_shader_variant *vp, *fp; const struct ir3_info *vsi, *fsi; @@ -334,10 +336,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ } - OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); - OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */ - OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_1 */ - OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); if (fp->writes_pos) { OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE | @@ -360,6 +358,30 @@ fd3_program_emit(struct fd_ringbuffer *ring, COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE)); OUT_RING(ring, 0x00000000); } else { + uint32_t vinterp[4] = {0}, flatshade[2] = {0}; + + /* figure out VARYING_INTERP / FLAT_SHAD register values: */ + for (j = -1; (j = next_varying(fp, j)) < (int)fp->inputs_count; ) { + uint32_t interp = fp->inputs[j].interpolate; + if ((interp == TGSI_INTERPOLATE_CONSTANT) || + ((interp == TGSI_INTERPOLATE_COLOR) && rasterflat)) { + /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG + * instead.. 
rather than -8 everywhere else.. + */ + uint32_t loc = fp->inputs[j].inloc - 8; + + /* currently assuming varyings aligned to 4 (not + * packed): + */ + debug_assert((loc % 4) == 0); + + for (i = 0; i < 4; i++, loc++) { + vinterp[loc / 16] |= FLAT << ((loc % 16) * 2); + flatshade[loc / 32] |= 1 << (loc % 32); + } + } + } + OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) | A3XX_VPC_ATTR_THRDASSIGN(1) | @@ -369,16 +391,20 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); - OUT_RING(ring, fp->shader->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ - OUT_RING(ring, fp->shader->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ - OUT_RING(ring, fp->shader->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ - OUT_RING(ring, fp->shader->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ + OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ + OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ + OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ + OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); OUT_RING(ring, fp->shader->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ OUT_RING(ring, fp->shader->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ OUT_RING(ring, fp->shader->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ OUT_RING(ring, fp->shader->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ + + OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); + OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */ + OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */ } OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index cebaeecc5bc..0d50956cc14 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ 
b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -38,7 +38,9 @@ struct fd3_shader_stateobj { }; void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, struct ir3_shader_key key); + struct fd_program_stateobj *prog, + struct ir3_shader_key key, + boolean rasterflat); void fd3_prog_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c index aaf157f3049..167ed02f85d 100644 --- a/src/gallium/drivers/freedreno/freedreno_program.c +++ b/src/gallium/drivers/freedreno/freedreno_program.c @@ -67,7 +67,7 @@ static const char *solid_vp = static const char *blit_fp = "FRAG \n" "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 \n" - "DCL IN[0], TEXCOORD \n" + "DCL IN[0], TEXCOORD[0], PERSPECTIVE \n" "DCL OUT[0], COLOR \n" "DCL SAMP[0] \n" " 0: TEX OUT[0], IN[0], SAMP[0], 2D \n" @@ -77,7 +77,7 @@ static const char *blit_vp = "VERT \n" "DCL IN[0] \n" "DCL IN[1] \n" - "DCL OUT[0], TEXCOORD \n" + "DCL OUT[0], TEXCOORD[0] \n" "DCL OUT[1], POSITION \n" " 0: MOV OUT[0], IN[0] \n" " 0: MOV OUT[1], IN[1] \n" diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index c6026bbe74a..c2d4942b66d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -2380,6 +2380,7 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) so->inputs[n].compmask = (1 << ncomp) - 1; so->inputs[n].regid = r; so->inputs[n].inloc = ctx->next_inloc; + so->inputs[n].interpolate = decl->Interp.Interpolate; for (j = 0; j < ncomp; j++) { struct ir3_instruction *instr = NULL; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 882893fdde5..ea861649176 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -122,9 +122,20 @@ struct 
ir3_shader_variant { uint8_t regid; uint8_t compmask; uint8_t ncomp; - /* in theory inloc of fs should match outloc of vs: */ + /* In theory inloc of fs should match outloc of vs. Or + * rather the outloc of the vs is 8 plus the offset passed + * to bary.f. Presumably that +8 is to account for + * gl_Position/gl_PointSize? + * + * NOTE inloc is currently aligned to 4 (we don't try + * to pack varyings). Changing this would likely break + * assumptions in a few places (like setting up of flat + * shading in fd3_program) so be sure to check all the + * spots where inloc is used. + */ uint8_t inloc; uint8_t bary; + uint8_t interpolate; } inputs[16 + 2]; /* +POSITION +FACE */ unsigned total_in; /* sum of inputs (scalar) */ @@ -159,9 +170,9 @@ struct ir3_shader { struct ir3_shader_variant *variants; /* so far, only used for blit_prog shader.. values for - * VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE + * VPC_VARYING_PS_REPL[i].MODE */ - uint32_t vinterp[4], vpsrepl[4]; + uint32_t vpsrepl[4]; };