X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fa3xx%2Ffd3_program.c;h=64eeb106e535ad8f95c97ade27a2dad7e170e7cd;hb=5b2ef7853246b455f793417e5ae74e2a861afcae;hp=a0fa40d1c25be7c3e62f78cb4048071f5a4ea119;hpb=c4ae047cabd8f7ef8ff90add285804635d8e0c50;p=mesa.git diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index a0fa40d1c25..64eeb106e53 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -28,6 +28,7 @@ #include "pipe/p_state.h" #include "util/u_string.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_format.h" @@ -39,21 +40,13 @@ #include "fd3_texture.h" #include "fd3_format.h" -static void -delete_shader_stateobj(struct fd3_shader_stateobj *so) -{ - ir3_shader_destroy(so->shader); - free(so); -} - -static struct fd3_shader_stateobj * +static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, enum shader_t type) { - struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj); - struct ir3_compiler *compiler = fd_context(pctx)->screen->compiler; - so->shader = ir3_shader_create(compiler, cso, type); - return so; + struct fd_context *ctx = fd_context(pctx); + struct ir3_compiler *compiler = ctx->screen->compiler; + return ir3_shader_create(compiler, cso, type, &ctx->debug); } static void * @@ -66,8 +59,8 @@ fd3_fp_state_create(struct pipe_context *pctx, static void fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso) { - struct fd3_shader_stateobj *so = hwcso; - delete_shader_stateobj(so); + struct ir3_shader *so = hwcso; + ir3_shader_destroy(so); } static void * @@ -80,10 +73,24 @@ fd3_vp_state_create(struct pipe_context *pctx, static void fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso) { - struct fd3_shader_stateobj *so = hwcso; - delete_shader_stateobj(so); + struct ir3_shader *so = hwcso; + ir3_shader_destroy(so); } +bool +fd3_needs_manual_clipping(const struct ir3_shader *shader, + const struct pipe_rasterizer_state *rast) +{ + uint64_t outputs = ir3_shader_outputs(shader); + + return (!rast->depth_clip || + util_bitcount(rast->clip_plane_enable) > 6 || + outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) | + (1ULL << VARYING_SLOT_CLIP_DIST0) | + (1ULL << VARYING_SLOT_CLIP_DIST1))); +} + + static void emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { @@ -135,19 +142,12 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, uint32_t fpbuffersz, vpbuffersz, fsoff; uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; int constmode; - int i, j, k; + int i, j; debug_assert(nr <= ARRAY_SIZE(color_regid)); vp = fd3_emit_get_vp(emit); - - if (emit->key.binning_pass) { - /* use dummy stateobj to simplify binning vs non-binning: */ - static const struct ir3_shader_variant binning_fp = {}; - fp = &binning_fp; - } else { - fp = fd3_emit_get_fp(emit); - } + fp = fd3_emit_get_fp(emit); vsi = &vp->info; fsi = &fp->info; @@ -266,45 +266,34 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in)); - for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) { + struct ir3_shader_linkage l = {0}; + ir3_link_shaders(&l, vp, fp); + + for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) { - k = ir3_find_output(vp, fp->inputs[j].slot); - reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid); - reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask); - } + reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); + reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); + j++; - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) { - k = ir3_find_output(vp, fp->inputs[j].slot); - reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid); - reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask); - } + reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); + reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); + j++; OUT_RING(ring, reg); } - for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) { + for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8); OUT_RING(ring, reg); } @@ -393,12 +382,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, */ unsigned compmask = fp->inputs[j].compmask; - /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG - * instead.. rather than -8 everywhere else.. - */ - uint32_t inloc = fp->inputs[j].inloc - 8; + uint32_t inloc = fp->inputs[j].inloc; - if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) || + if ((fp->inputs[j].interpolate == INTERP_MODE_FLAT) || (fp->inputs[j].rasterflat && emit->rasterflat)) { uint32_t loc = inloc;