From c4572b7dfe7a4ae9dc6e900f89786fa9cf7769df Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 11 Sep 2015 17:20:48 -0400 Subject: [PATCH] freedreno/ir3: convert from tgsi semantic/index to varying-slot Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 30 +++-- .../drivers/freedreno/a3xx/fd3_program.c | 49 ++++---- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 31 +++-- .../drivers/freedreno/a4xx/fd4_program.c | 56 ++++----- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 112 +++++++++--------- .../drivers/freedreno/ir3/ir3_shader.c | 89 +++++++++----- .../drivers/freedreno/ir3/ir3_shader.h | 60 ++++------ 7 files changed, 234 insertions(+), 193 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index dbddb293a9c..6153d92dc21 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -351,15 +351,27 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) unsigned vtxcnt_regid = regid(63, 0); for (i = 0; i < vp->inputs_count; i++) { - uint8_t semantic = sem2name(vp->inputs[i].semantic); - if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE) - vertex_regid = vp->inputs[i].regid; - else if (semantic == TGSI_SEMANTIC_INSTANCEID) - instance_regid = vp->inputs[i].regid; - else if (semantic == IR3_SEMANTIC_VTXCNT) - vtxcnt_regid = vp->inputs[i].regid; - else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) + if (vp->inputs[i].sysval) { + switch(vp->inputs[i].slot) { + case SYSTEM_VALUE_BASE_VERTEX: + /* handled elsewhere */ + break; + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: + vertex_regid = vp->inputs[i].regid; + break; + case SYSTEM_VALUE_INSTANCE_ID: + instance_regid = vp->inputs[i].regid; + break; + case SYSTEM_VALUE_VERTEX_CNT: + vtxcnt_regid = vp->inputs[i].regid; + break; + default: + unreachable("invalid system value"); + break; + } + } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { last = i; + } } /* hw doesn't like to be configured for zero vbo's, it seems: */ @@ -370,7 +382,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) return; for (i = 0, j = 0; i <= last; i++) { - assert(sem2name(vp->inputs[i].semantic) == 0); + assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; const struct pipe_vertex_buffer *vb = diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index ef8a849617f..4ed04b38dea 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -194,24 +194,17 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */ constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0; - pos_regid = ir3_find_output_regid(vp, - ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); - posz_regid = ir3_find_output_regid(fp, - ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); - psize_regid = ir3_find_output_regid(vp, - ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); + pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS); + posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH); + psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ); if (fp->color0_mrt) { color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = - ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + ir3_find_output_regid(fp, FRAG_RESULT_COLOR); } else { - for (i = 0; i < fp->outputs_count; i++) { - ir3_semantic sem = fp->outputs[i].semantic; - unsigned idx = sem2idx(sem); - if (sem2name(sem) != TGSI_SEMANTIC_COLOR) - continue; - debug_assert(idx < ARRAY_SIZE(color_regid)); - color_regid[idx] = fp->outputs[i].regid; - } + color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0); + color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1); + color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2); + color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3); } /* adjust regids for alpha output formats. there is no alpha render @@ -280,14 +273,14 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, j = ir3_next_varying(fp, j); if (j < fp->inputs_count) { - k = ir3_find_output(vp, fp->inputs[j].semantic); + k = ir3_find_output(vp, fp->inputs[j].slot); reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid); reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask); } j = ir3_next_varying(fp, j); if (j < fp->inputs_count) { - k = ir3_find_output(vp, fp->inputs[j].semantic); + k = ir3_find_output(vp, fp->inputs[j].slot); reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid); reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask); } @@ -414,14 +407,20 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, } } - /* Replace the .xy coordinates with S/T from the point sprite. Set - * interpolation bits for .zw such that they become .01 - */ - if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) { - vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09) - << ((inloc % 16) * 2); - vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); - vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); + gl_varying_slot slot = fp->inputs[j].slot; + + /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ + if (slot >= VARYING_SLOT_VAR0) { + unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); + /* Replace the .xy coordinates with S/T from the point sprite. Set + * interpolation bits for .zw such that they become .01 + */ + if (emit->sprite_coord_enable & texmask) { + vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09) + << ((inloc % 16) * 2); + vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); + vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); + } } } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 5f36cef3e9a..c7ed1d2e379 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -333,17 +333,30 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) unsigned vtxcnt_regid = regid(63, 0); for (i = 0; i < vp->inputs_count; i++) { - uint8_t semantic = sem2name(vp->inputs[i].semantic); - if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE) - vertex_regid = vp->inputs[i].regid; - else if (semantic == TGSI_SEMANTIC_INSTANCEID) - instance_regid = vp->inputs[i].regid; - else if (semantic == IR3_SEMANTIC_VTXCNT) - vtxcnt_regid = vp->inputs[i].regid; - else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask) + if (vp->inputs[i].sysval) { + switch(vp->inputs[i].slot) { + case SYSTEM_VALUE_BASE_VERTEX: + /* handled elsewhere */ + break; + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: + vertex_regid = vp->inputs[i].regid; + break; + case SYSTEM_VALUE_INSTANCE_ID: + instance_regid = vp->inputs[i].regid; + break; + case SYSTEM_VALUE_VERTEX_CNT: + vtxcnt_regid = vp->inputs[i].regid; + break; + default: + unreachable("invalid system value"); + break; + } + } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { last = i; + } } + /* hw doesn't like to be configured for zero vbo's, it seems: */ if ((vtx->vtx->num_elements == 0) && (vertex_regid == regid(63, 0)) && @@ -352,7 +365,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) return; for (i = 0, j = 0; i <= last; i++) { - assert(sem2name(vp->inputs[i].semantic) == 0); + assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; const struct pipe_vertex_buffer *vb = diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 619eb860111..e3d5dabab4c 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -227,27 +227,22 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, /* blob seems to always use constmode currently: */ constmode = 1; - pos_regid = ir3_find_output_regid(s[VS].v, - ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); - posz_regid = ir3_find_output_regid(s[FS].v, - ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); - psize_regid = ir3_find_output_regid(s[VS].v, - ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); + pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); + posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH); + psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ); if (s[FS].v->color0_mrt) { color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = - ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR); } else { - const struct ir3_shader_variant *fp = s[FS].v; - memset(color_regid, 0, sizeof(color_regid)); - for (i = 0; i < fp->outputs_count; i++) { - ir3_semantic sem = fp->outputs[i].semantic; - unsigned idx = sem2idx(sem); - if (sem2name(sem) != TGSI_SEMANTIC_COLOR) - continue; - debug_assert(idx < ARRAY_SIZE(color_regid)); - color_regid[idx] = fp->outputs[i].regid; - } + color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0); + color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1); + color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2); + color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3); + color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4); + color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5); + color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6); + color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } /* adjust regids for alpha output formats. there is no alpha render @@ -257,7 +252,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, if (util_format_is_alpha(pipe_surface_format(bufs[i]))) color_regid[i] += 3; - /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); @@ -348,14 +342,14 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) { - k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic); + k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot); reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid); reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask); } j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) { - k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic); + k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot); reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid); reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask); } @@ -513,14 +507,20 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, } } - /* Replace the .xy coordinates with S/T from the point sprite. Set - * interpolation bits for .zw such that they become .01 - */ - if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) { - vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09) - << ((inloc % 16) * 2); - vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); - vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); + gl_varying_slot slot = s[FS].v->inputs[j].slot; + + /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ + if (slot >= VARYING_SLOT_VAR0) { + unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); + /* Replace the .xy coordinates with S/T from the point sprite. Set + * interpolation bits for .zw such that they become .01 + */ + if (emit->sprite_coord_enable & texmask) { + vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09) + << ((inloc % 16) * 2); + vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); + vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); + } } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index e4dbe64f753..17bac4106e9 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1349,14 +1349,15 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr) } } -static void add_sysval_input(struct ir3_compile *ctx, unsigned name, +static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot, struct ir3_instruction *instr) { struct ir3_shader_variant *so = ctx->so; unsigned r = regid(so->inputs_count, 0); unsigned n = so->inputs_count++; - so->inputs[n].semantic = ir3_semantic_name(name, 0); + so->inputs[n].sysval = true; + so->inputs[n].slot = slot; so->inputs[n].compmask = 1; so->inputs[n].regid = r; so->inputs[n].interpolate = INTERP_QUALIFIER_FLAT; @@ -1437,7 +1438,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_base_vertex: if (!ctx->basevertex) { ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE); - add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX, + add_sysval_input(ctx, SYSTEM_VALUE_BASE_VERTEX, ctx->basevertex); } dst[0] = ctx->basevertex; @@ -1445,7 +1446,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_vertex_id_zero_base: if (!ctx->vertex_id) { ctx->vertex_id = create_input(ctx->block, 0); - add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE, + add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, ctx->vertex_id); } dst[0] = ctx->vertex_id; @@ -1453,7 +1454,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_instance_id: if (!ctx->instance_id) { ctx->instance_id = create_input(ctx->block, 0); - add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID, + add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID, ctx->instance_id); } dst[0] = ctx->instance_id; @@ -2021,7 +2022,7 @@ emit_stream_out(struct ir3_compile *ctx) * of the shader: */ vtxcnt = create_input(ctx->in_block, 0); - add_sysval_input(ctx, IR3_SEMANTIC_VTXCNT, vtxcnt); + add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt); maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX); @@ -2139,6 +2140,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) DBG("; in: slot=%u, len=%ux%u, drvloc=%u", slot, array_len, ncomp, n); + so->inputs[n].slot = slot; so->inputs[n].compmask = (1 << ncomp) - 1; so->inputs[n].inloc = ctx->next_inloc; so->inputs[n].interpolate = INTERP_QUALIFIER_NONE; @@ -2146,23 +2148,15 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) so->inputs[n].interpolate = in->data.interpolation; if (ctx->so->type == SHADER_FRAGMENT) { - unsigned semantic_name, semantic_index; - - varying_slot_to_tgsi_semantic(slot, - &semantic_name, &semantic_index); - - so->inputs[n].semantic = - ir3_semantic_name(semantic_name, semantic_index); - for (int i = 0; i < ncomp; i++) { struct ir3_instruction *instr = NULL; unsigned idx = (n * 4) + i; - if (semantic_name == TGSI_SEMANTIC_POSITION) { + if (slot == VARYING_SLOT_POS) { so->inputs[n].bary = false; so->frag_coord = true; instr = create_frag_coord(ctx, i); - } else if (semantic_name == TGSI_SEMANTIC_FACE) { + } else if (slot == VARYING_SLOT_FACE) { so->inputs[n].bary = false; so->frag_face = true; instr = create_frag_face(ctx, i); @@ -2173,10 +2167,18 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) * we need to do flat vs smooth shading depending on * rast state: */ - if ((in->data.interpolation == INTERP_QUALIFIER_NONE) && - ((semantic_name == TGSI_SEMANTIC_COLOR) || - (semantic_name == TGSI_SEMANTIC_BCOLOR))) - so->inputs[n].rasterflat = true; + if (in->data.interpolation == INTERP_QUALIFIER_NONE) { + switch (slot) { + case VARYING_SLOT_COL0: + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC0: + case VARYING_SLOT_BFC1: + so->inputs[n].rasterflat = true; + break; + default: + break; + } + } if (ctx->flat_bypass) { if ((so->inputs[n].interpolate == INTERP_QUALIFIER_FLAT) || @@ -2193,7 +2195,6 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) ctx->ir->inputs[idx] = instr; } } else if (ctx->so->type == SHADER_VERTEX) { - so->inputs[n].semantic = 0; for (int i = 0; i < ncomp; i++) { unsigned idx = (n * 4) + i; ctx->ir->inputs[idx] = create_input(ctx->block, idx); @@ -2214,7 +2215,6 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) struct ir3_shader_variant *so = ctx->so; unsigned array_len = MAX2(glsl_get_length(out->type), 1); unsigned ncomp = glsl_get_components(out->type); - unsigned semantic_name, semantic_index; unsigned n = out->data.driver_location; unsigned slot = out->data.location; unsigned comp = 0; @@ -2222,45 +2222,42 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) DBG("; out: slot=%u, len=%ux%u, drvloc=%u", slot, array_len, ncomp, n); - if (ctx->so->type == SHADER_VERTEX) { - varying_slot_to_tgsi_semantic(slot, - &semantic_name, &semantic_index); - - switch (semantic_name) { - case TGSI_SEMANTIC_POSITION: + if (ctx->so->type == SHADER_FRAGMENT) { + switch (slot) { + case FRAG_RESULT_DEPTH: + comp = 2; /* tgsi will write to .z component */ so->writes_pos = true; break; - case TGSI_SEMANTIC_PSIZE: - so->writes_psize = true; - break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_BCOLOR: - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_TEXCOORD: + case FRAG_RESULT_COLOR: + so->color0_mrt = 1; break; default: - compile_error(ctx, "unknown VS semantic name: %s\n", - tgsi_semantic_names[semantic_name]); + if (slot >= FRAG_RESULT_DATA0) + break; + compile_error(ctx, "unknown FS output name: %s\n", + gl_frag_result_name(slot)); } - } else if (ctx->so->type == SHADER_FRAGMENT) { - frag_result_to_tgsi_semantic(slot, - &semantic_name, &semantic_index); - - switch (semantic_name) { - case TGSI_SEMANTIC_POSITION: - comp = 2; /* tgsi will write to .z component */ + } else if (ctx->so->type == SHADER_VERTEX) { + switch (slot) { + case VARYING_SLOT_POS: so->writes_pos = true; break; - case TGSI_SEMANTIC_COLOR: - if (semantic_index == -1) { - semantic_index = 0; - so->color0_mrt = 1; - } + case VARYING_SLOT_PSIZ: + so->writes_psize = true; + break; + case VARYING_SLOT_COL0: + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC0: + case VARYING_SLOT_BFC1: + case VARYING_SLOT_FOGC: break; default: - compile_error(ctx, "unknown FS semantic name: %s\n", - tgsi_semantic_names[semantic_name]); + if (slot >= VARYING_SLOT_VAR0) + break; + if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7)) + break; + compile_error(ctx, "unknown VS output name: %s\n", + gl_varying_slot_name(slot)); } } else { compile_error(ctx, "unknown shader type: %d\n", ctx->so->type); @@ -2268,8 +2265,7 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); - so->outputs[n].semantic = - ir3_semantic_name(semantic_name, semantic_index); + so->outputs[n].slot = slot; so->outputs[n].regid = regid(n, comp); so->outputs_count = MAX2(so->outputs_count, n + 1); @@ -2462,12 +2458,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, /* at this point, for binning pass, throw away unneeded outputs: */ if (so->key.binning_pass) { for (i = 0, j = 0; i < so->outputs_count; i++) { - unsigned name = sem2name(so->outputs[i].semantic); - unsigned idx = sem2idx(so->outputs[i].semantic); + unsigned slot = so->outputs[i].slot; /* throw away everything but first position/psize */ - if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) || - (name == TGSI_SEMANTIC_PSIZE))) { + if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) { if (i != j) { so->outputs[j] = so->outputs[i]; ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0]; @@ -2566,7 +2560,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, * but what we give the hw is the scalar register: */ if ((so->type == SHADER_FRAGMENT) && - (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION)) + (so->outputs[i].slot == FRAG_RESULT_DEPTH)) so->outputs[i].regid += 2; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 312174c0c6d..7b250509135 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -300,11 +300,11 @@ static void dump_reg(const char *name, uint32_t r) debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]); } -static void dump_semantic(struct ir3_shader_variant *so, - unsigned sem, const char *name) +static void dump_output(struct ir3_shader_variant *so, + unsigned slot, const char *name) { uint32_t regid; - regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0)); + regid = ir3_find_output_regid(so, slot); dump_reg(name, regid); } @@ -355,27 +355,51 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin) disasm_a3xx(bin, so->info.sizedwords, 0, so->type); - debug_printf("; %s: outputs:", type); - for (i = 0; i < so->outputs_count; i++) { - uint8_t regid = so->outputs[i].regid; - ir3_semantic sem = so->outputs[i].semantic; - debug_printf(" r%d.%c (%u:%u)", - (regid >> 2), "xyzw"[regid & 0x3], - sem2name(sem), sem2idx(sem)); - } - debug_printf("\n"); - debug_printf("; %s: inputs:", type); - for (i = 0; i < so->inputs_count; i++) { - uint8_t regid = so->inputs[i].regid; - ir3_semantic sem = so->inputs[i].semantic; - debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)", - (regid >> 2), "xyzw"[regid & 0x3], - sem2name(sem), sem2idx(sem), - so->inputs[i].compmask, - so->inputs[i].inloc, - so->inputs[i].bary); + switch (so->type) { + case SHADER_VERTEX: + debug_printf("; %s: outputs:", type); + for (i = 0; i < so->outputs_count; i++) { + uint8_t regid = so->outputs[i].regid; + debug_printf(" r%d.%c (%s)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_varying_slot_name(so->outputs[i].slot)); + } + debug_printf("\n"); + debug_printf("; %s: inputs:", type); + for (i = 0; i < so->inputs_count; i++) { + uint8_t regid = so->inputs[i].regid; + debug_printf(" r%d.%c (cm=%x,il=%u,b=%u)", + (regid >> 2), "xyzw"[regid & 0x3], + so->inputs[i].compmask, + so->inputs[i].inloc, + so->inputs[i].bary); + } + debug_printf("\n"); + break; + case SHADER_FRAGMENT: + debug_printf("; %s: outputs:", type); + for (i = 0; i < so->outputs_count; i++) { + uint8_t regid = so->outputs[i].regid; + debug_printf(" r%d.%c (%s)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_frag_result_name(so->outputs[i].slot)); + } + debug_printf("\n"); + debug_printf("; %s: inputs:", type); + for (i = 0; i < so->inputs_count; i++) { + uint8_t regid = so->inputs[i].regid; + debug_printf(" r%d.%c (%s,cm=%x,il=%u,b=%u)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_varying_slot_name(so->inputs[i].slot), + so->inputs[i].compmask, + so->inputs[i].inloc, + so->inputs[i].bary); + } + debug_printf("\n"); + break; + case SHADER_COMPUTE: + break; } - debug_printf("\n"); /* print generic shader info: */ debug_printf("; %s prog %d/%d: %u instructions, %d half, %d full\n", @@ -391,13 +415,24 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin) /* print shader type specific info: */ switch (so->type) { case SHADER_VERTEX: - dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos"); - dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize"); + dump_output(so, VARYING_SLOT_POS, "pos"); + dump_output(so, VARYING_SLOT_PSIZ, "psize"); break; case SHADER_FRAGMENT: dump_reg("pos (bary)", so->pos_regid); - dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz"); - dump_semantic(so, TGSI_SEMANTIC_COLOR, "color"); + dump_output(so, FRAG_RESULT_DEPTH, "posz"); + if (so->color0_mrt) { + dump_output(so, FRAG_RESULT_COLOR, "color"); + } else { + dump_output(so, FRAG_RESULT_DATA0, "data0"); + dump_output(so, FRAG_RESULT_DATA1, "data1"); + dump_output(so, FRAG_RESULT_DATA2, "data2"); + dump_output(so, FRAG_RESULT_DATA3, "data3"); + dump_output(so, FRAG_RESULT_DATA4, "data4"); + dump_output(so, FRAG_RESULT_DATA5, "data5"); + dump_output(so, FRAG_RESULT_DATA6, "data6"); + dump_output(so, FRAG_RESULT_DATA7, "data7"); + } /* these two are hard-coded since we don't know how to * program them to anything but all 0's... */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 13b3f6a2a85..39b8864329b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -41,28 +41,6 @@ enum ir3_driver_param { IR3_DP_VTXCNT_MAX = 1, }; -/* internal semantic used for passing vtxcnt to vertex shader to - * implement transform feedback: - */ -#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0) - -typedef uint16_t ir3_semantic; /* semantic name + index */ -static inline ir3_semantic -ir3_semantic_name(uint8_t name, uint16_t index) -{ - return (name << 8) | (index & 0xff); -} - -static inline uint8_t sem2name(ir3_semantic sem) -{ - return sem >> 8; -} - -static inline uint16_t sem2idx(ir3_semantic sem) -{ - return sem & 0xff; -} - /* Configuration key used to identify a shader variant.. different * shader variants can be used to implement features not supported * in hw (two sided color), binning-pass vertex shader, etc. @@ -148,10 +126,16 @@ struct ir3_shader_variant { uint8_t pos_regid; bool frag_coord, frag_face, color0_mrt; + /* NOTE: for input/outputs, slot is: + * gl_vert_attrib - for VS inputs + * gl_varying_slot - for VS output / FS input + * gl_frag_result - for FS output + */ + /* varyings/outputs: */ unsigned outputs_count; struct { - ir3_semantic semantic; + uint8_t slot; uint8_t regid; } outputs[16 + 2]; /* +POSITION +PSIZE */ bool writes_pos, writes_psize; @@ -159,7 +143,7 @@ struct ir3_shader_variant { /* vertices/inputs: */ unsigned inputs_count; struct { - ir3_semantic semantic; + uint8_t slot; uint8_t regid; uint8_t compmask; uint8_t ncomp; @@ -175,7 +159,9 @@ struct ir3_shader_variant { * spots where inloc is used. */ uint8_t inloc; - /* fragment shader specfic: */ + /* vertex shader specific: */ + bool sysval : 1; /* slot is a gl_system_value */ + /* fragment shader specific: */ bool bary : 1; /* fetched varying (vs one loaded into reg) */ bool rasterflat : 1; /* special handling for emit->rasterflat */ enum glsl_interp_qualifier interpolate; @@ -257,12 +243,12 @@ ir3_shader_stage(struct ir3_shader *shader) #include "pipe/p_shader_tokens.h" static inline int -ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic) +ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) { int j; for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].semantic == semantic) + if (so->outputs[j].slot == slot) return j; /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n] @@ -272,18 +258,20 @@ ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic) * OUT.COLOR[n] to IN.BCOLOR[n]. And visa versa if there is only * a OUT.BCOLOR[n] but no matching OUT.COLOR[n] */ - if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) { - unsigned idx = sem2idx(semantic); - semantic = ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx); - } else if (sem2name(semantic) == TGSI_SEMANTIC_COLOR) { - unsigned idx = sem2idx(semantic); - semantic = ir3_semantic_name(TGSI_SEMANTIC_BCOLOR, idx); + if (slot == VARYING_SLOT_BFC0) { + slot = VARYING_SLOT_COL0; + } else if (slot == VARYING_SLOT_BFC1) { + slot = VARYING_SLOT_COL1; + } else if (slot == VARYING_SLOT_COL0) { + slot = VARYING_SLOT_BFC0; + } else if (slot == VARYING_SLOT_COL1) { + slot = VARYING_SLOT_BFC1; } else { return 0; } for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].semantic == semantic) + if (so->outputs[j].slot == slot) return j; debug_assert(0); @@ -301,11 +289,11 @@ ir3_next_varying(const struct ir3_shader_variant *so, int i) } static inline uint32_t -ir3_find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic) +ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot) { int j; for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].semantic == semantic) + if (so->outputs[j].slot == slot) return so->outputs[j].regid; return regid(63, 0); } -- 2.30.2