From: Rob Clark Date: Thu, 10 Sep 2015 20:09:13 +0000 (-0400) Subject: freedreno/ir3: add support for ucp X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=91ec210ea8e35af8a7b30fa599b67b1faa55f34c;p=mesa.git freedreno/ir3: add support for ucp Use nir_lower_clip pass for adding the VS/FS instructions to handle user-clip-planes and CLIPDIST. Wire up support for load_user_clip_plane intrinsic to fetch ucp[plane] values as driver-params (passed as consts to the shader). Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index ede29f445dc..cbf748a00df 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -94,6 +94,7 @@ static void print_usage(void) printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n"); printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n"); printf(" --stream-out - enable stream-out (aka transform feedback)\n"); + printf(" --ucp MASK - bitmask of enabled user-clip-planes\n"); printf(" --help - show this message\n"); } @@ -190,6 +191,13 @@ int main(int argc, char **argv) continue; } + if (!strcmp(argv[n], "--ucp")) { + debug_printf(" %s %s", argv[n], argv[n+1]); + key.ucp_enables = strtol(argv[n+1], NULL, 0); + n += 2; + continue; + } + if (!strcmp(argv[n], "--help")) { print_usage(); return 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 17bac4106e9..d72464fb5a5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -127,7 +127,8 @@ struct ir3_compile { static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock); -static struct 
nir_shader *to_nir(const struct tgsi_token *tokens, + struct ir3_shader_variant *so) { struct nir_shader_compiler_options options = { .lower_fpow = true, @@ -149,6 +150,11 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens) nir_opt_global_to_local(s); nir_convert_to_ssa(s); + if (s->stage == MESA_SHADER_VERTEX) { + nir_lower_clip_vs(s, so->key.ucp_enables); + } else if (s->stage == MESA_SHADER_FRAGMENT) { + nir_lower_clip_fs(s, so->key.ucp_enables); + } nir_lower_idiv(s); nir_lower_load_const_to_scalar(s); @@ -251,7 +257,7 @@ compile_init(struct ir3_compiler *compiler, lowered_tokens = lower_tgsi(ctx, tokens, so); if (!lowered_tokens) lowered_tokens = tokens; - ctx->s = to_nir(lowered_tokens); + ctx->s = to_nir(lowered_tokens, so); if (lowered_tokens != tokens) free((void *)lowered_tokens); @@ -263,7 +269,7 @@ compile_init(struct ir3_compiler *compiler, * num_uniform * vec4 - user consts * 4 * vec4 - UBO addresses * if (vertex shader) { - * 1 * vec4 - driver params (IR3_DP_*) + * N * vec4 - driver params (IR3_DP_*) * 1 * vec4 - stream-out addresses * } * @@ -275,8 +281,8 @@ compile_init(struct ir3_compiler *compiler, so->first_immediate += 4; if (so->type == SHADER_VERTEX) { - /* one (vec4) slot for driver params (see ir3_driver_param): */ - so->first_immediate++; + /* driver params (see ir3_driver_param): */ + so->first_immediate += IR3_DP_COUNT/4; /* convert to vec4 */ /* one (vec4) slot for stream-output base addresses: */ so->first_immediate++; } @@ -828,7 +834,9 @@ static struct ir3_instruction * create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ - unsigned r = regid(ctx->so->first_driver_param + 4, dp); + /* NOTE: dp is in scalar, but there can be >4 dp components: */ + unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF; + unsigned r = regid(n + dp / 4, dp % 4); return create_uniform(ctx, r); } @@ -1199,7 +1207,7 @@ emit_intrinsic_load_ubo(struct ir3_compile 
*ctx, nir_intrinsic_instr *intr, struct ir3_block *b = ctx->block; struct ir3_instruction *addr, *src0, *src1; /* UBO addresses are the first driver params: */ - unsigned ubo = regid(ctx->so->first_driver_param, 0); + unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0); unsigned off = intr->const_index[0]; /* First src is ubo index, which could either be an immed or not: */ @@ -1459,6 +1467,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) } dst[0] = ctx->instance_id; break; + case nir_intrinsic_load_user_clip_plane: + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n); + } + break; case nir_intrinsic_discard_if: case nir_intrinsic_discard: { struct ir3_instruction *cond, *kill; @@ -2066,7 +2080,7 @@ emit_stream_out(struct ir3_compile *ctx) unsigned stride = strmout->stride[i]; struct ir3_instruction *base, *off; - base = create_uniform(ctx, regid(v->first_driver_param + 5, i)); + base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i)); /* 24-bit should be enough: */ off = ir3_MUL_U(ctx->block, vtxcnt, 0, @@ -2250,6 +2264,8 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) case VARYING_SLOT_BFC0: case VARYING_SLOT_BFC1: case VARYING_SLOT_FOGC: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: break; default: if (slot >= VARYING_SLOT_VAR0) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 7b250509135..7b565332256 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -501,7 +501,7 @@ static void emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) { - uint32_t offset = v->first_driver_param; /* UBOs after user consts */ + uint32_t offset = v->first_driver_param + IR3_UBOS_OFF; if (v->constlen > offset) { struct fd_context *ctx 
= fd_context(v->shader->pctx); uint32_t params = MIN2(4, v->constlen - offset) * 4; @@ -554,7 +554,8 @@ emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring) static void emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { - uint32_t offset = v->first_driver_param + 5; /* streamout addresses after driver-params*/ + /* streamout addresses after driver-params: */ + uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF; if (v->constlen > offset) { struct fd_context *ctx = fd_context(v->shader->pctx); struct fd_streamout_stateobj *so = &ctx->streamout; @@ -657,17 +658,33 @@ ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, /* emit driver params every time: */ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ if (info && (v->type == SHADER_VERTEX)) { - uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */ + uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF; if (v->constlen >= offset) { - uint32_t vertex_params[4] = { + uint32_t vertex_params[IR3_DP_COUNT] = { [IR3_DP_VTXID_BASE] = info->indexed ? 
info->index_bias : info->start, [IR3_DP_VTXCNT_MAX] = max_tf_vtx(v), }; + /* if no user-clip-planes, we don't need to emit the + * entire thing: + */ + uint32_t vertex_params_size = 4; + + if (v->key.ucp_enables) { + struct pipe_clip_state *ucp = &ctx->ucp; + unsigned pos = IR3_DP_UCP0_X; + for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) { + for (unsigned j = 0; j < 4; j++) { + vertex_params[pos] = fui(ucp->ucp[i][j]); + pos++; + } + } + vertex_params_size = ARRAY_SIZE(vertex_params); + } fd_wfi(ctx, ring); ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0, - ARRAY_SIZE(vertex_params), vertex_params, NULL); + vertex_params_size, vertex_params, NULL); /* if needed, emit stream-out buffer addresses: */ if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 39b8864329b..6dc0ce1133f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -39,8 +39,29 @@ enum ir3_driver_param { IR3_DP_VTXID_BASE = 0, IR3_DP_VTXCNT_MAX = 1, + /* user-clip-plane components, up to 8x vec4's: */ + IR3_DP_UCP0_X = 4, + /* .... */ + IR3_DP_UCP7_W = 35, + IR3_DP_COUNT = 36 /* must be aligned to vec4 */ }; +/* Layout of constant registers: + * + * num_uniform * vec4 - user consts + * 4 * vec4 - UBO addresses + * if (vertex shader) { + * N * vec4 - driver params (IR3_DP_*) + * 1 * vec4 - stream-out addresses + * } + * + * TODO this could be made more dynamic, to at least skip sections + * that we don't need.. + */ +#define IR3_UBOS_OFF 0 /* UBOs after user consts */ +#define IR3_DRIVER_PARAM_OFF 4 /* driver params after UBOs */ +#define IR3_TFBOS_OFF (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4) + /* Configuration key used to identify a shader variant.. different * shader variants can be used to implement features not supported * in hw (two sided color), binning-pass vertex shader, etc. 
@@ -48,6 +69,11 @@ enum ir3_driver_param { struct ir3_shader_key { union { struct { + /* + * Combined Vertex/Fragment shader parameters: + */ + unsigned ucp_enables : 8; + /* do we need to check {v,f}saturate_{s,t,r}? */ unsigned has_per_samp : 1;