printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n");
printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n");
printf(" --stream-out - enable stream-out (aka transform feedback)\n");
+ printf(" --ucp MASK - bitmask of enabled user-clip-planes\n");
printf(" --help - show this message\n");
}
continue;
}
+ if (!strcmp(argv[n], "--ucp")) {
+ debug_printf(" %s %s", argv[n], argv[n+1]);
+ key.ucp_enables = strtol(argv[n+1], NULL, 0);
+ n += 2;
+ continue;
+ }
+
if (!strcmp(argv[n], "--help")) {
print_usage();
return 0;
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
-static struct nir_shader *to_nir(const struct tgsi_token *tokens)
+static struct nir_shader *to_nir(const struct tgsi_token *tokens,
+ struct ir3_shader_variant *so)
{
struct nir_shader_compiler_options options = {
.lower_fpow = true,
nir_opt_global_to_local(s);
nir_convert_to_ssa(s);
+ if (s->stage == MESA_SHADER_VERTEX) {
+ nir_lower_clip_vs(s, so->key.ucp_enables);
+ } else if (s->stage == MESA_SHADER_FRAGMENT) {
+ nir_lower_clip_fs(s, so->key.ucp_enables);
+ }
nir_lower_idiv(s);
nir_lower_load_const_to_scalar(s);
lowered_tokens = lower_tgsi(ctx, tokens, so);
if (!lowered_tokens)
lowered_tokens = tokens;
- ctx->s = to_nir(lowered_tokens);
+ ctx->s = to_nir(lowered_tokens, so);
if (lowered_tokens != tokens)
free((void *)lowered_tokens);
* num_uniform * vec4 - user consts
* 4 * vec4 - UBO addresses
* if (vertex shader) {
- * 1 * vec4 - driver params (IR3_DP_*)
+ * N * vec4 - driver params (IR3_DP_*)
* 1 * vec4 - stream-out addresses
* }
*
so->first_immediate += 4;
if (so->type == SHADER_VERTEX) {
- /* one (vec4) slot for driver params (see ir3_driver_param): */
- so->first_immediate++;
+ /* driver params (see ir3_driver_param): */
+ so->first_immediate += IR3_DP_COUNT/4; /* convert to vec4 */
/* one (vec4) slot for stream-output base addresses: */
so->first_immediate++;
}
create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
{
/* first four vec4 sysval's reserved for UBOs: */
- unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+ /* NOTE: dp is in scalar, but there can be >4 dp components: */
+ unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
+ unsigned r = regid(n + dp / 4, dp % 4);
return create_uniform(ctx, r);
}
struct ir3_block *b = ctx->block;
struct ir3_instruction *addr, *src0, *src1;
/* UBO addresses are the first driver params: */
- unsigned ubo = regid(ctx->so->first_driver_param, 0);
+ unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
unsigned off = intr->const_index[0];
/* First src is ubo index, which could either be an immed or not: */
}
dst[0] = ctx->instance_id;
break;
+ case nir_intrinsic_load_user_clip_plane:
+ for (int i = 0; i < intr->num_components; i++) {
+ unsigned n = idx * 4 + i;
+ dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
+ }
+ break;
case nir_intrinsic_discard_if:
case nir_intrinsic_discard: {
struct ir3_instruction *cond, *kill;
unsigned stride = strmout->stride[i];
struct ir3_instruction *base, *off;
- base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+ base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i));
/* 24-bit should be enough: */
off = ir3_MUL_U(ctx->block, vtxcnt, 0,
case VARYING_SLOT_BFC0:
case VARYING_SLOT_BFC1:
case VARYING_SLOT_FOGC:
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
break;
default:
if (slot >= VARYING_SLOT_VAR0)
emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
struct fd_constbuf_stateobj *constbuf)
{
- uint32_t offset = v->first_driver_param; /* UBOs after user consts */
+ uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
if (v->constlen > offset) {
struct fd_context *ctx = fd_context(v->shader->pctx);
uint32_t params = MIN2(4, v->constlen - offset) * 4;
static void
emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
{
- uint32_t offset = v->first_driver_param + 5; /* streamout addresses after driver-params*/
+ /* streamout addresses after driver-params: */
+ uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
if (v->constlen > offset) {
struct fd_context *ctx = fd_context(v->shader->pctx);
struct fd_streamout_stateobj *so = &ctx->streamout;
/* emit driver params every time: */
/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
if (info && (v->type == SHADER_VERTEX)) {
- uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */
+ uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
if (v->constlen >= offset) {
- uint32_t vertex_params[4] = {
+ uint32_t vertex_params[IR3_DP_COUNT] = {
[IR3_DP_VTXID_BASE] = info->indexed ?
info->index_bias : info->start,
[IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
};
+ /* if no user-clip-planes, we don't need to emit the
+ * entire thing:
+ */
+ uint32_t vertex_params_size = 4;
+
+ if (v->key.ucp_enables) {
+ struct pipe_clip_state *ucp = &ctx->ucp;
+ unsigned pos = IR3_DP_UCP0_X;
+ for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
+ for (unsigned j = 0; j < 4; j++) {
+ vertex_params[pos] = fui(ucp->ucp[i][j]);
+ pos++;
+ }
+ }
+ vertex_params_size = ARRAY_SIZE(vertex_params);
+ }
fd_wfi(ctx, ring);
ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
- ARRAY_SIZE(vertex_params), vertex_params, NULL);
+ vertex_params_size, vertex_params, NULL);
/* if needed, emit stream-out buffer addresses: */
if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
enum ir3_driver_param {
IR3_DP_VTXID_BASE = 0,
IR3_DP_VTXCNT_MAX = 1,
+ /* user-clip-plane components, up to 8x vec4's (one scalar slot per x/y/z/w component); no enumerators are defined here for slots 2 and 3, so UCP0 starts at slot 4, a vec4 boundary: */
+ IR3_DP_UCP0_X = 4,
+ /* .... the components between UCP0_X and UCP7_W are not named individually; users index them as IR3_DP_UCP0_X + (plane * 4 + component) .... */
+ IR3_DP_UCP7_W = 35,
+ IR3_DP_COUNT = 36 /* total number of scalar slots; must be aligned to vec4 (a multiple of 4) because users compute IR3_DP_COUNT/4 to get the size in vec4's */
};
+/* Layout of constant registers (all sizes and offsets in vec4 units):
+ *
+ * num_uniform * vec4 - user consts
+ * 4 * vec4 - UBO addresses
+ * if (vertex shader) {
+ * N * vec4 - driver params (IR3_DP_*), where N = IR3_DP_COUNT/4
+ * 1 * vec4 - stream-out addresses
+ * }
+ *
+ * The IR3_*_OFF values below are vec4 offsets of each section, meant
+ * to be added to the shader variant's first_driver_param.
+ *
+ * TODO this could be made more dynamic, to at least skip sections
+ * that we don't need.
+ */
+#define IR3_UBOS_OFF 0 /* UBO addresses come first, right after user consts */
+#define IR3_DRIVER_PARAM_OFF 4 /* driver params after the 4 vec4's of UBO addresses */
+#define IR3_TFBOS_OFF (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4) /* stream-out addresses after driver params */
+
/* Configuration key used to identify a shader variant.. different
* shader variants can be used to implement features not supported
* in hw (two sided color), binning-pass vertex shader, etc.
struct ir3_shader_key {
union {
struct {
+ /*
+ * Combined Vertex/Fragment shader parameters:
+ */
+ unsigned ucp_enables : 8;
+
/* do we need to check {v,f}saturate_{s,t,r}? */
unsigned has_per_samp : 1;