freedreno/ir3: add support for ucp
author Rob Clark <robclark@freedesktop.org>
Thu, 10 Sep 2015 20:09:13 +0000 (16:09 -0400)
committer Rob Clark <robclark@freedesktop.org>
Thu, 17 Sep 2015 23:57:52 +0000 (19:57 -0400)
Use the nir_lower_clip pass to add the VS/FS instructions that handle
user-clip-planes and CLIPDIST.  Wire up support for the load_user_clip_plane
intrinsic to fetch ucp[plane] values as driver-params (passed as consts
to the shader).

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index ede29f445dc96dfd7804423ab765ed1c0b6913db..cbf748a00dfc21317c94ac419fcea00515ac343d 100644 (file)
@@ -94,6 +94,7 @@ static void print_usage(void)
        printf("    --saturate-t MASK - bitmask of samplers to saturate T coord\n");
        printf("    --saturate-r MASK - bitmask of samplers to saturate R coord\n");
        printf("    --stream-out      - enable stream-out (aka transform feedback)\n");
+       printf("    --ucp MASK        - bitmask of enabled user-clip-planes\n");
        printf("    --help            - show this message\n");
 }
 
@@ -190,6 +191,13 @@ int main(int argc, char **argv)
                        continue;
                }
 
+               if (!strcmp(argv[n], "--ucp")) {
+                       debug_printf(" %s %s", argv[n], argv[n+1]);
+                       key.ucp_enables = strtol(argv[n+1], NULL, 0);
+                       n += 2;
+                       continue;
+               }
+
                if (!strcmp(argv[n], "--help")) {
                        print_usage();
                        return 0;
index 17bac4106e9e477b940bda0f4a3bb296f86cfcc9..d72464fb5a59ac16c23cc094a5eb776f2c5ea19f 100644 (file)
@@ -127,7 +127,8 @@ struct ir3_compile {
 static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
 static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
 
-static struct nir_shader *to_nir(const struct tgsi_token *tokens)
+static struct nir_shader *to_nir(const struct tgsi_token *tokens,
+               struct ir3_shader_variant *so)
 {
        struct nir_shader_compiler_options options = {
                        .lower_fpow = true,
@@ -149,6 +150,11 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
 
        nir_opt_global_to_local(s);
        nir_convert_to_ssa(s);
+       if (s->stage == MESA_SHADER_VERTEX) {
+               nir_lower_clip_vs(s, so->key.ucp_enables);
+       } else if (s->stage == MESA_SHADER_FRAGMENT) {
+               nir_lower_clip_fs(s, so->key.ucp_enables);
+       }
        nir_lower_idiv(s);
        nir_lower_load_const_to_scalar(s);
 
@@ -251,7 +257,7 @@ compile_init(struct ir3_compiler *compiler,
        lowered_tokens = lower_tgsi(ctx, tokens, so);
        if (!lowered_tokens)
                lowered_tokens = tokens;
-       ctx->s = to_nir(lowered_tokens);
+       ctx->s = to_nir(lowered_tokens, so);
 
        if (lowered_tokens != tokens)
                free((void *)lowered_tokens);
@@ -263,7 +269,7 @@ compile_init(struct ir3_compiler *compiler,
         *    num_uniform * vec4  -  user consts
         *    4 * vec4            -  UBO addresses
         *    if (vertex shader) {
-        *        1 * vec4        -  driver params (IR3_DP_*)
+        *        N * vec4        -  driver params (IR3_DP_*)
         *        1 * vec4        -  stream-out addresses
         *    }
         *
@@ -275,8 +281,8 @@ compile_init(struct ir3_compiler *compiler,
        so->first_immediate += 4;
 
        if (so->type == SHADER_VERTEX) {
-               /* one (vec4) slot for driver params (see ir3_driver_param): */
-               so->first_immediate++;
+               /* driver params (see ir3_driver_param): */
+               so->first_immediate += IR3_DP_COUNT/4;  /* convert to vec4 */
                /* one (vec4) slot for stream-output base addresses: */
                so->first_immediate++;
        }
@@ -828,7 +834,9 @@ static struct ir3_instruction *
 create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
 {
        /* first four vec4 sysval's reserved for UBOs: */
-       unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+       /* NOTE: dp is in scalar, but there can be >4 dp components: */
+       unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
+       unsigned r = regid(n + dp / 4, dp % 4);
        return create_uniform(ctx, r);
 }
 
@@ -1199,7 +1207,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
        struct ir3_block *b = ctx->block;
        struct ir3_instruction *addr, *src0, *src1;
        /* UBO addresses are the first driver params: */
-       unsigned ubo = regid(ctx->so->first_driver_param, 0);
+       unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
        unsigned off = intr->const_index[0];
 
        /* First src is ubo index, which could either be an immed or not: */
@@ -1459,6 +1467,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
                }
                dst[0] = ctx->instance_id;
                break;
+       case nir_intrinsic_load_user_clip_plane:
+               for (int i = 0; i < intr->num_components; i++) {
+                       unsigned n = idx * 4 + i;
+                       dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
+               }
+               break;
        case nir_intrinsic_discard_if:
        case nir_intrinsic_discard: {
                struct ir3_instruction *cond, *kill;
@@ -2066,7 +2080,7 @@ emit_stream_out(struct ir3_compile *ctx)
                unsigned stride = strmout->stride[i];
                struct ir3_instruction *base, *off;
 
-               base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+               base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i));
 
                /* 24-bit should be enough: */
                off = ir3_MUL_U(ctx->block, vtxcnt, 0,
@@ -2250,6 +2264,8 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
                case VARYING_SLOT_BFC0:
                case VARYING_SLOT_BFC1:
                case VARYING_SLOT_FOGC:
+               case VARYING_SLOT_CLIP_DIST0:
+               case VARYING_SLOT_CLIP_DIST1:
                        break;
                default:
                        if (slot >= VARYING_SLOT_VAR0)
index 7b2505091358961185982d6eb1ca4e19f5557b44..7b5653322560c7f42589baeb6bcabd555974f96d 100644 (file)
@@ -501,7 +501,7 @@ static void
 emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
                struct fd_constbuf_stateobj *constbuf)
 {
-       uint32_t offset = v->first_driver_param;  /* UBOs after user consts */
+       uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
        if (v->constlen > offset) {
                struct fd_context *ctx = fd_context(v->shader->pctx);
                uint32_t params = MIN2(4, v->constlen - offset) * 4;
@@ -554,7 +554,8 @@ emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 static void
 emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 {
-       uint32_t offset = v->first_driver_param + 5;  /* streamout addresses after driver-params*/
+       /* streamout addresses after driver-params: */
+       uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
        if (v->constlen > offset) {
                struct fd_context *ctx = fd_context(v->shader->pctx);
                struct fd_streamout_stateobj *so = &ctx->streamout;
@@ -657,17 +658,33 @@ ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
        /* emit driver params every time: */
        /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
        if (info && (v->type == SHADER_VERTEX)) {
-               uint32_t offset = v->first_driver_param + 4;  /* driver params after UBOs */
+               uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
                if (v->constlen >= offset) {
-                       uint32_t vertex_params[4] = {
+                       uint32_t vertex_params[IR3_DP_COUNT] = {
                                [IR3_DP_VTXID_BASE] = info->indexed ?
                                                info->index_bias : info->start,
                                [IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
                        };
+                       /* if no user-clip-planes, we don't need to emit the
+                        * entire thing:
+                        */
+                       uint32_t vertex_params_size = 4;
+
+                       if (v->key.ucp_enables) {
+                               struct pipe_clip_state *ucp = &ctx->ucp;
+                               unsigned pos = IR3_DP_UCP0_X;
+                               for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
+                                       for (unsigned j = 0; j < 4; j++) {
+                                               vertex_params[pos] = fui(ucp->ucp[i][j]);
+                                               pos++;
+                                       }
+                               }
+                               vertex_params_size = ARRAY_SIZE(vertex_params);
+                       }
 
                        fd_wfi(ctx, ring);
                        ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
-                                       ARRAY_SIZE(vertex_params), vertex_params, NULL);
+                                       vertex_params_size, vertex_params, NULL);
 
                        /* if needed, emit stream-out buffer addresses: */
                        if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
index 39b8864329bd9bdc5fa57ad236647d591bc9fa05..6dc0ce1133f517c2e3560263bc4fc8ada90ba4e9 100644 (file)
 enum ir3_driver_param {
        IR3_DP_VTXID_BASE = 0,
        IR3_DP_VTXCNT_MAX = 1,
+       /* user-clip-plane components, up to 8x vec4's: */
+       IR3_DP_UCP0_X     = 4,
+       /* .... */
+       IR3_DP_UCP7_W     = 35,
+       IR3_DP_COUNT      = 36   /* must be aligned to vec4 */
 };
 
+/* Layout of constant registers:
+ *
+ *    num_uniform * vec4  -  user consts
+ *    4 * vec4            -  UBO addresses
+ *    if (vertex shader) {
+ *        N * vec4        -  driver params (IR3_DP_*)
+ *        1 * vec4        -  stream-out addresses
+ *    }
+ *
+ * TODO this could be made more dynamic, to at least skip sections
+ * that we don't need..
+ */
+#define IR3_UBOS_OFF         0  /* UBOs after user consts */
+#define IR3_DRIVER_PARAM_OFF 4  /* driver params after UBOs */
+#define IR3_TFBOS_OFF       (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
+
 /* Configuration key used to identify a shader variant.. different
  * shader variants can be used to implement features not supported
  * in hw (two sided color), binning-pass vertex shader, etc.
@@ -48,6 +69,11 @@ enum ir3_driver_param {
 struct ir3_shader_key {
        union {
                struct {
+                       /*
+                        * Combined Vertex/Fragment shader parameters:
+                        */
+                       unsigned ucp_enables : 8;
+
                        /* do we need to check {v,f}saturate_{s,t,r}? */
                        unsigned has_per_samp : 1;