freedreno/ir3+a6xx: same VBO state for draw/binning
author Rob Clark <robdclark@chromium.org>
Fri, 2 Aug 2019 21:07:47 +0000 (14:07 -0700)
committer Rob Clark <robdclark@chromium.org>
Tue, 13 Aug 2019 15:11:26 +0000 (08:11 -0700)
Worth ~+20% on gl_driver2

Signed-off-by: Rob Clark <robdclark@chromium.org>
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_ra.c
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index cbbb9bb61b0ce9cd9086a010724d0fd5e5990759..872a6fb0fc296ee0c3fb7b6fc2e5f3a3275a0d51 100644 (file)
@@ -1081,7 +1081,7 @@ void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so);
 
 /* register assignment: */
 struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler);
-int ir3_ra(struct ir3 *ir3);
+int ir3_ra(struct ir3_shader_variant *v);
 
 /* legalize: */
 void ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary);
index dca55f33b38e65bafefb95e015f0c41276042d1f..3f4a0f43c99f316b4249c2739159e2d317369641 100644 (file)
@@ -2906,6 +2906,32 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        if (so->binning_pass && (ctx->compiler->gpu_id >= 600))
                fixup_binning_pass(ctx);
 
+       /* for a6xx+, binning and draw pass VS use same VBO state, so we
+        * need to make sure not to remove any inputs that are used by
+        * the nonbinning VS.
+        */
+       if (ctx->compiler->gpu_id >= 600 && so->binning_pass) {
+               debug_assert(so->type == MESA_SHADER_VERTEX);
+               for (int i = 0; i < ir->ninputs; i++) {
+                       struct ir3_instruction *in = ir->inputs[i];
+
+                       if (!in)
+                               continue;
+
+                       unsigned n = i / 4;
+                       unsigned c = i % 4;
+
+                       debug_assert(n < so->nonbinning->inputs_count);
+
+                       if (so->nonbinning->inputs[n].sysval)
+                               continue;
+
+                       /* be sure to keep inputs, even if only used in VS */
+                       if (so->nonbinning->inputs[n].compmask & (1 << c))
+                               array_insert(in->block, in->block->keeps, in);
+               }
+       }
+
        /* Insert mov if there's same instruction for each output.
         * eg. dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow
         */
@@ -2962,7 +2988,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                ir3_print(ir);
        }
 
-       ret = ir3_ra(ir);
+       ret = ir3_ra(so);
        if (ret) {
                DBG("RA failed!");
                goto out;
@@ -3003,13 +3029,17 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                for (j = 0; j < 4; j++) {
                        struct ir3_instruction *in = inputs[(i*4) + j];
 
-                       if (in && !(in->flags & IR3_INSTR_UNUSED)) {
-                               reg = in->regs[0]->num - j;
-                               if (half) {
-                                       compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
-                               } else {
-                                       half = !!(in->regs[0]->flags & IR3_REG_HALF);
-                               }
+                       if (!in)
+                               continue;
+
+                       if (in->flags & IR3_INSTR_UNUSED)
+                               continue;
+
+                       reg = in->regs[0]->num - j;
+                       if (half) {
+                               compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
+                       } else {
+                               half = !!(in->regs[0]->flags & IR3_REG_HALF);
                        }
                }
                so->inputs[i].regid = reg;
index 980cd62c48b21135417cabcef84370c4a3df9143..a641661a44123fd57b780b3ad815f4f7bf4b5121 100644 (file)
@@ -330,6 +330,7 @@ struct ir3_ra_instr_data {
 
 /* register-assign context, per-shader */
 struct ir3_ra_ctx {
+       struct ir3_shader_variant *v;
        struct ir3 *ir;
 
        struct ir3_ra_reg_set *set;
@@ -1091,6 +1092,60 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static int
 ra_alloc(struct ir3_ra_ctx *ctx)
 {
+       /* Pre-assign VS inputs on a6xx+ binning pass shader, to align
+        * with draw pass VS, so binning and draw pass can both use the
+        * same VBO state.
+        *
+        * Note that VS inputs are expected to be full precision.
+        */
+       bool pre_assign_inputs = (ctx->ir->compiler->gpu_id >= 600) &&
+                       (ctx->ir->type == MESA_SHADER_VERTEX) &&
+                       ctx->v->binning_pass;
+
+       if (pre_assign_inputs) {
+               for (unsigned i = 0; i < ctx->ir->ninputs; i++) {
+                       struct ir3_instruction *instr = ctx->ir->inputs[i];
+
+                       if (!instr)
+                               continue;
+
+                       debug_assert(!(instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH)));
+
+                       struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+                       /* only consider the first component: */
+                       if (id->off > 0)
+                               continue;
+
+                       unsigned name = ra_name(ctx, id);
+
+                       unsigned n = i / 4;
+                       unsigned c = i % 4;
+
+                       /* 'base' is in scalar (class 0) but we need to map that
+                        * to the conflicting register of the appropriate class (ie.
+                        * input could be vec2/vec3/etc)
+                        *
+                        * Note that the higher class (larger than scalar) regs
+                        * are setup to conflict with others in the same class,
+                        * so for example, R1 (scalar) is also the first component
+                        * of D1 (vec2/double):
+                        *
+                        *    Single (base) |  Double
+                        *    --------------+---------------
+                        *       R0         |  D0
+                        *       R1         |  D0 D1
+                        *       R2         |     D1 D2
+                        *       R3         |        D2
+                        *           .. and so on..
+                        */
+                       unsigned reg = ctx->set->gpr_to_ra_reg[id->cls]
+                                       [ctx->v->nonbinning->inputs[n].regid + c];
+
+                       ra_set_node_reg(ctx->g, name, reg);
+               }
+       }
+
        /* pre-assign array elements:
         */
        list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
@@ -1118,6 +1173,35 @@ retry:
                        }
                }
 
+               /* also need to not conflict with any pre-assigned inputs: */
+               if (pre_assign_inputs) {
+                       for (unsigned i = 0; i < ctx->ir->ninputs; i++) {
+                               struct ir3_instruction *instr = ctx->ir->inputs[i];
+
+                               if (!instr)
+                                       continue;
+
+                               struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+                               /* only consider the first component: */
+                               if (id->off > 0)
+                                       continue;
+
+                               unsigned name = ra_name(ctx, id);
+
+                               /* Check if array intersects with liverange AND register
+                                * range of the input:
+                                */
+                               if (intersects(arr->start_ip, arr->end_ip,
+                                               ctx->def[name], ctx->use[name]) &&
+                                       intersects(base, base + arr->length,
+                                               i, i + class_sizes[id->cls])) {
+                                       base = MAX2(base, i + class_sizes[id->cls]);
+                                       goto retry;
+                               }
+                       }
+               }
+
                arr->reg = base;
 
                for (unsigned i = 0; i < arr->length; i++) {
@@ -1140,11 +1224,12 @@ retry:
        return 0;
 }
 
-int ir3_ra(struct ir3 *ir)
+int ir3_ra(struct ir3_shader_variant *v)
 {
        struct ir3_ra_ctx ctx = {
-                       .ir = ir,
-                       .set = ir->compiler->set,
+                       .v = v,
+                       .ir = v->ir,
+                       .set = v->ir->compiler->set,
        };
        int ret;
 
index 7c686f0ee2a3a735640337fb507f1d2e3407a2bf..aae7baeb2e06a106d626e17957b83e279019a6f5 100644 (file)
@@ -178,9 +178,14 @@ assemble_variant(struct ir3_shader_variant *v)
        v->ir = NULL;
 }
 
+/*
+ * For creating normal shader variants, 'nonbinning' is NULL.  For
+ * creating a binning pass shader, it is a link to the corresponding
+ * normal (non-binning) variant.
+ */
 static struct ir3_shader_variant *
 create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
-               bool binning_pass)
+               struct ir3_shader_variant *nonbinning)
 {
        struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
        int ret;
@@ -190,7 +195,8 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
 
        v->id = ++shader->variant_count;
        v->shader = shader;
-       v->binning_pass = binning_pass;
+       v->binning_pass = !!nonbinning;
+       v->nonbinning = nonbinning;
        v->key = *key;
        v->type = shader->type;
 
@@ -226,7 +232,7 @@ shader_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
                        return v;
 
        /* compile new variant if it doesn't exist already: */
-       v = create_variant(shader, key, false);
+       v = create_variant(shader, key, NULL);
        if (v) {
                v->next = shader->variants;
                shader->variants = v;
@@ -246,7 +252,7 @@ ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
 
        if (v && binning_pass) {
                if (!v->binning) {
-                       v->binning = create_variant(shader, key, true);
+                       v->binning = create_variant(shader, key, v);
                        *created = true;
                }
                mtx_unlock(&shader->variants_lock);
index 53889c7f2eddd55e20f4499d000b90b21ae0ce32..f6896c3526ba7efbea615bb0e3e3089e6cb31970 100644 (file)
@@ -391,7 +391,10 @@ struct ir3_shader_variant {
         * which is pointed to by so->binning:
         */
        bool binning_pass;
-       struct ir3_shader_variant *binning;
+//     union {
+               struct ir3_shader_variant *binning;
+               struct ir3_shader_variant *nonbinning;
+//     };
 
        struct ir3_info info;
        struct ir3 *ir;
index ef584177d160d58ebae8650e16ef900b6b56b09c..59e0a9780e078463f2c28b15fa8dfdd0d9c2f64c 100644 (file)
@@ -791,10 +791,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
                struct fd_ringbuffer *state;
 
                state = build_vbo_state(emit, emit->vs);
-               fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x6);
-
-               state = build_vbo_state(emit, emit->bs);
-               fd6_emit_take_group(emit, state, FD6_GROUP_VBO_BINNING, 0x1);
+               fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x7);
        }
 
        if (dirty & FD_DIRTY_ZSA) {
index 2ffb76c39006cb8ddfd06c5406360f02aa592aa3..bc66884fb5a2398434b636f0c06dfaba29ebef50 100644 (file)
@@ -49,7 +49,6 @@ enum fd6_state_id {
        FD6_GROUP_LRZ,
        FD6_GROUP_LRZ_BINNING,
        FD6_GROUP_VBO,
-       FD6_GROUP_VBO_BINNING,
        FD6_GROUP_VS_CONST,
        FD6_GROUP_FS_CONST,
        FD6_GROUP_VS_TEX,
index 3aa91c312b32ed39a3d8eddf510d4ace95f1e8cc..a2acaa7b5c28344c7128a036b1a638d506970efe 100644 (file)
@@ -703,6 +703,14 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
        state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
        state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
 
+#ifdef DEBUG
+       for (unsigned i = 0; i < bs->inputs_count; i++) {
+               if (vs->inputs[i].sysval)
+                       continue;
+               debug_assert(bs->inputs[i].regid == vs->inputs[i].regid);
+       }
+#endif
+
        setup_config_stateobj(state->config_stateobj, state);
        setup_stateobj(state->binning_stateobj, ctx->screen, state, key, true);
        setup_stateobj(state->stateobj, ctx->screen, state, key, false);