freedreno/ir3: add meta instruction for pre-fs texture fetch
author Rob Clark <robdclark@chromium.org>
Fri, 11 Oct 2019 22:57:22 +0000 (15:57 -0700)
committer Rob Clark <robdclark@gmail.com>
Fri, 18 Oct 2019 21:11:54 +0000 (21:11 +0000)
Add a placeholder instruction to track texture fetches made prior to FS
shader dispatch.  These, like meta:input instructions, are scheduled
before any real instructions, so that RA realizes their result values
are live before the first real instruction.  They also give legalize a
way to track uses of the fetched samples that require (sy) sync flags.

There is some related special handling for varying texcoord inputs used
for pre-fs-fetch, so that they are not DCE'd and remain in linkage
between FS and previous stage.  Note that we could almost avoid this
special handling by giving meta:tex_prefetch real src arguments, except
that in the FS stage, inputs are actual bary.f/ldlv instructions.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/freedreno/ir3/instr-a3xx.h
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_legalize.c
src/freedreno/ir3/ir3_print.c
src/freedreno/ir3/ir3_sched.c

index b0db28eb63583a07bfd8f896caaa75f0e770b9a3..3887736444d47b5c1082d98edf688c0504ef880d 100644 (file)
@@ -212,6 +212,11 @@ typedef enum {
        OPC_META_FO         = _OPC(-1, 2),
        OPC_META_FI         = _OPC(-1, 3),
 
+       /* placeholder for texture fetches that run before FS invocation
+        * starts:
+        */
+       OPC_META_TEX_PREFETCH = _OPC(-1, 4),
+
 } opc_t;
 
 #define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
index 8b8788a8a97571a84fcc32177423b2e977d40e64..8b7bdc618b2579a0ddd5e29c151a7cf48bff92ca 100644 (file)
@@ -267,6 +267,10 @@ struct ir3_instruction {
                struct {
                        int off;              /* component/offset */
                } fo;
+               struct {
+                       unsigned samp, tex;
+                       unsigned input_offset;
+               } prefetch;
                struct {
                        /* for sysvals, identifies the sysval type.  Mostly so we can
                         * identify the special cases where a sysval should not be DCE'd
@@ -1465,6 +1469,9 @@ INSTR4F(G, ATOMIC_XOR)
 INSTR0(BAR)
 INSTR0(FENCE)
 
+/* meta instructions: */
+INSTR0(META_TEX_PREFETCH);
+
 /* ************************************************************************* */
 /* split this out or find some helper to use.. like main/bitset.h.. */
 
index 39bef63a7803d6547eaabbf1a0ef637832fa4da3..38c60f8cfe698324bf5dc0da041b1b625dac85b7 100644 (file)
@@ -2635,6 +2635,16 @@ pack_inlocs(struct ir3_context *ctx)
                                compile_assert(ctx, i < so->inputs_count);
 
                                used_components[i] |= 1 << j;
+                       } else if (instr->opc == OPC_META_TEX_PREFETCH) {
+                               for (int n = 0; n < 2; n++) {
+                                       unsigned inloc = instr->prefetch.input_offset + n;
+                                       unsigned i = inloc / 4;
+                                       unsigned j = inloc % 4;
+
+                                       compile_assert(ctx, i < so->inputs_count);
+
+                                       used_components[i] |= 1 << j;
+                               }
                        }
                }
        }
index 1af80367e50af8497b98aa96f3b4e98ffb3007e0..9a0c83042a45c0a3440c9580e3e26264cf530949 100644 (file)
@@ -118,7 +118,10 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
 
                n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY);
 
-               if (is_meta(n))
+               /* _meta::tex_prefetch instructions removed later in
+                * collect_tex_prefetches()
+                */
+               if (is_meta(n) && (n->opc != OPC_META_TEX_PREFETCH))
                        continue;
 
                if (is_input(n)) {
@@ -237,7 +240,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                if (is_sfu(n))
                        regmask_set(&state->needs_ss, n->regs[0]);
 
-               if (is_tex(n)) {
+               if (is_tex(n) || (n->opc == OPC_META_TEX_PREFETCH)) {
                        regmask_set(&state->needs_sy, n->regs[0]);
                        ctx->need_pixlod = true;
                } else if (n->opc == OPC_RESINFO) {
index cc6572d90565f38f56c84a6a1b9e3375e2afa43c..63bb5b16f7a64621268da0d7ece4508b2ff6756d 100644 (file)
@@ -53,6 +53,7 @@ static void print_instr_name(struct ir3_instruction *instr)
                case OPC_META_INPUT:  printf("_meta:in");   break;
                case OPC_META_FO:     printf("_meta:fo");   break;
                case OPC_META_FI:     printf("_meta:fi");   break;
+               case OPC_META_TEX_PREFETCH: printf("_meta:tex_prefetch"); break;
 
                /* shouldn't hit here.. just for debugging: */
                default: printf("_meta:%d", instr->opc);    break;
@@ -181,6 +182,9 @@ print_instr(struct ir3_instruction *instr, int lvl)
 
        if (instr->opc == OPC_META_FO) {
                printf(", off=%d", instr->fo.off);
+       } else if (instr->opc == OPC_META_TEX_PREFETCH) {
+               printf(", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex,
+                               instr->prefetch.samp, instr->prefetch.input_offset);
        }
 
        if (is_flow(instr) && instr->cat0.target) {
index 96897f60e9249ef3c74196d125041fbe543424da..635c693095afa7e42eac1a25d06d4b182085f5a3 100644 (file)
@@ -788,7 +788,8 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
         * occupied), and move remaining to depth sorted list:
         */
        list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
-               if (instr->opc == OPC_META_INPUT) {
+               if ((instr->opc == OPC_META_INPUT) ||
+                               (instr->opc == OPC_META_TEX_PREFETCH)) {
                        schedule(ctx, instr);
                } else {
                        ir3_insert_by_depth(instr, &ctx->depth_list);