pan/mdg: Enable out-of-order execution after texture ops
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 21 May 2020 23:14:23 +0000 (19:14 -0400)
committer Marge Bot <eric+marge@anholt.net>
Mon, 1 Jun 2020 18:38:49 +0000 (18:38 +0000)
We don't make great use of it (due to the scheduler not being aware
yet), but we can pack for it regardless and maybe pick up some win.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5286>

src/panfrost/midgard/compiler.h
src/panfrost/midgard/midgard_compile.c
src/panfrost/midgard/midgard_emit.c

index e8cb79ba163d6a7667c4d2ebdcae68b27fe1064f..1765bfd8551a1468a3e06165d43de24193e99d1c 100644 (file)
@@ -646,6 +646,7 @@ void mir_analyze_helper_requirements(compiler_context *ctx);
 
 void emit_binary_bundle(
         compiler_context *ctx,
+        midgard_block *block,
         midgard_bundle *bundle,
         struct util_dynarray *emission,
         int next_tag);
index e0d2f26a245ef37ef3efcf5c3f6499d5231d111f..1e20cfc3d4d77c00f93c09caf5ea543097329612 100644 (file)
@@ -2811,7 +2811,7 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b
                         if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
                                 lookahead = source_order_bundles[current_bundle + 1]->tag;
 
-                        emit_binary_bundle(ctx, bundle, compiled, lookahead);
+                        emit_binary_bundle(ctx, block, bundle, compiled, lookahead);
                         ++current_bundle;
                 }
 
index cf283dd3ed5b1650f1f8f756ec0be125ca0b3cf1..34b06a2caf441f193417db1f13bbbd668b2077fc 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "compiler.h"
 #include "midgard_ops.h"
+#include "midgard_quirks.h"
 
 static midgard_int_mod
 mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar)
@@ -356,6 +357,50 @@ mir_pack_swizzle_tex(midgard_instruction *ins)
         /* TODO: bias component */
 }
 
+/* Up to 3 { ALU, LDST } bundles can execute in parallel with a texture op.
+ * Given a texture op, lookahead to see how many such bundles we can flag for
+ * OoO execution */
+
+static bool
+mir_can_run_ooo(midgard_block *block, midgard_bundle *bundle,
+                unsigned dependency)
+{
+        /* First address past the block's bundle storage. A candidate at or
+         * beyond it is not a real bundle, so it must not be dereferenced */
+        midgard_bundle *limit = (midgard_bundle *)
+                ((char *) block->bundles.data + block->bundles.size);
+
+        if (bundle >= limit)
+                return false;
+
+        /* Only ALU and load/store bundles qualify; anything else (notably
+         * another texture op) cannot execute alongside a texture op */
+        bool overlappable = IS_ALU(bundle->tag) ||
+                (bundle->tag == TAG_LOAD_STORE_4);
+
+        if (!overlappable)
+                return false;
+
+        /* Reject the bundle if any instruction in it reads `dependency`,
+         * since that would be a read-after-write hazard */
+        unsigned nr_ins = bundle->instruction_count;
+
+        for (unsigned idx = 0; idx < nr_ins; ++idx) {
+                midgard_instruction *member = bundle->instructions[idx];
+
+                mir_foreach_src(member, slot) {
+                        if (member->src[slot] == dependency)
+                                return false;
+                }
+        }
+
+        /* No hazard found */
+        return true;
+}
+
+/* Counts how many bundles immediately following the texture bundle may be
+ * flagged for out-of-order execution and packs that count into the texture
+ * instruction.
+ *
+ * block:  block whose bundle storage contains `bundle`
+ * bundle: the bundle holding the texture op
+ * ins:    the texture instruction; its out_of_order field is written
+ *
+ * The count stops at the first following bundle that mir_can_run_ooo rejects,
+ * at the end of the block, or at 3, whichever comes first. */
+
+static void
+mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instruction *ins)
+{
+        /* One past the last bundle stored in the block */
+        midgard_bundle *end = (midgard_bundle *)
+                ((char *) block->bundles.data + block->bundles.size);
+
+        unsigned count = 0;
+
+        /* Test the candidate against `end` before advancing, so we never form
+         * a pointer more than one element past the storage (which would be
+         * undefined behaviour even without dereferencing it) */
+        for (midgard_bundle *next = bundle + 1;
+             count < 3 && next < end;
+             ++next, ++count) {
+                if (!mir_can_run_ooo(block, next, ins->dest))
+                        break;
+        }
+
+        ins->texture.out_of_order = count;
+}
+
 /* Load store masks are 4-bits. Load/store ops pack for that. vec4 is the
  * natural mask width; vec8 is constrained to be in pairs, vec2 is duplicated. TODO: 8-bit?
  */
@@ -529,6 +574,7 @@ midgard_sampler_type(nir_alu_type t) {
 
 void
 emit_binary_bundle(compiler_context *ctx,
+                   midgard_block *block,
                    midgard_bundle *bundle,
                    struct util_dynarray *emission,
                    int next_tag)
@@ -615,6 +661,9 @@ emit_binary_bundle(compiler_context *ctx,
 
                 mir_pack_swizzle_tex(ins);
 
+                if (!(ctx->quirks & MIDGARD_NO_OOO))
+                        mir_pack_tex_ooo(block, bundle, ins);
+
                 unsigned osz = nir_alu_type_get_type_size(ins->dest_type);
                 unsigned isz = nir_alu_type_get_type_size(ins->src_types[1]);