Added a few more stubs so that control reaches DestroyDevice().

[mesa.git] / src / intel / compiler / brw_fs_scoreboard.cpp
diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp

index f05a150e00fca757b6fdc5935d1593b185431e61..0e810218db6d3eac24b97db27def957cd8cd032e 100644 (file)
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@@ -52,9 +52,6 @@
   *  - tdr0 thread dependency register
   */
  
-#include <tuple>
-#include <vector>
-
  #include "brw_fs.h"
  #include "brw_cfg.h"
  
@@ -103,19 +100,30 @@ namespace {
      */
     typedef int ordered_address;
  
+   /**
+    * Return the number of instructions in the program.
+    */
+   unsigned
+   num_instructions(const backend_shader *shader)
+   {
+      return shader->cfg->blocks[shader->cfg->num_blocks - 1]->end_ip + 1;
+   }
+
     /**
      * Calculate the local ordered_address instruction counter at every
      * instruction of the shader for subsequent constant-time look-up.
      */
-   std::vector<ordered_address>
+   ordered_address *
     ordered_inst_addresses(const fs_visitor *shader)
     {
-      std::vector<ordered_address> jps;
+      ordered_address *jps = new ordered_address[num_instructions(shader)];
        ordered_address jp = 0;
+      unsigned ip = 0;
  
        foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
-         jps.push_back(jp);
+         jps[ip] = jp;
           jp += ordered_unit(inst);
+         ip++;
        }
  
        return jps;
@@ -174,6 +182,17 @@ namespace {
      * only if i == j for every pair of unsigned integers i and j.
      */
     struct equivalence_relation {
+      equivalence_relation(unsigned n) : is(new unsigned[n]), n(n)
+      {
+         for (unsigned i = 0; i < n; i++)
+            is[i] = i;
+      }
+
+      ~equivalence_relation()
+      {
+         delete[] is;
+      }
+
        /**
         * Return equivalence class index of the specified element.  Effectively
         * this is the numeric value of an arbitrary representative from the
@@ -185,7 +204,7 @@ namespace {
        unsigned
        lookup(unsigned i) const
        {
-         if (i < is.size() && is[i] != i)
+         if (i < n && is[i] != i)
              return lookup(is[i]);
           else
              return i;
@@ -195,12 +214,13 @@ namespace {
         * Create an array with the results of the lookup() method for
         * constant-time evaluation.
         */
-      std::vector<unsigned>
-      flatten() const {
-         std::vector<unsigned> ids;
+      unsigned *
+      flatten() const
+      {
+         unsigned *ids = new unsigned[n];
  
-         for (const auto i : is)
-            ids.push_back(lookup(i));
+         for (unsigned i = 0; i < n; i++)
+            ids[i] = lookup(i);
  
           return ids;
        }
@@ -223,6 +243,11 @@ namespace {
        }
  
     private:
+      equivalence_relation(const equivalence_relation &);
+
+      equivalence_relation &
+      operator=(const equivalence_relation &);
+
        /**
         * Assign the representative of \p from to be equivalent to \p to.
         *
@@ -233,17 +258,17 @@ namespace {
        assign(unsigned from, unsigned to)
        {
           if (from != to) {
-            if (from < is.size() && is[from] != from)
-               assign(is[from], to);
+            assert(from < n);
  
-            for (unsigned i = is.size(); i <= from; i++)
-               is.push_back(i);
+            if (is[from] != from)
+               assign(is[from], to);
  
              is[from] = to;
           }
        }
  
-      std::vector<unsigned> is;
+      unsigned *is;
+      unsigned n;
     };
  
     /**
@@ -256,21 +281,24 @@ namespace {
         * No dependency information.
         */
        dependency() : ordered(TGL_REGDIST_NULL), jp(INT_MIN),
-                     unordered(TGL_SBID_NULL), id(0) {}
+                     unordered(TGL_SBID_NULL), id(0),
+                     exec_all(false) {}
  
        /**
         * Construct a dependency on the in-order instruction with the provided
         * ordered_address instruction counter.
         */
-      dependency(tgl_regdist_mode mode, ordered_address jp) :
-         ordered(mode), jp(jp), unordered(TGL_SBID_NULL), id(0) {}
+      dependency(tgl_regdist_mode mode, ordered_address jp, bool exec_all) :
+         ordered(mode), jp(jp), unordered(TGL_SBID_NULL), id(0),
+         exec_all(exec_all) {}
  
        /**
         * Construct a dependency on the out-of-order instruction with the
         * specified synchronization token.
         */
-      dependency(tgl_sbid_mode mode, unsigned id) :
-         ordered(TGL_REGDIST_NULL), jp(INT_MIN), unordered(mode), id(id) {}
+      dependency(tgl_sbid_mode mode, unsigned id, bool exec_all) :
+         ordered(TGL_REGDIST_NULL), jp(INT_MIN), unordered(mode), id(id),
+         exec_all(exec_all) {}
  
        /**
         * Synchronization mode of in-order dependency, or zero if no in-order
@@ -302,6 +330,14 @@ namespace {
        /** Synchronization token of out-of-order dependency. */
        unsigned id;
  
+      /**
+       * Whether the dependency could be run with execution masking disabled,
+       * which might lead to the unwanted execution of the generating
+       * instruction in cases where a BB is executed with all channels
+       * disabled due to hardware bug GEN:BUG:1407528679.
+       */
+      bool exec_all;
+
        /**
         * Trivial in-order dependency that's always satisfied.
         *
@@ -318,7 +354,8 @@ namespace {
           return dep0.ordered == dep1.ordered &&
                  dep0.jp == dep1.jp &&
                  dep0.unordered == dep1.unordered &&
-                dep0.id == dep1.id;
+                dep0.id == dep1.id &&
+                dep0.exec_all == dep1.exec_all;
        }
  
        friend bool
@@ -328,7 +365,7 @@ namespace {
        }
     };
  
-   const dependency dependency::done = dependency(TGL_REGDIST_SRC, INT_MIN);
+   const dependency dependency::done = dependency(TGL_REGDIST_SRC, INT_MIN, false);
  
     /**
      * Return whether \p dep contains any dependency information.
@@ -363,6 +400,8 @@ namespace {
                            dep1.unordered ? dep1.id : dep0.id);
        }
  
+      dep.exec_all = dep0.exec_all || dep1.exec_all;
+
        return dep;
     }
  
@@ -559,32 +598,86 @@ namespace {
      * Dependency list handling.
      * @{
      */
+   struct dependency_list {
+      dependency_list() : deps(NULL), n(0) {}
+
+      ~dependency_list()
+      {
+         free(deps);
+      }
+
+      void
+      push_back(const dependency &dep)
+      {
+         deps = (dependency *)realloc(deps, (n + 1) * sizeof(*deps));
+         deps[n++] = dep;
+      }
+
+      unsigned
+      size() const
+      {
+         return n;
+      }
+
+      const dependency &
+      operator[](unsigned i) const
+      {
+         assert(i < n);
+         return deps[i];
+      }
+
+      dependency &
+      operator[](unsigned i)
+      {
+         assert(i < n);
+         return deps[i];
+      }
+
+   private:
+      dependency_list(const dependency_list &);
+      dependency_list &
+      operator=(const dependency_list &);
+
+      dependency *deps;
+      unsigned n;
+   };
  
     /**
      * Add dependency \p dep to the list of dependencies of an instruction
      * \p deps.
      */
     void
-   add_dependency(const std::vector<unsigned> &ids,
-                  std::vector<dependency> &deps, dependency dep)
+   add_dependency(const unsigned *ids, dependency_list &deps, dependency dep)
     {
        if (is_valid(dep)) {
           /* Translate the unordered dependency token first in order to keep
            * the list minimally redundant.
            */
-         if (dep.unordered && dep.id < ids.size())
+         if (dep.unordered)
              dep.id = ids[dep.id];
  
           /* Try to combine the specified dependency with any existing ones. */
-         for (auto &dep1 : deps) {
-            if (dep.ordered && dep1.ordered) {
-               dep1.jp = MAX2(dep1.jp, dep.jp);
-               dep1.ordered |= dep.ordered;
+         for (unsigned i = 0; i < deps.size(); i++) {
+            /* Don't combine otherwise matching dependencies if there is an
+             * exec_all mismatch which would cause a SET dependency to gain an
+             * exec_all flag, since that would prevent it from being baked
+             * into the instruction we want to allocate an SBID for.
+             */
+            if (deps[i].exec_all != dep.exec_all &&
+                (!deps[i].exec_all || (dep.unordered & TGL_SBID_SET)) &&
+                (!dep.exec_all || (deps[i].unordered & TGL_SBID_SET)))
+               continue;
+
+            if (dep.ordered && deps[i].ordered) {
+               deps[i].jp = MAX2(deps[i].jp, dep.jp);
+               deps[i].ordered |= dep.ordered;
+               deps[i].exec_all |= dep.exec_all;
                 dep.ordered = TGL_REGDIST_NULL;
              }
  
-            if (dep.unordered && dep1.unordered && dep1.id == dep.id) {
-               dep1.unordered |= dep.unordered;
+            if (dep.unordered && deps[i].unordered && deps[i].id == dep.id) {
+               deps[i].unordered |= dep.unordered;
+               deps[i].exec_all |= dep.exec_all;
                 dep.unordered = TGL_SBID_NULL;
              }
           }
@@ -597,20 +690,22 @@ namespace {
  
     /**
      * Construct a tgl_swsb annotation encoding any ordered dependencies from
-    * the dependency list \p deps of an instruction with ordered_address
-    * \p jp.
+    * the dependency list \p deps of an instruction with ordered_address \p
+    * jp.  If \p exec_all is false only dependencies known to be executed with
+    * channel masking applied will be considered in the calculation.
      */
     tgl_swsb
-   ordered_dependency_swsb(const std::vector<dependency> &deps,
-                           const ordered_address &jp)
+   ordered_dependency_swsb(const dependency_list &deps,
+                           const ordered_address &jp,
+                           bool exec_all)
     {
        unsigned min_dist = ~0u;
  
-      for (const auto &dep : deps) {
-         if (dep.ordered) {
-            const unsigned dist = jp - dep.jp;
+      for (unsigned i = 0; i < deps.size(); i++) {
+         if (deps[i].ordered && exec_all >= deps[i].exec_all) {
+            const unsigned dist = jp - deps[i].jp;
              const unsigned max_dist = 10;
-            assert(jp > dep.jp);
+            assert(jp > deps[i].jp);
              if (dist <= max_dist)
                 min_dist = MIN3(min_dist, dist, 7);
           }
@@ -621,28 +716,35 @@ namespace {
  
     /**
      * Return whether the dependency list \p deps of an instruction with
-    * ordered_address \p jp has any non-trivial ordered dependencies.
+    * ordered_address \p jp has any non-trivial ordered dependencies.  If \p
+    * exec_all is false only dependencies known to be executed with channel
+    * masking applied will be considered in the calculation.
      */
     bool
-   find_ordered_dependency(const std::vector<dependency> &deps,
-                           const ordered_address &jp)
+   find_ordered_dependency(const dependency_list &deps,
+                           const ordered_address &jp,
+                           bool exec_all)
     {
-      return ordered_dependency_swsb(deps, jp).regdist;
+      return ordered_dependency_swsb(deps, jp, exec_all).regdist;
     }
  
     /**
      * Return the full tgl_sbid_mode bitset for the first unordered dependency
      * on the list \p deps that matches the specified tgl_sbid_mode, or zero if
-    * no such dependency is present.
+    * no such dependency is present.  If \p exec_all is false only
+    * dependencies known to be executed with channel masking applied will be
+    * considered in the calculation.
      */
     tgl_sbid_mode
-   find_unordered_dependency(const std::vector<dependency> &deps,
-                             tgl_sbid_mode unordered)
+   find_unordered_dependency(const dependency_list &deps,
+                             tgl_sbid_mode unordered,
+                             bool exec_all)
     {
        if (unordered) {
-         for (const auto &dep : deps) {
-            if (unordered & dep.unordered)
-               return dep.unordered;
+         for (unsigned i = 0; i < deps.size(); i++) {
+            if ((unordered & deps[i].unordered) &&
+                exec_all >= deps[i].exec_all)
+               return deps[i].unordered;
           }
        }
  
@@ -657,20 +759,21 @@ namespace {
      */
     tgl_sbid_mode
     baked_unordered_dependency_mode(const fs_inst *inst,
-                                   const std::vector<dependency> &deps,
+                                   const dependency_list &deps,
                                     const ordered_address &jp)
     {
-      const bool has_ordered = find_ordered_dependency(deps, jp);
+      const bool exec_all = inst->force_writemask_all;
+      const bool has_ordered = find_ordered_dependency(deps, jp, exec_all);
  
-      if (find_unordered_dependency(deps, TGL_SBID_SET))
-         return find_unordered_dependency(deps, TGL_SBID_SET);
+      if (find_unordered_dependency(deps, TGL_SBID_SET, exec_all))
+         return find_unordered_dependency(deps, TGL_SBID_SET, exec_all);
        else if (has_ordered && is_unordered(inst))
           return TGL_SBID_NULL;
-      else if (find_unordered_dependency(deps, TGL_SBID_DST) &&
+      else if (find_unordered_dependency(deps, TGL_SBID_DST, exec_all) &&
                 (!has_ordered || !is_unordered(inst)))
-         return find_unordered_dependency(deps, TGL_SBID_DST);
+         return find_unordered_dependency(deps, TGL_SBID_DST, exec_all);
        else if (!has_ordered)
-         return find_unordered_dependency(deps, TGL_SBID_SRC);
+         return find_unordered_dependency(deps, TGL_SBID_SRC, exec_all);
        else
           return TGL_SBID_NULL;
     }
@@ -687,18 +790,20 @@ namespace {
      * instruction \p inst.
      */
     void
-   update_inst_scoreboard(const fs_visitor *shader,
-                          const std::vector<ordered_address> &jps,
+   update_inst_scoreboard(const ordered_address *jps,
                            const fs_inst *inst, unsigned ip, scoreboard &sb)
     {
+      const bool exec_all = inst->force_writemask_all;
+
        /* Track any source registers that may be fetched asynchronously by this
         * instruction, otherwise clear the dependency in order to avoid
         * subsequent redundant synchronization.
         */
        for (unsigned i = 0; i < inst->sources; i++) {
           const dependency rd_dep =
-            inst->is_payload(i) || inst->is_math() ? dependency(TGL_SBID_SRC, ip) :
-            ordered_unit(inst) ? dependency(TGL_REGDIST_SRC, jps[ip]) :
+            (inst->is_payload(i) ||
+             inst->is_math()) ? dependency(TGL_SBID_SRC, ip, exec_all) :
+            ordered_unit(inst) ? dependency(TGL_REGDIST_SRC, jps[ip], exec_all) :
              dependency::done;
  
           for (unsigned j = 0; j < regs_read(inst, i); j++)
@@ -706,7 +811,7 @@ namespace {
        }
  
        if (is_send(inst) && inst->base_mrf != -1) {
-         const dependency rd_dep = dependency(TGL_SBID_SRC, ip);
+         const dependency rd_dep = dependency(TGL_SBID_SRC, ip, exec_all);
  
           for (unsigned j = 0; j < inst->mlen; j++)
              sb.set(brw_uvec_mrf(8, inst->base_mrf + j, 0), rd_dep);
@@ -714,8 +819,8 @@ namespace {
  
        /* Track any destination registers of this instruction. */
        const dependency wr_dep =
-         is_unordered(inst) ? dependency(TGL_SBID_DST, ip) :
-         ordered_unit(inst) ? dependency(TGL_REGDIST_DST, jps[ip]) :
+         is_unordered(inst) ? dependency(TGL_SBID_DST, ip, exec_all) :
+         ordered_unit(inst) ? dependency(TGL_REGDIST_DST, jps[ip], exec_all) :
           dependency();
  
        if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
@@ -730,15 +835,15 @@ namespace {
      * unconditionally resolved) dependencies at the end of each block of the
      * program.
      */
-   std::vector<scoreboard>
+   scoreboard *
     gather_block_scoreboards(const fs_visitor *shader,
-                            const std::vector<ordered_address> &jps)
+                            const ordered_address *jps)
     {
-      std::vector<scoreboard> sbs(shader->cfg->num_blocks);
+      scoreboard *sbs = new scoreboard[shader->cfg->num_blocks];
        unsigned ip = 0;
  
        foreach_block_and_inst(block, fs_inst, inst, shader->cfg)
-         update_inst_scoreboard(shader, jps, inst, ip++, sbs[block->num]);
+         update_inst_scoreboard(jps, inst, ip++, sbs[block->num]);
  
        return sbs;
     }
@@ -750,15 +855,14 @@ namespace {
      * Calculates the set of dependencies potentially pending at the beginning
      * of each block, and returns it as an array of scoreboard objects.
      */
-   std::pair<std::vector<scoreboard>, std::vector<unsigned>>
+   scoreboard *
     propagate_block_scoreboards(const fs_visitor *shader,
-                               const std::vector<ordered_address> &jps)
+                               const ordered_address *jps,
+                               equivalence_relation &eq)
     {
-      const std::vector<scoreboard> delta_sbs =
-         gather_block_scoreboards(shader, jps);
-      std::vector<scoreboard> in_sbs(shader->cfg->num_blocks);
-      std::vector<scoreboard> out_sbs(shader->cfg->num_blocks);
-      equivalence_relation eq;
+      const scoreboard *delta_sbs = gather_block_scoreboards(shader, jps);
+      scoreboard *in_sbs = new scoreboard[shader->cfg->num_blocks];
+      scoreboard *out_sbs = new scoreboard[shader->cfg->num_blocks];
  
        for (bool progress = true; progress;) {
           progress = false;
@@ -784,63 +888,68 @@ namespace {
           }
        }
  
-      return { std::move(in_sbs), eq.flatten() };
+      delete[] delta_sbs;
+      delete[] out_sbs;
+
+      return in_sbs;
     }
  
     /**
      * Return the list of potential dependencies of each instruction in the
      * shader based on the result of global dependency analysis.
      */
-   std::vector<std::vector<dependency>>
+   dependency_list *
     gather_inst_dependencies(const fs_visitor *shader,
-                            const std::vector<ordered_address> &jps)
+                            const ordered_address *jps)
     {
-      std::vector<scoreboard> sbs;
-      std::vector<unsigned> ids;
-      std::vector<std::vector<dependency>> deps;
+      equivalence_relation eq(num_instructions(shader));
+      scoreboard *sbs = propagate_block_scoreboards(shader, jps, eq);
+      const unsigned *ids = eq.flatten();
+      dependency_list *deps = new dependency_list[num_instructions(shader)];
        unsigned ip = 0;
  
-      std::tie(sbs, ids) = propagate_block_scoreboards(shader, jps);
-
        foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
+         const bool exec_all = inst->force_writemask_all;
           scoreboard &sb = sbs[block->num];
-         std::vector<dependency> inst_deps;
  
           for (unsigned i = 0; i < inst->sources; i++) {
              for (unsigned j = 0; j < regs_read(inst, i); j++)
-               add_dependency(ids, inst_deps, dependency_for_read(
+               add_dependency(ids, deps[ip], dependency_for_read(
                    sb.get(byte_offset(inst->src[i], REG_SIZE * j))));
           }
  
           if (is_send(inst) && inst->base_mrf != -1) {
              for (unsigned j = 0; j < inst->mlen; j++)
-               add_dependency(ids, inst_deps, dependency_for_read(
+               add_dependency(ids, deps[ip], dependency_for_read(
                    sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
           }
  
           if (is_unordered(inst))
-            add_dependency(ids, inst_deps, dependency(TGL_SBID_SET, ip));
+            add_dependency(ids, deps[ip],
+                           dependency(TGL_SBID_SET, ip, exec_all));
  
           if (!inst->no_dd_check) {
              if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
                 for (unsigned j = 0; j < regs_written(inst); j++) {
-                  add_dependency(ids, inst_deps, dependency_for_write(inst,
+                  add_dependency(ids, deps[ip], dependency_for_write(inst,
                       sb.get(byte_offset(inst->dst, REG_SIZE * j))));
                 }
              }
  
              if (is_send(inst) && inst->base_mrf != -1) {
-               for (int j = 0; j < shader->implied_mrf_writes(inst); j++)
-                  add_dependency(ids, inst_deps, dependency_for_write(inst,
+               for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
+                  add_dependency(ids, deps[ip], dependency_for_write(inst,
                       sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
              }
           }
  
-         deps.push_back(inst_deps);
-         update_inst_scoreboard(shader, jps, inst, ip, sb);
+         update_inst_scoreboard(jps, inst, ip, sb);
           ip++;
        }
  
+      delete[] sbs;
+      delete[] ids;
+
        return deps;
     }
  
@@ -850,30 +959,39 @@ namespace {
      * Allocate SBID tokens to track the execution of every out-of-order
      * instruction of the shader.
      */
-   std::vector<std::vector<dependency>>
+   dependency_list *
     allocate_inst_dependencies(const fs_visitor *shader,
-                              const std::vector<std::vector<dependency>> &deps0)
+                              const dependency_list *deps0)
     {
        /* XXX - Use bin-packing algorithm to assign hardware SBIDs optimally in
         *       shaders with a large number of SEND messages.
         */
-      std::vector<std::vector<dependency>> deps1;
-      std::vector<unsigned> ids(deps0.size(), ~0u);
+
+      /* Allocate an unordered dependency ID to hardware SBID translation
+       * table with as many entries as instructions there are in the shader,
+       * which is the maximum number of unordered IDs we can find in the
+       * program.
+       */
+      unsigned *ids = new unsigned[num_instructions(shader)];
+      for (unsigned ip = 0; ip < num_instructions(shader); ip++)
+         ids[ip] = ~0u;
+
+      dependency_list *deps1 = new dependency_list[num_instructions(shader)];
        unsigned next_id = 0;
  
-      for (const auto &inst_deps0 : deps0) {
-         std::vector<dependency> inst_deps1;
+      for (unsigned ip = 0; ip < num_instructions(shader); ip++) {
+         for (unsigned i = 0; i < deps0[ip].size(); i++) {
+            const dependency &dep = deps0[ip][i];
  
-         for (const auto &dep : inst_deps0) {
              if (dep.unordered && ids[dep.id] == ~0u)
                 ids[dep.id] = (next_id++) & 0xf;
  
-            add_dependency(ids, inst_deps1, dep);
+            add_dependency(ids, deps1[ip], dep);
           }
-
-         deps1.push_back(inst_deps1);
        }
  
+      delete[] ids;
+
        return deps1;
     }
  
@@ -884,21 +1002,28 @@ namespace {
      */
     void
     emit_inst_dependencies(fs_visitor *shader,
-                          const std::vector<ordered_address> &jps,
-                          const std::vector<std::vector<dependency>> &deps)
+                          const ordered_address *jps,
+                          const dependency_list *deps)
     {
        unsigned ip = 0;
  
        foreach_block_and_inst_safe(block, fs_inst, inst, shader->cfg) {
-         tgl_swsb swsb = ordered_dependency_swsb(deps[ip], jps[ip]);
+         const bool exec_all = inst->force_writemask_all;
+         tgl_swsb swsb = ordered_dependency_swsb(deps[ip], jps[ip], exec_all);
           const tgl_sbid_mode unordered_mode =
              baked_unordered_dependency_mode(inst, deps[ip], jps[ip]);
  
-         for (const auto &dep : deps[ip]) {
+         for (unsigned i = 0; i < deps[ip].size(); i++) {
+            const dependency &dep = deps[ip][i];
+
              if (dep.unordered) {
-               if (unordered_mode == dep.unordered && !swsb.mode) {
+               if (unordered_mode == dep.unordered &&
+                   exec_all >= dep.exec_all && !swsb.mode) {
                    /* Bake unordered dependency into the instruction's SWSB if
-                   * possible.
+                   * possible, except in cases where the current instruction
+                   * isn't marked NoMask but the dependency is, since that
+                   * might lead to data coherency issues due to
+                   * GEN:BUG:1407528679.
                     */
                    swsb.sbid = dep.id;
                    swsb.mode = dep.unordered;
@@ -907,7 +1032,7 @@ namespace {
                     * instruction.
                     */
                    const fs_builder ibld = fs_builder(shader, block, inst)
-                     .exec_all().group(1, 0);
+                                          .exec_all().group(1, 0);
                    fs_inst *sync = ibld.emit(BRW_OPCODE_SYNC, ibld.null_reg_ud(),
                                              brw_imm_ud(TGL_SYNC_NOP));
                    sync->sched.sbid = dep.id;
@@ -917,6 +1042,27 @@ namespace {
              }
           }
  
+         for (unsigned i = 0; i < deps[ip].size(); i++) {
+            const dependency &dep = deps[ip][i];
+
+            if (dep.ordered && dep.exec_all > exec_all &&
+                find_ordered_dependency(deps[ip], jps[ip], true)) {
+               /* If the current instruction is not marked NoMask but an
+                * ordered dependency is, perform the synchronization as a
+                * separate NoMask SYNC instruction in order to avoid data
+                * coherency issues due to GEN:BUG:1407528679.  The similar
+                * scenario with unordered dependencies should have been
+                * handled above.
+                */
+               const fs_builder ibld = fs_builder(shader, block, inst)
+                                       .exec_all().group(1, 0);
+               fs_inst *sync = ibld.emit(BRW_OPCODE_SYNC, ibld.null_reg_ud(),
+                                         brw_imm_ud(TGL_SYNC_NOP));
+               sync->sched = ordered_dependency_swsb(deps[ip], jps[ip], true);
+               break;
+            }
+         }
+
           /* Update the IR. */
           inst->sched = swsb;
           inst->no_dd_check = inst->no_dd_clear = false;
@@ -929,10 +1075,13 @@ bool
  fs_visitor::lower_scoreboard()
  {
     if (devinfo->gen >= 12) {
-      const std::vector<ordered_address> jps = ordered_inst_addresses(this);
-      emit_inst_dependencies(this, jps,
-         allocate_inst_dependencies(this,
-            gather_inst_dependencies(this, jps)));
+      const ordered_address *jps = ordered_inst_addresses(this);
+      const dependency_list *deps0 = gather_inst_dependencies(this, jps);
+      const dependency_list *deps1 = allocate_inst_dependencies(this, deps0);
+      emit_inst_dependencies(this, jps, deps1);
+      delete[] deps1;
+      delete[] deps0;
+      delete[] jps;
     }
  
     return true;