aco: keep track of which events are used in a barrier
author Rhys Perry <pendingchaos02@gmail.com>
Tue, 11 Feb 2020 16:52:20 +0000 (16:52 +0000)
committer Marge Bot <eric+marge@anholt.net>
Tue, 3 Mar 2020 15:38:13 +0000 (15:38 +0000)
And properly handle unordered events so that they always wait for 0.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Fixes: 93c8ebfa780 ('aco: Initial commit of independent AMD compiler')
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3774>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3774>

src/amd/compiler/aco_insert_waitcnt.cpp

index 8d8024f5aa236a06ff488aae130b955d123e03a7..e0184993c6b4d3857c89ccd7c242a48f691668c7 100644 (file)
@@ -247,6 +247,7 @@ struct wait_ctx {
    bool pending_s_buffer_store = false; /* GFX10 workaround */
 
    wait_imm barrier_imm[barrier_count];
+   uint16_t barrier_events[barrier_count]; /* use wait_event notion */
 
    std::map<PhysReg,wait_entry> gpr_map;
 
@@ -291,8 +292,11 @@ struct wait_ctx {
          }
       }
 
-      for (unsigned i = 0; i < barrier_count; i++)
+      for (unsigned i = 0; i < barrier_count; i++) {
          changed |= barrier_imm[i].combine(other->barrier_imm[i]);
+         changed |= other->barrier_events[i] & ~barrier_events[i];
+         barrier_events[i] |= other->barrier_events[i];
+      }
 
       return changed;
    }
@@ -452,14 +456,25 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx)
       /* update barrier wait imms */
       for (unsigned i = 0; i < barrier_count; i++) {
          wait_imm& bar = ctx.barrier_imm[i];
-         if (bar.exp != wait_imm::unset_counter && imm.exp <= bar.exp)
+         uint16_t& bar_ev = ctx.barrier_events[i];
+         if (bar.exp != wait_imm::unset_counter && imm.exp <= bar.exp) {
             bar.exp = wait_imm::unset_counter;
-         if (bar.vm != wait_imm::unset_counter && imm.vm <= bar.vm)
+            bar_ev &= ~exp_events;
+         }
+         if (bar.vm != wait_imm::unset_counter && imm.vm <= bar.vm) {
             bar.vm = wait_imm::unset_counter;
-         if (bar.lgkm != wait_imm::unset_counter && imm.lgkm <= bar.lgkm)
+            bar_ev &= ~(vm_events & ~event_flat);
+         }
+         if (bar.lgkm != wait_imm::unset_counter && imm.lgkm <= bar.lgkm) {
             bar.lgkm = wait_imm::unset_counter;
-         if (bar.vs != wait_imm::unset_counter && imm.vs <= bar.vs)
+            bar_ev &= ~(lgkm_events & ~event_flat);
+         }
+         if (bar.vs != wait_imm::unset_counter && imm.vs <= bar.vs) {
             bar.vs = wait_imm::unset_counter;
+            bar_ev &= ~vs_events;
+         }
+         if (bar.vm == wait_imm::unset_counter && bar.lgkm == wait_imm::unset_counter)
+            bar_ev &= ~event_flat;
       }
 
       /* remove all gprs with higher counter from map */
@@ -491,12 +506,19 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx)
    return imm;
 }
 
-void update_barrier_imm(wait_ctx& ctx, uint8_t counters, barrier_interaction barrier)
+void update_barrier_counter(uint8_t *ctr, unsigned max)
+{
+   if (*ctr != wait_imm::unset_counter && *ctr < max)
+      (*ctr)++;
+}
+
+void update_barrier_imm(wait_ctx& ctx, uint8_t counters, wait_event event, barrier_interaction barrier)
 {
-   unsigned barrier_index = ffs(barrier) - 1;
    for (unsigned i = 0; i < barrier_count; i++) {
       wait_imm& bar = ctx.barrier_imm[i];
-      if (i == barrier_index) {
+      uint16_t& bar_ev = ctx.barrier_events[i];
+      if (barrier & (1 << i)) {
+         bar_ev |= event;
          if (counters & counter_lgkm)
             bar.lgkm = 0;
          if (counters & counter_vm)
@@ -505,15 +527,15 @@ void update_barrier_imm(wait_ctx& ctx, uint8_t counters, barrier_interaction bar
             bar.exp = 0;
          if (counters & counter_vs)
             bar.vs = 0;
-      } else {
-         if (counters & counter_lgkm && bar.lgkm != wait_imm::unset_counter && bar.lgkm < ctx.max_lgkm_cnt)
-            bar.lgkm++;
-         if (counters & counter_vm && bar.vm != wait_imm::unset_counter && bar.vm < ctx.max_vm_cnt)
-            bar.vm++;
-         if (counters & counter_exp && bar.exp != wait_imm::unset_counter && bar.exp < ctx.max_exp_cnt)
-            bar.exp++;
-         if (counters & counter_vs && bar.vs != wait_imm::unset_counter && bar.vs < ctx.max_vs_cnt)
-            bar.vs++;
+      } else if (!(bar_ev & ctx.unordered_events) && !(ctx.unordered_events & event)) {
+         if (counters & counter_lgkm && (bar_ev & lgkm_events) == event)
+            update_barrier_counter(&bar.lgkm, ctx.max_lgkm_cnt);
+         if (counters & counter_vm && (bar_ev & vm_events) == event)
+            update_barrier_counter(&bar.vm, ctx.max_vm_cnt);
+         if (counters & counter_exp && (bar_ev & exp_events) == event)
+            update_barrier_counter(&bar.exp, ctx.max_exp_cnt);
+         if (counters & counter_vs && (bar_ev & vs_events) == event)
+            update_barrier_counter(&bar.vs, ctx.max_vs_cnt);
       }
    }
 }
@@ -531,7 +553,7 @@ void update_counters(wait_ctx& ctx, wait_event event, barrier_interaction barrie
    if (counters & counter_vs && ctx.vs_cnt <= ctx.max_vs_cnt)
       ctx.vs_cnt++;
 
-   update_barrier_imm(ctx, counters, barrier);
+   update_barrier_imm(ctx, counters, event, barrier);
 
    if (ctx.unordered_events & event)
       return;
@@ -569,7 +591,7 @@ void update_counters_for_flat_load(wait_ctx& ctx, barrier_interaction barrier=ba
    if (ctx.vm_cnt <= ctx.max_vm_cnt)
       ctx.vm_cnt++;
 
-   update_barrier_imm(ctx, counter_vm | counter_lgkm, barrier);
+   update_barrier_imm(ctx, counter_vm | counter_lgkm, event_flat, barrier);
 
    for (std::pair<PhysReg,wait_entry> e : ctx.gpr_map)
    {