panfrost: Hoist blend finalize calls
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi_temprename.cpp
index b0ec854be29d19da09e377de96adccb9d0a19bac..210c25e8ba8435966b515e229b868262a0794c3c 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include "st_glsl_to_tgsi_temprename.h"
+#include "st_glsl_to_tgsi_array_merge.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_strings.h"
 #include "program/prog_instruction.h"
@@ -239,6 +240,27 @@ private:
    bool needs_component_tracking;
 };
 
+/* Tracks every access to one array so that a live range can be computed.
+ * Compared to the temporary tracking this is very simplified, mainly because
+ * with the likely indirect access one can not really establish access
+ * patterns for individual elements. Instead the live range is always
+ * evaluated for the whole array, taking into account only loops and whether
+ * the array was accessed conditionally inside a loop.
+ */
+class array_access {
+public:
+   array_access();
+   void record_access(int line, prog_scope *scope, int swizzle);
+   void get_required_live_range(array_live_range &lr);
+private:
+   int first_access;                 /* line of the first access, -1 if none */
+   int last_access;                  /* line of the latest access, -1 if none */
+   prog_scope *first_access_scope;   /* scope of the first access */
+   prog_scope *last_access_scope;    /* scope of the latest access */
+   unsigned accumulated_swizzle:4;   /* OR of all recorded component masks */
+   unsigned conditional_access_in_loop:1; /* unsigned: signed 1 bit can't hold 1 */
+};
+
 prog_scope_storage::prog_scope_storage(void *mc, int n):
    mem_ctx(mc),
    current_slot(0)
@@ -494,13 +516,8 @@ void temp_access::record_write(int line, prog_scope *scope, int writemask)
       comp[3].record_write(line, scope);
 }
 
-void temp_access::record_read(int line, prog_scope *scope, int swizzle)
+void temp_access::record_read(int line, prog_scope *scope, int readmask)
 {
-   int readmask = 0;
-   for (int idx = 0; idx < 4; ++idx) {
-      int swz = GET_SWZ(swizzle, idx);
-      readmask |= (1 << swz) & 0xF;
-   }
    update_access_mask(readmask);
 
    if (readmask & WRITEMASK_X)
@@ -513,6 +530,86 @@ void temp_access::record_read(int line, prog_scope *scope, int swizzle)
       comp[3].record_read(line, scope);
 }
 
+array_access::array_access():
+   first_access(-1),
+   last_access(-1),
+   first_access_scope(nullptr),
+   last_access_scope(nullptr),
+   accumulated_swizzle(0),
+   conditional_access_in_loop(false)
+{
+}
+
+void array_access::record_access(int line, prog_scope *scope, int swizzle)
+{
+   /* Only the very first call pins the start; every call moves the end. */
+   const bool is_first = (first_access_scope == nullptr);
+   first_access = is_first ? line : first_access;
+   first_access_scope = is_first ? scope : first_access_scope;
+   last_access = line;
+   last_access_scope = scope;
+   accumulated_swizzle |= swizzle;
+   const bool in_branch = scope->in_ifelse_scope() && scope->innermost_loop();
+   conditional_access_in_loop = in_branch || conditional_access_in_loop;
+}
+
+void array_access::get_required_live_range(array_live_range& lr)
+{  /* Writes the live range and the accumulated component mask into lr. */
+   RENAME_DEBUG(debug_log << "first_access_scope=" << first_access_scope << "\n");
+   RENAME_DEBUG(debug_log << "last_access_scope=" << last_access_scope << "\n");
+   /* Same scope for all accesses: the recorded lines are the live range. */
+   if (first_access_scope == last_access_scope) {
+      lr.set_live_range(first_access, last_access);
+      lr.set_access_mask(accumulated_swizzle);
+      return;
+   }
+   /* Otherwise widen the range while looking for a common enclosing scope. */
+   const prog_scope *shared_scope = first_access_scope;
+   const prog_scope *other_scope = last_access_scope;
+   /* The assert guards the dereferences of shared_scope below. */
+   assert(shared_scope);
+   RENAME_DEBUG(debug_log << "shared_scope=" << shared_scope << "\n");
+   /* Note: the widening below updates the first_access/last_access members. */
+   if (conditional_access_in_loop) {
+      const prog_scope *help = shared_scope->outermost_loop();
+      if (help) {
+        shared_scope = help;   /* continue from the outermost loop instead */
+      } else {
+        help = other_scope->outermost_loop();
+        if (help)
+           other_scope = help;
+      }
+      if (first_access > shared_scope->begin())
+        first_access = shared_scope->begin();   /* stretch to scope begin */
+      if (last_access < shared_scope->end())
+        last_access = shared_scope->end();      /* stretch to scope end */
+   }
+
+   /* See if any of the two is the parent of the other. */
+   if (other_scope->contains_range_of(*shared_scope)) {
+      shared_scope = other_scope;
+   } else while (!shared_scope->contains_range_of(*other_scope)) {
+      assert(shared_scope->parent());
+      if (shared_scope->type() == loop_body) {
+        if (last_access < shared_scope->end())
+            last_access = shared_scope->end();   /* cover the whole loop body */
+      }
+      shared_scope = shared_scope->parent();   /* until other_scope is enclosed */
+   }
+   /* Climb from other_scope too; leaving a loop body extends last_access. */
+   while (shared_scope != other_scope) {
+      if (other_scope->type() == loop_body) {
+        if (last_access < other_scope->end())
+            last_access = other_scope->end();
+      }
+      other_scope = other_scope->parent();
+   }
+
+   lr.set_live_range(first_access, last_access);
+   lr.set_access_mask(accumulated_swizzle);
+}
+
+
 inline static register_live_range make_live_range(int b, int e)
 {
    register_live_range lt;
@@ -898,50 +995,67 @@ register_live_range temp_comp_access::get_required_live_range()
 
 /* Helper class for sorting and searching the registers based
  * on live ranges. */
-class access_record {
+class register_merge_record {
 public:
    int begin;
    int end;
    int reg;
    bool erase;
 
-   bool operator < (const access_record& rhs) const {
+   bool operator < (const register_merge_record& rhs) const { /* order by begin */
       return begin < rhs.begin;
    }
 };
 
 class access_recorder {
 public:
-   access_recorder(int _ntemps);
+   access_recorder(int _ntemps, int _narrays);
    ~access_recorder();
 
    void record_read(const st_src_reg& src, int line, prog_scope *scope);
-   void record_write(const st_dst_reg& src, int line, prog_scope *scope);
+   void record_write(const st_dst_reg& src, int line, prog_scope *scope,
+                    bool no_reswizzle);
 
-   void get_required_live_ranges(register_live_range *live_ranges);
+   void get_required_live_ranges(register_live_range *register_live_ranges,
+                                array_live_range *array_live_ranges);
 private:
 
    int ntemps;
-   temp_access *acc;
-
+   int narrays;
+   temp_access *temp_acc;
+   array_access *array_acc;
 };
 
-access_recorder::access_recorder(int _ntemps):
-   ntemps(_ntemps)
+access_recorder::access_recorder(int _ntemps, int _narrays):
+   ntemps(_ntemps),
+   narrays(_narrays)
 {
-   acc = new temp_access[ntemps];
+   temp_acc = new temp_access[ntemps];     /* one tracker per temporary */
+   array_acc = new array_access[narrays];  /* one tracker per array */
 }
 
 access_recorder::~access_recorder()
 {
-   delete[] acc;
+   delete[] array_acc;   /* both arrays were allocated with new[] in the ctor */
+   delete[] temp_acc;
 }
 
 void access_recorder::record_read(const st_src_reg& src, int line,
                                   prog_scope *scope)
 {
+   int readmask = 0;
+   for (int idx = 0; idx < 4; ++idx) {
+      int swz = GET_SWZ(src.swizzle, idx);
+      readmask |= (1 << swz) & 0xF;
+   }
+
    if (src.file == PROGRAM_TEMPORARY)
-      acc[src.index].record_read(line, scope, src.swizzle);
+      temp_acc[src.index].record_read(line, scope, readmask);
+
+   if (src.file == PROGRAM_ARRAY) {
+      assert(src.array_id <= narrays);
+      array_acc[src.array_id - 1].record_access(line, scope, readmask);
+   }
 
    if (src.reladdr)
       record_read(*src.reladdr, line, scope);
@@ -950,10 +1064,21 @@ void access_recorder::record_read(const st_src_reg& src, int line,
 }
 
 void access_recorder::record_write(const st_dst_reg& dst, int line,
-                                   prog_scope *scope)
+                                  prog_scope *scope, bool can_reswizzle)
 {
    if (dst.file == PROGRAM_TEMPORARY)
-      acc[dst.index].record_write(line, scope, dst.writemask);
+      temp_acc[dst.index].record_write(line, scope, dst.writemask);
+
+   if (dst.file == PROGRAM_ARRAY) {
+      assert(dst.array_id <= narrays);
+
+      /* If the array is written as dst of a multi-dst operation, we must not
+       * reswizzle the access, because we would have to reswizzle also the
+       * other dst. For now just fill the mask to make interleaving impossible.
+       */
+      array_acc[dst.array_id - 1].record_access(line, scope,
+                                               can_reswizzle ? dst.writemask: 0xF);
+   }
 
    if (dst.reladdr)
       record_read(*dst.reladdr, line, scope);
@@ -961,14 +1086,24 @@ void access_recorder::record_write(const st_dst_reg& dst, int line,
       record_read(*dst.reladdr2, line, scope);
 }
 
-void access_recorder::get_required_live_ranges(struct register_live_range *live_ranges)
+void access_recorder::get_required_live_ranges(struct register_live_range *register_live_ranges,
+                                              class array_live_range *array_live_ranges)
 {
-   RENAME_DEBUG(debug_log << "=========live_ranges ==============\n");
+   RENAME_DEBUG(debug_log << "== register live ranges ==========\n");
    for(int i = 0; i < ntemps; ++i) {
       RENAME_DEBUG(debug_log << setw(4) << i);
-      live_ranges[i] = acc[i].get_required_live_range();
-      RENAME_DEBUG(debug_log << ": [" << live_ranges[i].begin << ", "
-                  << live_ranges[i].end << "]\n");
+      register_live_ranges[i] = temp_acc[i].get_required_live_range(); /* one per temp */
+      RENAME_DEBUG(debug_log << ": [" << register_live_ranges[i].begin << ", "
+                  << register_live_ranges[i].end << "]\n");
+   }
+   RENAME_DEBUG(debug_log << "==================================\n\n");
+   /* Arrays fill the caller's entry in place; slot i belongs to array_id i + 1. */
+   RENAME_DEBUG(debug_log << "== array live ranges ==========\n");
+   for(int i = 0; i < narrays; ++i) {
+      RENAME_DEBUG(debug_log<< setw(4) << i);
+      array_acc[i].get_required_live_range(array_live_ranges[i]);
+      RENAME_DEBUG(debug_log << ": [" <<array_live_ranges[i].begin() << ", "
+                       << array_live_ranges[i].end() << "]\n");
    }
    RENAME_DEBUG(debug_log << "==================================\n\n");
 }
@@ -986,7 +1121,8 @@ static void dump_instruction(ostream& os, int line, prog_scope *scope,
  */
 bool
 get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
-                                       int ntemps, struct register_live_range *live_ranges)
+                 int ntemps, struct register_live_range *register_live_ranges,
+                 int narrays, class array_live_range *array_live_ranges)
 {
    int line = 0;
    int loop_id = 1;
@@ -1011,7 +1147,7 @@ get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
 
    prog_scope_storage scopes(mem_ctx, n_scopes);
 
-   access_recorder access(ntemps);
+   access_recorder access(ntemps, narrays);
 
    prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
 
@@ -1137,9 +1273,11 @@ get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
          for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
             access.record_read(inst->tex_offsets[j], line, cur_scope);
          }
-         for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
-            access.record_write(inst->dst[j], line, cur_scope);
+        unsigned ndst = num_inst_dst_regs(inst);
+        for (unsigned j = 0; j < ndst; j++) {
+           access.record_write(inst->dst[j], line, cur_scope, ndst == 1);
          }
+        access.record_read(inst->resource, line, cur_scope);
       }
       }
       ++line;
@@ -1153,7 +1291,7 @@ get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
    if (cur_scope->end() < 0)
       cur_scope->set_end(line - 1);
 
-   access.get_required_live_ranges(live_ranges);
+   access.get_required_live_ranges(register_live_ranges, array_live_ranges);
    return true;
 }
 
@@ -1163,14 +1301,14 @@ get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
  * end points at the element past the end of the search range, and
  * the array comprising [start, end) must be sorted in ascending order.
  */
-static access_record*
-find_next_rename(access_record* start, access_record* end, int bound)
+static register_merge_record*
+find_next_rename(register_merge_record* start, register_merge_record* end, int bound)
 {
    int delta = (end - start);
 
    while (delta > 0) {
       int half = delta >> 1;
-      access_record* middle = start + half;
+      register_merge_record* middle = start + half;
 
       if (bound <= middle->begin) {
          delta = half;
@@ -1185,9 +1323,9 @@ find_next_rename(access_record* start, access_record* end, int bound)
 }
 
 #ifndef USE_STL_SORT
-static int access_record_compare (const void *a, const void *b) {
-   const access_record *aa = static_cast<const access_record*>(a);
-   const access_record *bb = static_cast<const access_record*>(b);
+static int register_merge_record_compare (const void *a, const void *b) { /* qsort comparator, ascending begin */
+   const register_merge_record *aa = static_cast<const register_merge_record*>(a);
+   const register_merge_record *bb = static_cast<const register_merge_record*>(b);
    return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0);
 }
 #endif
@@ -1196,9 +1334,9 @@ static int access_record_compare (const void *a, const void *b) {
  * search to find suitable merge candidates. */
 void get_temp_registers_remapping(void *mem_ctx, int ntemps,
                                  const struct register_live_range *live_ranges,
-                                  struct rename_reg_pair *result)
+                                 struct rename_reg_pair *result)
 {
-   access_record *reg_access = ralloc_array(mem_ctx, access_record, ntemps);
+   register_merge_record *reg_access = ralloc_array(mem_ctx, register_merge_record, ntemps);
 
    int used_temps = 0;
    for (int i = 0; i < ntemps; ++i) {
@@ -1214,16 +1352,17 @@ void get_temp_registers_remapping(void *mem_ctx, int ntemps,
 #ifdef USE_STL_SORT
    std::sort(reg_access, reg_access + used_temps);
 #else
-   std::qsort(reg_access, used_temps, sizeof(access_record), access_record_compare);
+   std::qsort(reg_access, used_temps, sizeof(register_merge_record),
+             register_merge_record_compare);
 #endif
 
-   access_record *trgt = reg_access;
-   access_record *reg_access_end = reg_access + used_temps;
-   access_record *first_erase = reg_access_end;
-   access_record *search_start = trgt + 1;
+   register_merge_record *trgt = reg_access;
+   register_merge_record *reg_access_end = reg_access + used_temps;
+   register_merge_record *first_erase = reg_access_end;
+   register_merge_record *search_start = trgt + 1;
 
    while (trgt != reg_access_end) {
-      access_record *src = find_next_rename(search_start, reg_access_end,
+      register_merge_record *src = find_next_rename(search_start, reg_access_end,
                                             trgt->end);
       if (src != reg_access_end) {
          result[src->reg].new_reg = trgt->reg;
@@ -1242,8 +1381,8 @@ void get_temp_registers_remapping(void *mem_ctx, int ntemps,
          /* Moving to the next target register it is time to remove
           * the already merged registers from the search range */
          if (first_erase != reg_access_end) {
-            access_record *outp = first_erase;
-            access_record *inp = first_erase + 1;
+           register_merge_record *outp = first_erase;
+           register_merge_record *inp = first_erase + 1;
 
             while (inp != reg_access_end) {
                if (!inp->erase)