intel/compiler: detect if atomic load store operations are used
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi_temprename.cpp
index bc0b2f20b620c5bd6f3eff8d2fdbbabe37e4ea96..210c25e8ba8435966b515e229b868262a0794c3c 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include "st_glsl_to_tgsi_temprename.h"
+#include "st_glsl_to_tgsi_array_merge.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_strings.h"
 #include "program/prog_instruction.h"
@@ -239,6 +240,27 @@ private:
    bool needs_component_tracking;
 };
 
+/* Class to track array access.
+ * Compared to the temporary tracking this is very simplified, mainly because
+ * with the likely indirect access one can not really establish access
+ * patterns for individual elements. Instead the live range is always
+ * evaluated for the whole array; it only takes loops into account and
+ * whether the array was accessed conditionally within a loop.
+ */
+class array_access {
+public:
+   array_access();
+   void record_access(int line, prog_scope *scope, int swizzle);
+   void get_required_live_range(array_live_range &lr);
+private:
+   int first_access;                /* line of the first recorded access */
+   int last_access;                 /* line of the most recent recorded access */
+   prog_scope *first_access_scope;  /* scope of the first recorded access */
+   prog_scope *last_access_scope;   /* scope of the most recent recorded access */
+   unsigned accumulated_swizzle:4;  /* OR of all masks passed to record_access() */
+   unsigned conditional_access_in_loop:1; /* unsigned: a signed 1-bit field cannot hold 1 */
+};
+
 prog_scope_storage::prog_scope_storage(void *mc, int n):
    mem_ctx(mc),
    current_slot(0)
@@ -494,13 +516,8 @@ void temp_access::record_write(int line, prog_scope *scope, int writemask)
       comp[3].record_write(line, scope);
 }
 
-void temp_access::record_read(int line, prog_scope *scope, int swizzle)
+void temp_access::record_read(int line, prog_scope *scope, int readmask)
 {
-   int readmask = 0;
-   for (int idx = 0; idx < 4; ++idx) {
-      int swz = GET_SWZ(swizzle, idx);
-      readmask |= (1 << swz) & 0xF;
-   }
    update_access_mask(readmask);
 
    if (readmask & WRITEMASK_X)
@@ -513,6 +530,86 @@ void temp_access::record_read(int line, prog_scope *scope, int swizzle)
       comp[3].record_read(line, scope);
 }
 
+array_access::array_access():  /* start with no access recorded */
+   first_access(-1),  /* placeholder until record_access() stores a line */
+   last_access(-1),
+   first_access_scope(nullptr),  /* record_access() tests this for null to detect the first access */
+   last_access_scope(nullptr),
+   accumulated_swizzle(0),  /* no mask bits accumulated yet */
+   conditional_access_in_loop(false)
+{
+}
+
+void array_access::record_access(int line, prog_scope *scope, int swizzle)
+{  /* Record one access to the array at instruction `line` inside `scope`. */
+   if (!first_access_scope) {  /* only the very first call fixes the start */
+      first_access = line;
+      first_access_scope = scope;
+   }
+   last_access_scope = scope;  /* every call moves the end of the range */
+   last_access = line;
+   accumulated_swizzle |= swizzle;  /* callers in this file pass a component mask here */
+   if (scope->in_ifelse_scope() && scope->innermost_loop())  /* presumably: in an if/else nested in a loop */
+      conditional_access_in_loop = true;  /* sticky: nothing resets it to false */
+}
+
+void array_access::get_required_live_range(array_live_range& lr)
+{  /* Store the live range [first_access, last_access] and the access mask in lr. */
+   RENAME_DEBUG(debug_log << "first_access_scope=" << first_access_scope << "\n");
+   RENAME_DEBUG(debug_log << "last_access_scope=" << last_access_scope << "\n");
+   /* First and last access share one scope: use the recorded lines as-is. */
+   if (first_access_scope == last_access_scope) {
+      lr.set_live_range(first_access, last_access);
+      lr.set_access_mask(accumulated_swizzle);
+      return;
+   }
+   /* Otherwise look for a scope that covers both accesses. */
+   const prog_scope *shared_scope = first_access_scope;
+   const prog_scope *other_scope = last_access_scope;
+
+   assert(shared_scope);
+   RENAME_DEBUG(debug_log << "shared_scope=" << shared_scope << "\n");
+   /* NOTE: this branch widens the first_access/last_access members themselves. */
+   if (conditional_access_in_loop) {
+      const prog_scope *help = shared_scope->outermost_loop();
+      if (help) {
+        shared_scope = help;
+      } else {  /* first-access scope reported no loop: try the last-access scope */
+        help = other_scope->outermost_loop();
+        if (help)
+           other_scope = help;
+      }
+      if (first_access > shared_scope->begin())  /* pull the start back to the scope begin */
+        first_access = shared_scope->begin();
+      if (last_access < shared_scope->end())  /* push the end out to the scope end */
+        last_access = shared_scope->end();
+   }
+
+   /* See if any of the two is the parent of the other. */
+   if (other_scope->contains_range_of(*shared_scope)) {
+      shared_scope = other_scope;
+   } else while (!shared_scope->contains_range_of(*other_scope)) {  /* climb parents until other_scope is covered */
+      assert(shared_scope->parent());
+      if (shared_scope->type() == loop_body) {  /* stepping out of a loop body: range must reach its end */
+        if (last_access < shared_scope->end())
+            last_access = shared_scope->end();
+      }
+      shared_scope = shared_scope->parent();
+   }
+   /* Climb from other_scope up to shared_scope, extending over loop bodies passed on the way. */
+   while (shared_scope != other_scope) {
+      if (other_scope->type() == loop_body) {
+        if (last_access < other_scope->end())
+            last_access = other_scope->end();
+      }
+      other_scope = other_scope->parent();
+   }
+
+   lr.set_live_range(first_access, last_access);
+   lr.set_access_mask(accumulated_swizzle);
+}
+
+
 inline static register_live_range make_live_range(int b, int e)
 {
    register_live_range lt;
@@ -912,36 +1009,53 @@ public:
 
 class access_recorder {
 public:
-   access_recorder(int _ntemps);
+   access_recorder(int _ntemps, int _narrays);
    ~access_recorder();
 
    void record_read(const st_src_reg& src, int line, prog_scope *scope);
-   void record_write(const st_dst_reg& src, int line, prog_scope *scope);
+   void record_write(const st_dst_reg& src, int line, prog_scope *scope,
+                    bool no_reswizzle);
 
-   void get_required_live_ranges(register_live_range *register_live_ranges);
+   void get_required_live_ranges(register_live_range *register_live_ranges,
+                                array_live_range *array_live_ranges);
 private:
 
    int ntemps;
+   int narrays;
    temp_access *temp_acc;
-
+   array_access *array_acc;
 };
 
-access_recorder::access_recorder(int _ntemps):
-   ntemps(_ntemps)
+access_recorder::access_recorder(int _ntemps, int _narrays):  /* size both tracker arrays from the given counts */
+   ntemps(_ntemps),
+   narrays(_narrays)
 {
    temp_acc = new temp_access[ntemps];
+   array_acc = new array_access[narrays];  /* released with delete[] in the destructor */
 }
 
 access_recorder::~access_recorder()
 {
+   delete[] array_acc;  /* pairs with the new[] in the constructor */
    delete[] temp_acc;
 }
 
 void access_recorder::record_read(const st_src_reg& src, int line,
                                   prog_scope *scope)
 {
+   int readmask = 0;
+   for (int idx = 0; idx < 4; ++idx) {
+      int swz = GET_SWZ(src.swizzle, idx);
+      readmask |= (1 << swz) & 0xF;
+   }
+
    if (src.file == PROGRAM_TEMPORARY)
-      temp_acc[src.index].record_read(line, scope, src.swizzle);
+      temp_acc[src.index].record_read(line, scope, readmask);
+
+   if (src.file == PROGRAM_ARRAY) {
+      assert(src.array_id <= narrays);
+      array_acc[src.array_id - 1].record_access(line, scope, readmask);
+   }
 
    if (src.reladdr)
       record_read(*src.reladdr, line, scope);
@@ -950,18 +1064,30 @@ void access_recorder::record_read(const st_src_reg& src, int line,
 }
 
 void access_recorder::record_write(const st_dst_reg& dst, int line,
-                                   prog_scope *scope)
+                                  prog_scope *scope, bool can_reswizzle)
 {
    if (dst.file == PROGRAM_TEMPORARY)
       temp_acc[dst.index].record_write(line, scope, dst.writemask);
 
+   if (dst.file == PROGRAM_ARRAY) {
+      assert(dst.array_id <= narrays);
+
+      /* If the array is written as dst of a multi-dst operation, we must not
+       * reswizzle the access, because we would have to reswizzle also the
+       * other dst. For now just fill the mask to make interleaving impossible.
+       */
+      array_acc[dst.array_id - 1].record_access(line, scope,
+                                               can_reswizzle ? dst.writemask: 0xF);
+   }
+
    if (dst.reladdr)
       record_read(*dst.reladdr, line, scope);
    if (dst.reladdr2)
       record_read(*dst.reladdr2, line, scope);
 }
 
-void access_recorder::get_required_live_ranges(struct register_live_range *register_live_ranges)
+void access_recorder::get_required_live_ranges(struct register_live_range *register_live_ranges,
+                                              class array_live_range *array_live_ranges)
 {
    RENAME_DEBUG(debug_log << "== register live ranges ==========\n");
    for(int i = 0; i < ntemps; ++i) {
@@ -971,6 +1097,15 @@ void access_recorder::get_required_live_ranges(struct register_live_range *regis
                   << register_live_ranges[i].end << "]\n");
    }
    RENAME_DEBUG(debug_log << "==================================\n\n");
+
+   RENAME_DEBUG(debug_log << "== array live ranges ==========\n");
+   for(int i = 0; i < narrays; ++i) {
+      RENAME_DEBUG(debug_log<< setw(4) << i);
+      array_acc[i].get_required_live_range(array_live_ranges[i]);
+      RENAME_DEBUG(debug_log << ": [" <<array_live_ranges[i].begin() << ", "
+                       << array_live_ranges[i].end() << "]\n");
+   }
+   RENAME_DEBUG(debug_log << "==================================\n\n");
 }
 
 }
@@ -986,7 +1121,8 @@ static void dump_instruction(ostream& os, int line, prog_scope *scope,
  */
 bool
 get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
-                 int ntemps, struct register_live_range *register_live_ranges)
+                 int ntemps, struct register_live_range *register_live_ranges,
+                 int narrays, class array_live_range *array_live_ranges)
 {
    int line = 0;
    int loop_id = 1;
@@ -1011,7 +1147,7 @@ get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
 
    prog_scope_storage scopes(mem_ctx, n_scopes);
 
-   access_recorder access(ntemps);
+   access_recorder access(ntemps, narrays);
 
    prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
 
@@ -1137,9 +1273,11 @@ get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
          for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
             access.record_read(inst->tex_offsets[j], line, cur_scope);
          }
-         for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
-            access.record_write(inst->dst[j], line, cur_scope);
+        unsigned ndst = num_inst_dst_regs(inst);
+        for (unsigned j = 0; j < ndst; j++) {
+           access.record_write(inst->dst[j], line, cur_scope, ndst == 1);
          }
+        access.record_read(inst->resource, line, cur_scope);
       }
       }
       ++line;
@@ -1153,7 +1291,7 @@ get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions,
    if (cur_scope->end() < 0)
       cur_scope->set_end(line - 1);
 
-   access.get_required_live_ranges(register_live_ranges);
+   access.get_required_live_ranges(register_live_ranges, array_live_ranges);
    return true;
 }