aco/wave32: Fix reductions.

[mesa.git] / src / amd / compiler / aco_ir.h
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h

index a6fe846c74d031ed663448a2eb1525bd70b99e40..1f4721f5ffdf31ca236644bdca39b68595054c8d 100644 (file)
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -37,6 +37,7 @@
  #include "aco_util.h"
  
  struct radv_nir_compiler_options;
+struct radv_shader_args;
  struct radv_shader_info;
  
  namespace aco {
@@ -110,6 +111,53 @@ enum barrier_interaction {
     barrier_count = 4,
  };
  
+enum fp_round { /* Rounding-mode values; used as the type of float_mode's 2-bit round32/round16_64 fields. */
+   fp_round_ne = 0, /* presumably round to nearest even -- TODO confirm */
+   fp_round_pi = 1, /* presumably round toward +infinity -- TODO confirm */
+   fp_round_ni = 2, /* presumably round toward -infinity -- TODO confirm */
+   fp_round_tz = 3, /* presumably round toward zero -- TODO confirm */
+};
+
+enum fp_denorm { /* Named denormal-handling values; presumably stored in float_mode's 2-bit denorm32/denorm16_64 fields -- TODO confirm. */
+   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
+    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
+   fp_denorm_flush = 0x0, /* both bits clear */
+   fp_denorm_keep = 0x3, /* both bits set; 0x1 and 0x2 have no named enumerator here */
+};
+
+struct float_mode { /* Floating-point mode: packed rounding/denormal bits plus flags that constrain optimizations. */
+   /* matches encoding of the MODE register */
+   union {
+      struct {
+          fp_round round32:2; /* rounding mode, presumably for 32-bit floats */
+          fp_round round16_64:2; /* rounding mode, presumably for 16- and 64-bit floats */
+          unsigned denorm32:2; /* presumably an fp_denorm value for 32-bit floats */
+          unsigned denorm16_64:2; /* presumably an fp_denorm value for 16- and 64-bit floats */
+      };
+      uint8_t val = 0; /* shares storage with the four 2-bit fields above; defaults to 0 */
+   };
+   /* if false, optimizations which may remove infs/nan/-0.0 can be done */
+   bool preserve_signed_zero_inf_nan32:1; /* NOTE(review): unlike val, none of the six bool flags has a default initializer -- confirm every creator sets them */
+   bool preserve_signed_zero_inf_nan16_64:1;
+   /* if false, optimizations which may remove denormal flushing can be done */
+   bool must_flush_denorms32:1;
+   bool must_flush_denorms16_64:1;
+   bool care_about_round32:1; /* presumably: the rounding mode matters for 32-bit operations -- TODO confirm */
+   bool care_about_round16_64:1; /* presumably the 16-/64-bit counterpart -- TODO confirm */
+
+   /* Returns true if instructions using the mode "other" can safely use the
+    * current one instead. */
+   bool canReplace(float_mode other) const noexcept {
+      return val == other.val && /* the packed round/denorm bits must be identical */
+             (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) && /* from here on: every flag set in "other" must also be set in this mode */
+             (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
+             (must_flush_denorms32  || !other.must_flush_denorms32) &&
+             (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
+             (care_about_round32 || !other.care_about_round32) &&
+             (care_about_round16_64 || !other.care_about_round16_64);
+   }
+};
+
  constexpr Format asVOP3(Format format) { /* Returns "format" with the bits of Format::VOP3 OR-ed in. */
     return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
  };
@@ -796,12 +844,15 @@ struct MIMG_instruction : public Instruction {
   *
   */
  struct FLAT_instruction : public Instruction {
-   uint16_t offset; /* Vega only */
+   uint16_t offset; /* Vega/Navi only */
     bool slc; /* system level coherent */
     bool glc; /* globally coherent */
     bool dlc; /* NAVI: device level coherent */
     bool lds;
     bool nv;
+   bool disable_wqm; /* Require an exec mask without helper invocations */
+   bool can_reorder; /* NOTE(review): presumably whether this access may be moved relative to other instructions -- confirm */
+   barrier_interaction barrier; /* value returned by get_barrier_interaction() for the FLAT, GLOBAL and SCRATCH formats */
  };
  
  struct Export_instruction : public Instruction {
@@ -923,7 +974,8 @@ constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
        return static_cast<MIMG_instruction*>(instr)->barrier;
     case Format::FLAT:
     case Format::GLOBAL:
-      return barrier_buffer;
+   case Format::SCRATCH:
+      return static_cast<FLAT_instruction*>(instr)->barrier;
     case Format::DS:
        return barrier_shared;
     default:
@@ -1019,6 +1071,7 @@ struct RegisterDemand {
  
  /* CFG */
  struct Block {
+   float_mode fp_mode;
     unsigned index;
     unsigned offset = 0;
     std::vector<aco_ptr<Instruction>> instructions;
@@ -1086,6 +1139,7 @@ static constexpr Stage geometry_gs = sw_gs | hw_gs;
  
  class Program final {
  public:
+   float_mode next_fp_mode;
     std::vector<Block> blocks;
     RegisterDemand max_reg_demand = RegisterDemand();
     uint16_t num_waves = 0;
@@ -1095,6 +1149,7 @@ public:
     enum chip_class chip_class;
     enum radeon_family family;
     unsigned wave_size;
+   RegClass lane_mask;
     Stage stage; /* Stage */
     bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
     bool needs_wqm = false; /* there exists a p_wqm instruction */
@@ -1133,11 +1188,13 @@ public:
  
     Block* create_and_insert_block() { /* Appends a new Block to "blocks" and returns a pointer to it. */
        blocks.emplace_back(blocks.size()); /* constructed from the current block count, presumably its index */
+      blocks.back().fp_mode = next_fp_mode; /* the new block takes the program's pending float mode */
        return &blocks.back(); /* points into the vector; may dangle if a later insertion reallocates it */
     }
  
     Block* insert_block(Block&& block) { /* Moves "block" to the end of "blocks" and returns a pointer to the stored copy. */
        block.index = blocks.size(); /* overwrites whatever index the caller had set */
+      block.fp_mode = next_fp_mode; /* likewise overwrites the caller's fp_mode with the program's pending one */
        blocks.emplace_back(std::move(block));
        return &blocks.back(); /* points into the vector; may dangle if a later insertion reallocates it */
     }
@@ -1157,8 +1214,7 @@ void select_program(Program *program,
                      unsigned shader_count,
                      struct nir_shader *const *shaders,
                      ac_shader_config* config,
-                    struct radv_shader_info *info,
-                    struct radv_nir_compiler_options *options);
+                    struct radv_shader_args *args);
  
  void lower_wqm(Program* program, live& live_vars,
                 const struct radv_nir_compiler_options *options);