#include "aco_util.h"
struct radv_nir_compiler_options;
+struct radv_shader_args;
struct radv_shader_info;
namespace aco {
barrier_count = 4,
};
+enum fp_round { /* Rounding-mode selector; stored in the 2-bit round32 / round16_64 fields of float_mode. */
+ fp_round_ne = 0, /* NOTE(review): presumably round to nearest even - confirm against the MODE register description */
+ fp_round_pi = 1, /* NOTE(review): presumably round toward +infinity - confirm */
+ fp_round_ni = 2, /* NOTE(review): presumably round toward -infinity - confirm */
+ fp_round_tz = 3, /* NOTE(review): presumably round toward zero - confirm */
+};
+
+enum fp_denorm { /* Denormal-handling values; float_mode's denorm32 / denorm16_64 fields are 2-bit plain unsigned fields of matching width. */
+ /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
+ * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
+ fp_denorm_flush = 0x0, /* both bits clear */
+ fp_denorm_keep = 0x3, /* both bits set. NOTE(review): 0x1 and 0x2 have no enumerator here - presumably
+ * they treat inputs and outputs differently; confirm against the MODE register description. */
+};
+
+struct float_mode { /* Floating-point mode: a packed rounding/denormal byte plus flags saying which properties must be honored. */
+ /* matches encoding of the MODE register */
+ union {
+ struct {
+ fp_round round32:2; /* rounding mode; going by the name, for 32-bit floats */
+ fp_round round16_64:2; /* rounding mode; going by the name, shared by 16-bit and 64-bit floats */
+ unsigned denorm32:2; /* denormal handling (see fp_denorm); going by the name, for 32-bit floats */
+ unsigned denorm16_64:2; /* denormal handling (see fp_denorm); going by the name, for 16-bit and 64-bit floats */
+ };
+ uint8_t val = 0; /* the four 2-bit fields above viewed as a single byte; zero by default */
+ };
+ /* if false, optimizations which may remove infs/nan/-0.0 can be done */
+ bool preserve_signed_zero_inf_nan32:1;
+ bool preserve_signed_zero_inf_nan16_64:1;
+ /* if false, optimizations which may remove denormal flushing can be done */
+ bool must_flush_denorms32:1;
+ bool must_flush_denorms16_64:1;
+ bool care_about_round32:1; /* NOTE(review): presumably false means optimizations may ignore the rounding mode - confirm */
+ bool care_about_round16_64:1; /* NOTE(review): none of the six flag bits above has a default initializer (only val
+ * does), so a default-initialized float_mode leaves them unset - confirm that
+ * every creator assigns them before canReplace() is called. */
+
+ /* Returns true if instructions using the mode "other" can safely use the
+ * current one instead.
+ * This holds when the packed byte (val) is identical in both modes and every
+ * flag that is set in "other" is also set here; flags set only on the current
+ * mode do not prevent replacement. */
+ bool canReplace(float_mode other) const noexcept {
+ return val == other.val &&
+ (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
+ (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
+ (must_flush_denorms32 || !other.must_flush_denorms32) &&
+ (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
+ (care_about_round32 || !other.care_about_round32) &&
+ (care_about_round16_64 || !other.care_about_round16_64);
+ }
+};
+
/* Returns `format` with the Format::VOP3 bits OR-ed into it. */
constexpr Format asVOP3(Format format) {
   return static_cast<Format>(static_cast<uint32_t>(Format::VOP3) |
                              static_cast<uint32_t>(format));
};
*
*/
struct FLAT_instruction : public Instruction { /* Instructions of the FLAT, GLOBAL and SCRATCH formats are accessed through this type. */
- uint16_t offset; /* Vega only */
+ uint16_t offset; /* Vega/Navi only */
bool slc; /* system level coherent */
bool glc; /* globally coherent */
bool dlc; /* NAVI: device level coherent */
bool lds;
bool nv;
+ bool disable_wqm; /* Require an exec mask without helper invocations */
+ bool can_reorder; /* NOTE(review): presumably whether this access may be moved relative to other memory operations - confirm */
+ barrier_interaction barrier; /* reported as this instruction's barrier interaction for the FLAT, GLOBAL and SCRATCH formats */
};
struct Export_instruction : public Instruction {
return static_cast<MIMG_instruction*>(instr)->barrier;
case Format::FLAT:
case Format::GLOBAL:
- return barrier_buffer;
+ case Format::SCRATCH:
+ return static_cast<FLAT_instruction*>(instr)->barrier;
case Format::DS:
return barrier_shared;
default:
/* CFG */
struct Block {
+ float_mode fp_mode;
unsigned index;
unsigned offset = 0;
std::vector<aco_ptr<Instruction>> instructions;
class Program final {
public:
+ float_mode next_fp_mode;
std::vector<Block> blocks;
RegisterDemand max_reg_demand = RegisterDemand();
uint16_t num_waves = 0;
enum chip_class chip_class;
enum radeon_family family;
unsigned wave_size;
+ RegClass lane_mask;
Stage stage; /* Stage */
bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
bool needs_wqm = false; /* there exists a p_wqm instruction */
/* Appends a new Block, constructed from the current block count, to `blocks`,
 * gives it the program's next_fp_mode and returns a pointer to it.
 * The pointer refers to an element of the `blocks` vector, so a later insertion
 * that reallocates the vector invalidates it. */
Block* create_and_insert_block() {
blocks.emplace_back(blocks.size());
+ blocks.back().fp_mode = next_fp_mode;
return &blocks.back();
}
/* Moves `block` onto the end of `blocks` and returns a pointer to the stored
 * element. Before the move, block.index is overwritten with the current block
 * count and block.fp_mode with next_fp_mode, so any values the caller put in
 * those two fields are discarded.
 * The pointer refers to an element of the `blocks` vector, so a later insertion
 * that reallocates the vector invalidates it. */
Block* insert_block(Block&& block) {
block.index = blocks.size();
+ block.fp_mode = next_fp_mode;
blocks.emplace_back(std::move(block));
return &blocks.back();
}
unsigned shader_count,
struct nir_shader *const *shaders,
ac_shader_config* config,
- struct radv_shader_info *info,
- struct radv_nir_compiler_options *options);
+ struct radv_shader_args *args);
void lower_wqm(Program* program, live& live_vars,
const struct radv_nir_compiler_options *options);