/* Largest unsigned value representable in `bits` bits, e.g. 0xff for 8.
 * NOTE(review): `bits` must be in [1, 64] — a shift by 64 (bits == 0) is
 * undefined behavior in C.
 */
#define MAX_UINT_FOR_SIZE(bits) (UINT64_MAX >> (64 - (bits)))
+/**
+ * \brief Checks if the provided value is a denorm and flushes it to zero.
+ */
+static void
+constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
+{
+ switch(bit_size) {
+ case 64:
+ if (0 == (value->u64 & 0x7ff0000000000000))
+ value->u64 &= 0x8000000000000000;
+ break;
+ case 32:
+ if (0 == (value->u32 & 0x7f800000))
+ value->u32 &= 0x80000000;
+ break;
+ case 16:
+ if (0 == (value->u16 & 0x7c00))
+ value->u16 &= 0x8000;
+ }
+}
+
/**
* Evaluate one component of packSnorm4x8.
*/
% endfor
% endfor
-<%def name="evaluate_op(op, bit_size)">
+<%def name="evaluate_op(op, bit_size, execution_mode)">
<%
output_type = type_add_size(op.output_type, bit_size)
input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
% else:
_dst_val[_i].${get_const_field(output_type)} = dst;
% endif
+
+ % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
+ % if type_has_size(output_type):
+ if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
+ constant_denorm_flush_to_zero(&_dst_val[_i], ${type_size(output_type)});
+ }
+ % else:
+ if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
+ constant_denorm_flush_to_zero(&_dst_val[i], bit_size);
+ }
+ %endif
+ % endif
}
% else:
## In the non-per-component case, create a struct dst with
% else:
_dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzw"[k]};
% endif
+
+ % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
+ % if type_has_size(output_type):
+ if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
+ constant_denorm_flush_to_zero(&_dst_val[${k}], ${type_size(output_type)});
+ }
+ % else:
+ if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
+ constant_denorm_flush_to_zero(&_dst_val[${k}], bit_size);
+ }
+ % endif
+ % endif
% endfor
% endif
</%def>
evaluate_${name}(nir_const_value *_dst_val,
UNUSED unsigned num_components,
${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
- UNUSED nir_const_value **_src)
+ UNUSED nir_const_value **_src,
+ UNUSED unsigned execution_mode)
{
% if op_bit_sizes(op) is not None:
switch (bit_size) {
% for bit_size in op_bit_sizes(op):
case ${bit_size}: {
- ${evaluate_op(op, bit_size)}
+ ${evaluate_op(op, bit_size, execution_mode)}
break;
}
% endfor
unreachable("unknown bit width");
}
% else:
- ${evaluate_op(op, 0)}
+ ${evaluate_op(op, 0, execution_mode)}
% endif
}
% endfor
void
nir_eval_const_opcode(nir_op op, nir_const_value *dest,
unsigned num_components, unsigned bit_width,
- nir_const_value **src)
+ nir_const_value **src,
+ unsigned float_controls_execution_mode)
{
switch (op) {
% for name in sorted(opcodes.keys()):
case nir_op_${name}:
- evaluate_${name}(dest, num_components, bit_width, src);
+ evaluate_${name}(dest, num_components, bit_width, src, float_controls_execution_mode);
return;
% endfor
default:
from mako.template import Template
print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
+ type_base_type=type_base_type,
+ type_size=type_size,
type_has_size=type_has_size,
type_add_size=type_add_size,
op_bit_sizes=op_bit_sizes,
}
static nir_const_value
-eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0)
+eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0,
+ unsigned execution_mode)
{
assert(nir_op_infos[op].num_inputs == 1);
nir_const_value dest;
nir_const_value *src[1] = { &src0 };
- nir_eval_const_opcode(op, &dest, 1, bit_size, src);
+ nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode);
return dest;
}
/* Constant-fold a two-source ALU opcode on one scalar component,
 * honoring the shader's float-controls execution mode, and return the
 * folded value.
 */
static nir_const_value
eval_const_binop(nir_op op, unsigned bit_size,
                 nir_const_value src0, nir_const_value src1,
                 unsigned execution_mode)
{
   assert(nir_op_infos[op].num_inputs == 2);

   nir_const_value *srcs[] = { &src0, &src1 };
   nir_const_value result;
   nir_eval_const_opcode(op, &result, 1, bit_size, srcs, execution_mode);
   return result;
}
static int32_t
get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step,
- nir_const_value limit, unsigned bit_size)
+ nir_const_value limit, unsigned bit_size,
+ unsigned execution_mode)
{
nir_const_value span, iter;
case nir_op_ilt:
case nir_op_ieq:
case nir_op_ine:
- span = eval_const_binop(nir_op_isub, bit_size, limit, initial);
- iter = eval_const_binop(nir_op_idiv, bit_size, span, step);
+ span = eval_const_binop(nir_op_isub, bit_size, limit, initial,
+ execution_mode);
+ iter = eval_const_binop(nir_op_idiv, bit_size, span, step,
+ execution_mode);
break;
case nir_op_uge:
case nir_op_ult:
- span = eval_const_binop(nir_op_isub, bit_size, limit, initial);
- iter = eval_const_binop(nir_op_udiv, bit_size, span, step);
+ span = eval_const_binop(nir_op_isub, bit_size, limit, initial,
+ execution_mode);
+ iter = eval_const_binop(nir_op_udiv, bit_size, span, step,
+ execution_mode);
break;
case nir_op_fge:
case nir_op_flt:
case nir_op_feq:
case nir_op_fne:
- span = eval_const_binop(nir_op_fsub, bit_size, limit, initial);
- iter = eval_const_binop(nir_op_fdiv, bit_size, span, step);
- iter = eval_const_unop(nir_op_f2i64, bit_size, iter);
+ span = eval_const_binop(nir_op_fsub, bit_size, limit, initial,
+ execution_mode);
+ iter = eval_const_binop(nir_op_fdiv, bit_size, span,
+ step, execution_mode);
+ iter = eval_const_unop(nir_op_f2i64, bit_size, iter, execution_mode);
break;
default:
nir_op cond_op, unsigned bit_size,
nir_const_value initial,
nir_const_value limit,
- bool limit_rhs, bool invert_cond)
+ bool limit_rhs, bool invert_cond,
+ unsigned execution_mode)
{
if (trip_offset == 1) {
nir_op add_op;
unreachable("Unhandled induction variable base type!");
}
- initial = eval_const_binop(add_op, bit_size, initial, step);
+ initial = eval_const_binop(add_op, bit_size, initial, step,
+ execution_mode);
}
nir_const_value *src[2];
/* Evaluate the loop exit condition */
nir_const_value result;
- nir_eval_const_opcode(cond_op, &result, 1, bit_size, src);
+ nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode);
return invert_cond ? !result.b : result.b;
}
test_iterations(int32_t iter_int, nir_const_value step,
nir_const_value limit, nir_op cond_op, unsigned bit_size,
nir_alu_type induction_base_type,
- nir_const_value initial, bool limit_rhs, bool invert_cond)
+ nir_const_value initial, bool limit_rhs, bool invert_cond,
+ unsigned execution_mode)
{
assert(nir_op_infos[cond_op].num_inputs == 2);
* step the induction variable each iteration.
*/
nir_const_value mul_result =
- eval_const_binop(mul_op, bit_size, iter_src, step);
+ eval_const_binop(mul_op, bit_size, iter_src, step, execution_mode);
/* Add the initial value to the accumulated induction variable total */
nir_const_value add_result =
- eval_const_binop(add_op, bit_size, mul_result, initial);
+ eval_const_binop(add_op, bit_size, mul_result, initial, execution_mode);
nir_const_value *src[2];
src[limit_rhs ? 0 : 1] = &add_result;
/* Evaluate the loop exit condition */
nir_const_value result;
- nir_eval_const_opcode(cond_op, &result, 1, bit_size, src);
+ nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode);
return invert_cond ? !result.b : result.b;
}
calculate_iterations(nir_const_value initial, nir_const_value step,
nir_const_value limit, nir_alu_instr *alu,
nir_ssa_scalar cond, nir_op alu_op, bool limit_rhs,
- bool invert_cond)
+ bool invert_cond, unsigned execution_mode)
{
/* nir_op_isub should have been lowered away by this point */
assert(alu->op != nir_op_isub);
*/
if (will_break_on_first_iteration(step, induction_base_type, trip_offset,
alu_op, bit_size, initial,
- limit, limit_rhs, invert_cond)) {
+ limit, limit_rhs, invert_cond,
+ execution_mode)) {
return 0;
}
- int iter_int = get_iteration(alu_op, initial, step, limit, bit_size);
+ int iter_int = get_iteration(alu_op, initial, step, limit, bit_size,
+ execution_mode);
/* If iter_int is negative the loop is ill-formed or is the conditional is
* unsigned with a huge iteration count so don't bother going any further.
if (test_iterations(iter_bias, step, limit, alu_op, bit_size,
induction_base_type, initial,
- limit_rhs, invert_cond)) {
+ limit_rhs, invert_cond, execution_mode)) {
return iter_bias > 0 ? iter_bias - trip_offset : iter_bias;
}
}
* loop.
*/
static void
-find_trip_count(loop_info_state *state)
+find_trip_count(loop_info_state *state, unsigned execution_mode)
{
bool trip_count_known = true;
bool guessed_trip_count = false;
int iterations = calculate_iterations(initial_val, step_val, limit_val,
ind_var->alu, cond,
alu_op, limit_rhs,
- terminator->continue_from_then);
+ terminator->continue_from_then,
+ execution_mode);
/* Where we not able to calculate the iteration count */
if (iterations == -1) {
return;
/* Run through each of the terminators and try to compute a trip-count */
- find_trip_count(state);
+ find_trip_count(state, impl->function->shader->info.float_controls_execution_mode);
nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
if (force_unroll_heuristics(state, block)) {
*/
static bool
-constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
+constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned execution_mode)
{
nir_const_value src[NIR_MAX_VEC_COMPONENTS][NIR_MAX_VEC_COMPONENTS];
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; ++i)
srcs[i] = src[i];
nir_eval_const_opcode(instr->op, dest, instr->dest.dest.ssa.num_components,
- bit_size, srcs);
+ bit_size, srcs, execution_mode);
nir_load_const_instr *new_instr =
nir_load_const_instr_create(mem_ctx,
}
static bool
-constant_fold_block(nir_block *block, void *mem_ctx)
+constant_fold_block(nir_block *block, void *mem_ctx, unsigned execution_mode)
{
bool progress = false;
nir_foreach_instr_safe(instr, block) {
switch (instr->type) {
case nir_instr_type_alu:
- progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
+ progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, execution_mode);
break;
case nir_instr_type_intrinsic:
progress |=
}
static bool
-nir_opt_constant_folding_impl(nir_function_impl *impl)
+nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned execution_mode)
{
void *mem_ctx = ralloc_parent(impl);
bool progress = false;
nir_foreach_block(block, impl) {
- progress |= constant_fold_block(block, mem_ctx);
+ progress |= constant_fold_block(block, mem_ctx, execution_mode);
}
if (progress) {
nir_opt_constant_folding(nir_shader *shader)
{
bool progress = false;
+ unsigned execution_mode = shader->info.float_controls_execution_mode;
nir_foreach_function(function, shader) {
if (function->impl)
- progress |= nir_opt_constant_folding_impl(function->impl);
+ progress |= nir_opt_constant_folding_impl(function->impl, execution_mode);
}
return progress;