They indicate the operation does not cause overflow or underflow.
This is motivated by SPIR-V decorations NoSignedWrap and
NoUnsignedWrap.
Change the storage of `exact` to be a single bit, so they pack
together.
v2: Handle no_wrap in nir_instr_set. (Karol)
v3: Use two separate flags: NIR SSA values and certain instructions
are typeless, so a single no_wrap flag would be insufficient to know
which kind of wrapping was referred to. (Connor)
v4: Don't use nir_instr_set to propagate the flags. Unlike `exact`,
consider the instructions different if the flags have different
values. Fix hashing/comparing. (Jason)
Reviewed-by: Karol Herbst <kherbst@redhat.com> [v1]
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
* it must ensure that the resulting value is bit-for-bit identical to the
* original.
*/
- bool exact;
+ bool exact:1;
+
+ /**
+ * Indicates that this instruction does not cause wrapping to occur, in the
+ * form of overflow or underflow.
+ */
+ bool no_signed_wrap:1;
+ bool no_unsigned_wrap:1;
nir_alu_dest dest;
nir_alu_src src[];
{
nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
nalu->exact = alu->exact;
+ nalu->no_signed_wrap = alu->no_signed_wrap;
+ nalu->no_unsigned_wrap = alu->no_unsigned_wrap;
__clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
nalu->dest.saturate = alu->dest.saturate;
hash_alu(uint32_t hash, const nir_alu_instr *instr)
{
hash = HASH(hash, instr->op);
+
+ /* We explicitly don't hash instr->exact. */
+ uint8_t flags = instr->no_signed_wrap |
+ instr->no_unsigned_wrap << 1;
+ hash = HASH(hash, flags);
+
hash = HASH(hash, instr->dest.dest.ssa.num_components);
hash = HASH(hash, instr->dest.dest.ssa.bit_size);
- /* We explicitly don't hash instr->dest.dest.exact */
if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) {
assert(nir_op_infos[instr->op].num_inputs >= 2);
if (alu1->op != alu2->op)
return false;
+ /* We explicitly don't compare instr->exact. */
+
+ if (alu1->no_signed_wrap != alu2->no_signed_wrap)
+ return false;
+
+ if (alu1->no_unsigned_wrap != alu2->no_unsigned_wrap)
+ return false;
+
/* TODO: We can probably acutally do something more inteligent such
* as allowing different numbers and taking a maximum or something
* here */
if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size)
return false;
- /* We explicitly don't hash instr->dest.dest.exact */
-
if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) {
if ((!nir_alu_srcs_equal(alu1, alu2, 0, 0) ||
!nir_alu_srcs_equal(alu1, alu2, 1, 1)) &&
fprintf(fp, "!");
if (instr->dest.saturate)
fprintf(fp, ".sat");
+ if (instr->no_signed_wrap)
+ fprintf(fp, ".nsw");
+ if (instr->no_unsigned_wrap)
+ fprintf(fp, ".nuw");
fprintf(fp, " ");
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
{
blob_write_uint32(ctx->blob, alu->op);
uint32_t flags = alu->exact;
- flags |= alu->dest.saturate << 1;
- flags |= alu->dest.write_mask << 2;
+ flags |= alu->no_signed_wrap << 1;
+ flags |= alu->no_unsigned_wrap << 2;
+ flags |= alu->dest.saturate << 3;
+ flags |= alu->dest.write_mask << 4;
blob_write_uint32(ctx->blob, flags);
write_dest(ctx, &alu->dest.dest);
uint32_t flags = blob_read_uint32(ctx->blob);
alu->exact = flags & 1;
- alu->dest.saturate = flags & 2;
- alu->dest.write_mask = flags >> 2;
+ alu->no_signed_wrap = flags & 2;
+ alu->no_unsigned_wrap = flags & 4;
+ alu->dest.saturate = flags & 8;
+ alu->dest.write_mask = flags >> 4;
read_dest(ctx, &alu->dest.dest, &alu->instr);