X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_instr_set.c;h=1a6a7ab7743901a240042b82c9910d2a1fe02418;hb=c12750527b7c4d1f4265ffa2346fd5c2346f41cd;hp=d3f939fe8058afcc1a3c2acc15ed67aa57a63d90;hpb=a39a8fbbaa129f4e52f2a3ad2747182e9a74d910;p=mesa.git diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index d3f939fe805..1a6a7ab7743 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -23,6 +23,7 @@ #include "nir_instr_set.h" #include "nir_vla.h" +#include "util/half_float.h" #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data)) @@ -52,9 +53,12 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr) { hash = HASH(hash, instr->op); hash = HASH(hash, instr->dest.dest.ssa.num_components); + hash = HASH(hash, instr->dest.dest.ssa.bit_size); + /* We explicitly don't hash instr->dest.dest.exact */ + + if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) { + assert(nir_op_infos[instr->op].num_inputs >= 2); - if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { - assert(nir_op_infos[instr->op].num_inputs == 2); uint32_t hash0 = hash_alu_src(hash, &instr->src[0], nir_ssa_alu_instr_src_components(instr, 0)); uint32_t hash1 = hash_alu_src(hash, &instr->src[1], @@ -66,6 +70,11 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr) * collision. Either addition or multiplication will also work. */ hash = hash0 * hash1; + + for (unsigned i = 2; i < nir_op_infos[instr->op].num_inputs; i++) { + hash = hash_alu_src(hash, &instr->src[i], + nir_ssa_alu_instr_src_components(instr, i)); + } } else { for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { hash = hash_alu_src(hash, &instr->src[i], @@ -76,14 +85,58 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr) return hash; } +static uint32_t +hash_deref(uint32_t hash, const nir_deref_instr *instr) +{ + hash = HASH(hash, instr->deref_type); + hash = HASH(hash, instr->mode); + hash = HASH(hash, instr->type); + + if (instr->deref_type == nir_deref_type_var) + return HASH(hash, instr->var); + + hash = hash_src(hash, &instr->parent); + + switch (instr->deref_type) { + case nir_deref_type_struct: + hash = HASH(hash, instr->strct.index); + break; + + case nir_deref_type_array: + case nir_deref_type_ptr_as_array: + hash = hash_src(hash, &instr->arr.index); + break; + + case nir_deref_type_cast: + hash = HASH(hash, instr->cast.ptr_stride); + break; + + case nir_deref_type_var: + case nir_deref_type_array_wildcard: + /* Nothing to do */ + break; + + default: + unreachable("Invalid instruction deref type"); + } + + return hash; +} + static uint32_t hash_load_const(uint32_t hash, const nir_load_const_instr *instr) { hash = HASH(hash, instr->def.num_components); - hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f, - instr->def.num_components - * sizeof(instr->value.f[0])); + if (instr->def.bit_size == 1) { + for (unsigned i = 0; i < instr->def.num_components; i++) { + uint8_t b = instr->value[i].b; + hash = HASH(hash, b); + } + } else { + unsigned size = instr->def.num_components * sizeof(*instr->value); + hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value, size); + } return hash; } @@ -105,7 +158,7 @@ hash_phi(uint32_t hash, const nir_phi_instr *instr) unsigned num_preds = instr->instr.block->predecessors->entries; NIR_VLA(nir_phi_src *, srcs, num_preds); unsigned i = 0; - nir_foreach_phi_src(instr, src) { + nir_foreach_phi_src(src, instr) { srcs[i++] = src; } @@ -125,10 +178,10 @@ 
hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr) const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; hash = HASH(hash, instr->intrinsic); - if (info->has_dest) + if (info->has_dest) { hash = HASH(hash, instr->dest.ssa.num_components); - - assert(info->num_variables == 0); + hash = HASH(hash, instr->dest.ssa.bit_size); + } hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index, info->num_indices @@ -152,13 +205,14 @@ hash_tex(uint32_t hash, const nir_tex_instr *instr) hash = HASH(hash, instr->is_array); hash = HASH(hash, instr->is_shadow); hash = HASH(hash, instr->is_new_style_shadow); - hash = HASH(hash, instr->const_offset); unsigned component = instr->component; hash = HASH(hash, component); + for (unsigned i = 0; i < 4; ++i) + for (unsigned j = 0; j < 2; ++j) + hash = HASH(hash, instr->tg4_offsets[i][j]); + hash = HASH(hash, instr->texture_index); + hash = HASH(hash, instr->texture_array_size); hash = HASH(hash, instr->sampler_index); - hash = HASH(hash, instr->sampler_array_size); - - assert(!instr->sampler); return hash; } @@ -179,6 +233,9 @@ hash_instr(const void *data) case nir_instr_type_alu: hash = hash_alu(hash, nir_instr_as_alu(instr)); break; + case nir_instr_type_deref: + hash = hash_deref(hash, nir_instr_as_deref(instr)); + break; case nir_instr_type_load_const: hash = hash_load_const(hash, nir_instr_as_load_const(instr)); break; @@ -225,7 +282,212 @@ nir_srcs_equal(nir_src src1, nir_src src2) } } -static bool +/** + * If the \p s is an SSA value that was generated by a negation instruction, + * that instruction is returned as a \c nir_alu_instr. Otherwise \c NULL is + * returned. + */ +static nir_alu_instr * +get_neg_instr(nir_src s) +{ + nir_alu_instr *alu = nir_src_as_alu_instr(s); + + return alu != NULL && (alu->op == nir_op_fneg || alu->op == nir_op_ineg) + ? alu : NULL; +} + +bool +nir_const_value_negative_equal(const nir_const_value *c1, + const nir_const_value *c2, + unsigned components, + nir_alu_type base_type, + unsigned bits) +{ + assert(base_type == nir_alu_type_get_base_type(base_type)); + assert(base_type != nir_type_invalid); + + /* This can occur for 1-bit Boolean values. 
*/ + if (bits == 1) + return false; + + switch (base_type) { + case nir_type_float: + switch (bits) { + case 16: + for (unsigned i = 0; i < components; i++) { + if (_mesa_half_to_float(c1[i].u16) != + -_mesa_half_to_float(c2[i].u16)) { + return false; + } + } + + return true; + + case 32: + for (unsigned i = 0; i < components; i++) { + if (c1[i].f32 != -c2[i].f32) + return false; + } + + return true; + + case 64: + for (unsigned i = 0; i < components; i++) { + if (c1[i].f64 != -c2[i].f64) + return false; + } + + return true; + + default: + unreachable("unknown bit size"); + } + + break; + + case nir_type_int: + case nir_type_uint: + switch (bits) { + case 8: + for (unsigned i = 0; i < components; i++) { + if (c1[i].i8 != -c2[i].i8) + return false; + } + + return true; + + case 16: + for (unsigned i = 0; i < components; i++) { + if (c1[i].i16 != -c2[i].i16) + return false; + } + + return true; + break; + + case 32: + for (unsigned i = 0; i < components; i++) { + if (c1[i].i32 != -c2[i].i32) + return false; + } + + return true; + + case 64: + for (unsigned i = 0; i < components; i++) { + if (c1[i].i64 != -c2[i].i64) + return false; + } + + return true; + + default: + unreachable("unknown bit size"); + } + + break; + + case nir_type_bool: + return false; + + default: + break; + } + + return false; +} + +/** + * Shallow compare of ALU srcs to determine if one is the negation of the other + * + * This function detects cases where \p alu1 is a constant and \p alu2 is a + * constant that is its negation. It will also detect cases where \p alu2 is + * an SSA value that is a \c nir_op_fneg applied to \p alu1 (and vice versa). + * + * This function does not detect the general case when \p alu1 and \p alu2 are + * SSA values that are the negations of each other (e.g., \p alu1 represents + * (a * b) and \p alu2 represents (-a * b)). + */ +bool +nir_alu_srcs_negative_equal(const nir_alu_instr *alu1, + const nir_alu_instr *alu2, + unsigned src1, unsigned src2) +{ + if (alu1->src[src1].abs != alu2->src[src2].abs) + return false; + + bool parity = alu1->src[src1].negate != alu2->src[src2].negate; + + /* Handling load_const instructions is tricky. */ + + const nir_const_value *const const1 = + nir_src_as_const_value(alu1->src[src1].src); + + if (const1 != NULL) { + /* Assume that constant folding will eliminate source mods and unary + * ops. + */ + if (parity) + return false; + + const nir_const_value *const const2 = + nir_src_as_const_value(alu2->src[src2].src); + + if (const2 == NULL) + return false; + + /* FINISHME: Apply the swizzle? 
*/ + return nir_const_value_negative_equal(const1, + const2, + nir_ssa_alu_instr_src_components(alu1, src1), + nir_op_infos[alu1->op].input_types[src1], + alu1->dest.dest.ssa.bit_size); + } + + uint8_t alu1_swizzle[4] = {0}; + nir_src alu1_actual_src; + nir_alu_instr *neg1 = get_neg_instr(alu1->src[src1].src); + + if (neg1) { + parity = !parity; + alu1_actual_src = neg1->src[0].src; + + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(neg1, 0); i++) + alu1_swizzle[i] = neg1->src[0].swizzle[i]; + } else { + alu1_actual_src = alu1->src[src1].src; + + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) + alu1_swizzle[i] = i; + } + + uint8_t alu2_swizzle[4] = {0}; + nir_src alu2_actual_src; + nir_alu_instr *neg2 = get_neg_instr(alu2->src[src2].src); + + if (neg2) { + parity = !parity; + alu2_actual_src = neg2->src[0].src; + + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(neg2, 0); i++) + alu2_swizzle[i] = neg2->src[0].swizzle[i]; + } else { + alu2_actual_src = alu2->src[src2].src; + + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu2, src2); i++) + alu2_swizzle[i] = i; + } + + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) { + if (alu1_swizzle[alu1->src[src1].swizzle[i]] != + alu2_swizzle[alu2->src[src2].swizzle[i]]) + return false; + } + + return parity && nir_srcs_equal(alu1_actual_src, alu2_actual_src); +} + +bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, unsigned src1, unsigned src2) { @@ -267,12 +529,22 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) return false; - if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { - assert(nir_op_infos[alu1->op].num_inputs == 2); - return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && - nir_alu_srcs_equal(alu1, alu2, 1, 1)) || - (nir_alu_srcs_equal(alu1, alu2, 0, 1) && - nir_alu_srcs_equal(alu1, alu2, 1, 0)); + if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size) + return false; + + /* We explicitly don't hash instr->dest.dest.exact */ + + if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) { + if ((!nir_alu_srcs_equal(alu1, alu2, 0, 0) || + !nir_alu_srcs_equal(alu1, alu2, 1, 1)) && + (!nir_alu_srcs_equal(alu1, alu2, 0, 1) || + !nir_alu_srcs_equal(alu1, alu2, 1, 0))) + return false; + + for (unsigned i = 2; i < nir_op_infos[alu1->op].num_inputs; i++) { + if (!nir_alu_srcs_equal(alu1, alu2, i, i)) + return false; + } } else { for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { if (!nir_alu_srcs_equal(alu1, alu2, i, i)) @@ -281,6 +553,48 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) } return true; } + case nir_instr_type_deref: { + nir_deref_instr *deref1 = nir_instr_as_deref(instr1); + nir_deref_instr *deref2 = nir_instr_as_deref(instr2); + + if (deref1->deref_type != deref2->deref_type || + deref1->mode != deref2->mode || + deref1->type != deref2->type) + return false; + + if (deref1->deref_type == nir_deref_type_var) + return deref1->var == deref2->var; + + if (!nir_srcs_equal(deref1->parent, deref2->parent)) + return false; + + switch (deref1->deref_type) { + case nir_deref_type_struct: + if (deref1->strct.index != deref2->strct.index) + return false; + break; + + case nir_deref_type_array: + case nir_deref_type_ptr_as_array: + if (!nir_srcs_equal(deref1->arr.index, deref2->arr.index)) + return false; + break; + + case nir_deref_type_cast: + 
if (deref1->cast.ptr_stride != deref2->cast.ptr_stride) + return false; + break; + + case nir_deref_type_var: + case nir_deref_type_array_wildcard: + /* Nothing to do */ + break; + + default: + unreachable("Invalid instruction deref type"); + } + return true; + } case nir_instr_type_tex: { nir_tex_instr *tex1 = nir_instr_as_tex(instr1); nir_tex_instr *tex2 = nir_instr_as_tex(instr2); @@ -302,16 +616,16 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) tex1->is_array != tex2->is_array || tex1->is_shadow != tex2->is_shadow || tex1->is_new_style_shadow != tex2->is_new_style_shadow || - memcmp(tex1->const_offset, tex2->const_offset, - sizeof(tex1->const_offset)) != 0 || tex1->component != tex2->component || - tex1->sampler_index != tex2->sampler_index || - tex1->sampler_array_size != tex2->sampler_array_size) { + tex1->texture_index != tex2->texture_index || + tex1->texture_array_size != tex2->texture_array_size || + tex1->sampler_index != tex2->sampler_index) { return false; } - /* Don't support un-lowered sampler derefs currently. */ - assert(!tex1->sampler && !tex2->sampler); + if (memcmp(tex1->tg4_offsets, tex2->tg4_offsets, + sizeof(tex1->tg4_offsets))) + return false; return true; } @@ -322,8 +636,20 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) if (load1->def.num_components != load2->def.num_components) return false; - return memcmp(load1->value.f, load2->value.f, - load1->def.num_components * sizeof(*load2->value.f)) == 0; + if (load1->def.bit_size != load2->def.bit_size) + return false; + + if (load1->def.bit_size == 1) { + for (unsigned i = 0; i < load1->def.num_components; ++i) { + if (load1->value[i].b != load2->value[i].b) + return false; + } + } else { + unsigned size = load1->def.num_components * sizeof(*load1->value); + if (memcmp(load1->value, load2->value, size) != 0) + return false; + } + return true; } case nir_instr_type_phi: { nir_phi_instr *phi1 = nir_instr_as_phi(instr1); @@ -332,8 +658,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) if (phi1->instr.block != phi2->instr.block) return false; - nir_foreach_phi_src(phi1, src1) { - nir_foreach_phi_src(phi2, src2) { + nir_foreach_phi_src(src1, phi1) { + nir_foreach_phi_src(src2, phi2) { if (src1->pred == src2->pred) { if (!nir_srcs_equal(src1->src, src2->src)) return false; @@ -359,13 +685,15 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) intrinsic2->dest.ssa.num_components) return false; + if (info->has_dest && intrinsic1->dest.ssa.bit_size != + intrinsic2->dest.ssa.bit_size) + return false; + for (unsigned i = 0; i < info->num_srcs; i++) { if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i])) return false; } - assert(info->num_variables == 0); - for (unsigned i = 0; i < info->num_indices; i++) { if (intrinsic1->const_index[i] != intrinsic2->const_index[i]) return false; @@ -381,7 +709,7 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) unreachable("Invalid instruction type"); } - return false; + unreachable("All cases in the above switch should return"); } static bool @@ -398,6 +726,16 @@ dest_is_ssa(nir_dest *dest, void *data) return dest->is_ssa; } +static inline bool +instr_each_src_and_dest_is_ssa(nir_instr *instr) +{ + if (!nir_foreach_dest(instr, dest_is_ssa, NULL) || + !nir_foreach_src(instr, src_is_ssa, NULL)) + return false; + + return true; +} + /* This function determines if uses of an instruction can safely be rewritten * to use another identical instruction instead. 
Note that this function must * be kept in sync with hash_instr() and nir_instrs_equal() -- only @@ -409,30 +747,20 @@ static bool instr_can_rewrite(nir_instr *instr) { /* We only handle SSA. */ - if (!nir_foreach_dest(instr, dest_is_ssa, NULL) || - !nir_foreach_src(instr, src_is_ssa, NULL)) - return false; + assert(instr_each_src_and_dest_is_ssa(instr)); switch (instr->type) { case nir_instr_type_alu: + case nir_instr_type_deref: + case nir_instr_type_tex: case nir_instr_type_load_const: case nir_instr_type_phi: return true; - case nir_instr_type_tex: { - nir_tex_instr *tex = nir_instr_as_tex(instr); - - /* Don't support un-lowered sampler derefs currently. */ - if (tex->sampler) - return false; - - return true; - } case nir_instr_type_intrinsic: { const nir_intrinsic_info *info = &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && - (info->flags & NIR_INTRINSIC_CAN_REORDER) && - info->num_variables == 0; /* not implemented yet */ + (info->flags & NIR_INTRINSIC_CAN_REORDER); } case nir_instr_type_call: case nir_instr_type_jump: @@ -453,6 +781,9 @@ nir_instr_get_dest_ssa_def(nir_instr *instr) case nir_instr_type_alu: assert(nir_instr_as_alu(instr)->dest.dest.is_ssa); return &nir_instr_as_alu(instr)->dest.dest.ssa; + case nir_instr_type_deref: + assert(nir_instr_as_deref(instr)->dest.is_ssa); + return &nir_instr_as_deref(instr)->dest.ssa; case nir_instr_type_load_const: return &nir_instr_as_load_const(instr)->def; case nir_instr_type_phi: @@ -493,16 +824,24 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr) if (!instr_can_rewrite(instr)) return false; - struct set_entry *entry = _mesa_set_search(instr_set, instr); - if (entry) { + struct set_entry *e = _mesa_set_search_or_add(instr_set, instr); + nir_instr *match = (nir_instr *) e->key; + if (match != instr) { nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr); - nir_ssa_def *new_def = - nir_instr_get_dest_ssa_def((nir_instr *) entry->key); + nir_ssa_def *new_def = nir_instr_get_dest_ssa_def(match); + + /* It's safe to replace an exact instruction with an inexact one as + * long as we make it exact. If we got here, the two instructions are + * exactly identical in every other way so, once we've set the exact + * bit, they are the same. + */ + if (instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->exact) + nir_instr_as_alu(match)->exact = true; + nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def)); return true; } - _mesa_set_add(instr_set, instr); return false; }
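
A minimal usage sketch, assuming the entry points declared alongside this file in nir_instr_set.h (nir_instr_set_create(), nir_instr_set_add_or_rewrite(), nir_instr_set_destroy()): the set keyed by hash_instr() and nir_instrs_equal() above is normally driven as shown below. The helper name cse_block() is hypothetical and is not the real nir_opt_cse pass, which walks the dominance tree rather than a single block; this is only meant to illustrate the calling pattern of nir_instr_set_add_or_rewrite().

#include "nir.h"
#include "nir_instr_set.h"

/* Local CSE over a single block (sketch): try to add each instruction to
 * the set.  When an equivalent instruction is already present,
 * nir_instr_set_add_or_rewrite() has already rewritten all uses of this
 * instruction's destination to the existing SSA def, so the redundant
 * instruction can simply be removed.
 */
static bool
cse_block(nir_block *block)
{
   struct set *instr_set = nir_instr_set_create(NULL);
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
         progress = true;
         nir_instr_remove(instr);
      }
   }

   nir_instr_set_destroy(instr_set);
   return progress;
}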