X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_vec4_nir.cpp;h=607cfc24d9cc6c98606504ceab1dedaaed971fc2;hb=4064a6cd207811434e5400a613b3833fbda6b787;hp=39f78fa98a8f106efaa1a2a6b8485586e28bf636;hpb=f2dc0f28728af63e1a79756dab06a7035fecb590;p=mesa.git diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 39f78fa98a8..607cfc24d9c 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -25,6 +25,7 @@ #include "brw_vec4.h" #include "brw_vec4_builder.h" #include "brw_vec4_surface_builder.h" +#include "brw_eu.h" using namespace brw; using namespace brw::surface_access; @@ -406,6 +407,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_input: { + assert(nir_dest_bit_size(instr->dest) == 32); /* We set EmitNoIndirectInput for VS */ unsigned load_offset = nir_src_as_uint(instr->src[0]); @@ -416,53 +418,22 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) glsl_type::uvec4_type); src = retype(src, dest.type); - bool is_64bit = nir_dest_bit_size(instr->dest) == 64; - if (is_64bit) { - dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); - src.swizzle = BRW_SWIZZLE_XYZW; - shuffle_64bit_data(tmp, src, false); - emit(MOV(dest, src_reg(tmp))); - } else { - /* Swizzle source based on component layout qualifier */ - src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); - emit(MOV(dest, src)); - } + /* Swizzle source based on component layout qualifier */ + src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); + emit(MOV(dest, src)); break; } case nir_intrinsic_store_output: { + assert(nir_src_bit_size(instr->src[0]) == 32); unsigned store_offset = nir_src_as_uint(instr->src[1]); int varying = instr->const_index[0] + store_offset; - - bool is_64bit = nir_src_bit_size(instr->src[0]) == 64; - if (is_64bit) { - src_reg data; - src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_DF, - instr->num_components); - data = src_reg(this, glsl_type::dvec4_type); - shuffle_64bit_data(dst_reg(data), src, true); - src = retype(data, BRW_REGISTER_TYPE_F); - } else { - src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, - instr->num_components); - } + src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, + instr->num_components); unsigned c = nir_intrinsic_component(instr); output_reg[varying][c] = dst_reg(src); output_num_components[varying][c] = instr->num_components; - - unsigned num_components = instr->num_components; - if (is_64bit) - num_components *= 2; - - output_reg[varying][c] = dst_reg(src); - output_num_components[varying][c] = MIN2(4, num_components); - - if (is_64bit && num_components > 4) { - assert(num_components <= 8); - output_reg[varying + 1][c] = byte_offset(dst_reg(src), REG_SIZE); - output_num_components[varying + 1][c] = num_components - 4; - } break; } @@ -577,46 +548,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } - case nir_intrinsic_ssbo_atomic_add: { - int op = BRW_AOP_ADD; - - if (nir_src_is_const(instr->src[2])) { - int add_val = nir_src_as_int(instr->src[2]); - if (add_val == 1) - op = BRW_AOP_INC; - else if (add_val == -1) - op = BRW_AOP_DEC; - } - - nir_emit_ssbo_atomic(op, instr); - break; - } + case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_imin: - nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr); - break; case nir_intrinsic_ssbo_atomic_umin: - nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr); - break; case nir_intrinsic_ssbo_atomic_imax: - nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr); - break; case nir_intrinsic_ssbo_atomic_umax: - nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr); - break; case nir_intrinsic_ssbo_atomic_and: - nir_emit_ssbo_atomic(BRW_AOP_AND, instr); - break; case nir_intrinsic_ssbo_atomic_or: - nir_emit_ssbo_atomic(BRW_AOP_OR, instr); - break; case nir_intrinsic_ssbo_atomic_xor: - nir_emit_ssbo_atomic(BRW_AOP_XOR, instr); - break; case nir_intrinsic_ssbo_atomic_exchange: - nir_emit_ssbo_atomic(BRW_AOP_MOV, instr); - break; case nir_intrinsic_ssbo_atomic_comp_swap: - nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr); + nir_emit_ssbo_atomic(brw_aop_for_nir_intrinsic(instr), instr); break; case nir_intrinsic_load_vertex_id: @@ -685,6 +627,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_ubo: { src_reg surf_index; + prog_data->base.has_ubo_pull = true; + dest = get_nir_dest(instr->dest); if (nir_src_is_const(instr->src[0])) { @@ -756,12 +700,16 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } + case nir_intrinsic_scoped_barrier: + assert(nir_intrinsic_execution_scope(instr) == NIR_SCOPE_NONE); + /* Fall through. */ case nir_intrinsic_memory_barrier: { const vec4_builder bld = vec4_builder(this).at_end().annotate(current_annotation, base_ir); - const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) - ->size_written = 2 * REG_SIZE; + const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); + vec4_instruction *fence = + bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0)); + fence->sfid = GEN7_SFID_DATAPORT_DATA_CACHE; break; } @@ -815,45 +763,6 @@ brw_swizzle_for_nir_swizzle(uint8_t swizzle[4]) return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); } -static enum brw_conditional_mod -brw_conditional_for_nir_comparison(nir_op op) -{ - switch (op) { - case nir_op_flt32: - case nir_op_ilt32: - case nir_op_ult32: - return BRW_CONDITIONAL_L; - - case nir_op_fge32: - case nir_op_ige32: - case nir_op_uge32: - return BRW_CONDITIONAL_GE; - - case nir_op_feq32: - case nir_op_ieq32: - case nir_op_b32all_fequal2: - case nir_op_b32all_iequal2: - case nir_op_b32all_fequal3: - case nir_op_b32all_iequal3: - case nir_op_b32all_fequal4: - case nir_op_b32all_iequal4: - return BRW_CONDITIONAL_Z; - - case nir_op_fne32: - case nir_op_ine32: - case nir_op_b32any_fnequal2: - case nir_op_b32any_inequal2: - case nir_op_b32any_fnequal3: - case nir_op_b32any_inequal3: - case nir_op_b32any_fnequal4: - case nir_op_b32any_inequal4: - return BRW_CONDITIONAL_NZ; - - default: - unreachable("not reached: bad operation for comparison"); - } -} - bool vec4_visitor::optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate) @@ -899,12 +808,10 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr, unsigned base_swizzle = brw_swizzle_for_nir_swizzle(cmp_instr->src[i].swizzle); op[i].swizzle = brw_compose_swizzle(size_swizzle, base_swizzle); - op[i].abs = cmp_instr->src[i].abs; - op[i].negate = cmp_instr->src[i].negate; } emit(CMP(dst_null_d(), op[0], op[1], - brw_conditional_for_nir_comparison(cmp_instr->op))); + brw_cmod_for_nir_comparison(cmp_instr->op))); return true; } @@ -958,8 +865,7 @@ emit_find_msb_using_lzd(const vec4_builder &bld, } void -vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src, - bool saturate) +vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src) { /* BDW PRM vol 15 - workarounds: * DF->f format conversion for Align16 has wrong emask calculation when @@ -967,8 +873,7 @@ vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src, */ if (devinfo->gen == 8 && dst.type == BRW_REGISTER_TYPE_F && src.file == BRW_IMMEDIATE_VALUE) { - vec4_instruction *inst = emit(MOV(dst, brw_imm_f(src.df))); - inst->saturate = saturate; + emit(MOV(dst, brw_imm_f(src.df))); return; } @@ -993,49 +898,91 @@ vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src, emit(op, temp2, src_reg(temp)); emit(VEC4_OPCODE_PICK_LOW_32BIT, retype(temp2, dst.type), src_reg(temp2)); - vec4_instruction *inst = emit(MOV(dst, src_reg(retype(temp2, dst.type)))); - inst->saturate = saturate; + emit(MOV(dst, src_reg(retype(temp2, dst.type)))); } void -vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src, - bool saturate) +vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src) { dst_reg tmp_dst = dst_reg(src_reg(this, glsl_type::dvec4_type)); src_reg tmp_src = retype(src_reg(this, glsl_type::vec4_type), src.type); emit(MOV(dst_reg(tmp_src), src)); emit(VEC4_OPCODE_TO_DOUBLE, tmp_dst, tmp_src); - vec4_instruction *inst = emit(MOV(dst, src_reg(tmp_dst))); - inst->saturate = saturate; + emit(MOV(dst, src_reg(tmp_dst))); } /** - * Try to use an immediate value for source 1 + * Try to use an immediate value for a source * * In cases of flow control, constant propagation is sometimes unable to * determine that a register contains a constant value. To work around this, - * try to emit a literal as the second source here. + * try to emit a literal as one of the sources. If \c try_src0_also is set, + * \c op[0] will also be tried for an immediate value. + * + * If \c op[0] is modified, the operands will be exchanged so that \c op[1] + * will always be the immediate value. + * + * \return The index of the source that was modified, 0 or 1, if successful. + * Otherwise, -1. + * + * \param op - Operands to the instruction + * \param try_src0_also - True if \c op[0] should also be a candidate for + * getting an immediate value. This should only be set + * for commutative operations. */ -static void +static int try_immediate_source(const nir_alu_instr *instr, src_reg *op, - MAYBE_UNUSED const gen_device_info *devinfo) + bool try_src0_also, + ASSERTED const gen_device_info *devinfo) { - if (nir_src_num_components(instr->src[1].src) != 1 || - nir_src_bit_size(instr->src[1].src) != 32 || - !nir_src_is_const(instr->src[1].src)) - return; + unsigned idx; - const enum brw_reg_type old_type = op->type; + /* MOV should be the only single-source instruction passed to this + * function. Any other unary instruction with a constant source should + * have been constant-folded away! + */ + assert(nir_op_infos[instr->op].num_inputs > 1 || + instr->op == nir_op_mov); + + if (instr->op != nir_op_mov && + nir_src_bit_size(instr->src[1].src) == 32 && + nir_src_is_const(instr->src[1].src)) { + idx = 1; + } else if (try_src0_also && + nir_src_bit_size(instr->src[0].src) == 32 && + nir_src_is_const(instr->src[0].src)) { + idx = 0; + } else { + return -1; + } + + const enum brw_reg_type old_type = op[idx].type; switch (old_type) { case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: { - int d = nir_src_as_int(instr->src[1].src); + int first_comp = -1; + int d = 0; + + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { + if (nir_alu_instr_channel_used(instr, idx, i)) { + if (first_comp < 0) { + first_comp = i; + d = nir_src_comp_as_int(instr->src[idx].src, + instr->src[idx].swizzle[i]); + } else if (d != nir_src_comp_as_int(instr->src[idx].src, + instr->src[idx].swizzle[i])) { + return -1; + } + } + } - if (op->abs) + assert(first_comp >= 0); + + if (op[idx].abs) d = MAX2(-d, d); - if (op->negate) { + if (op[idx].negate) { /* On Gen8+ a negation source modifier on a logical operation means * something different. Nothing should generate this, so assert that * it does not occur. @@ -1046,27 +993,130 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op, d = -d; } - *op = retype(src_reg(brw_imm_d(d)), old_type); + op[idx] = retype(src_reg(brw_imm_d(d)), old_type); break; } case BRW_REGISTER_TYPE_F: { - float f = nir_src_as_float(instr->src[1].src); + int first_comp = -1; + float f[NIR_MAX_VEC_COMPONENTS] = { 0.0f }; + bool is_scalar = true; + + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { + if (nir_alu_instr_channel_used(instr, idx, i)) { + f[i] = nir_src_comp_as_float(instr->src[idx].src, + instr->src[idx].swizzle[i]); + if (first_comp < 0) { + first_comp = i; + } else if (f[first_comp] != f[i]) { + is_scalar = false; + } + } + } + + if (is_scalar) { + if (op[idx].abs) + f[first_comp] = fabs(f[first_comp]); + + if (op[idx].negate) + f[first_comp] = -f[first_comp]; + + op[idx] = src_reg(brw_imm_f(f[first_comp])); + assert(op[idx].type == old_type); + } else { + uint8_t vf_values[4] = { 0, 0, 0, 0 }; + + for (unsigned i = 0; i < ARRAY_SIZE(vf_values); i++) { - if (op->abs) - f = fabs(f); + if (op[idx].abs) + f[i] = fabs(f[i]); - if (op->negate) - f = -f; + if (op[idx].negate) + f[i] = -f[i]; - *op = src_reg(brw_imm_f(f)); - assert(op->type == old_type); + const int vf = brw_float_to_vf(f[i]); + if (vf == -1) + return -1; + + vf_values[i] = vf; + } + + op[idx] = src_reg(brw_imm_vf4(vf_values[0], vf_values[1], + vf_values[2], vf_values[3])); + } break; } default: unreachable("Non-32bit type."); } + + /* If the instruction has more than one source, the instruction format only + * allows source 1 to be an immediate value. If the immediate value was + * source 0, then the sources must be exchanged. + */ + if (idx == 0 && instr->op != nir_op_mov) { + src_reg tmp = op[0]; + op[0] = op[1]; + op[1] = tmp; + } + + return idx; +} + +void +vec4_visitor::fix_float_operands(src_reg op[3], nir_alu_instr *instr) +{ + bool fixed[3] = { false, false, false }; + + for (unsigned i = 0; i < 2; i++) { + if (!nir_src_is_const(instr->src[i].src)) + continue; + + for (unsigned j = i + 1; j < 3; j++) { + if (fixed[j]) + continue; + + if (!nir_src_is_const(instr->src[j].src)) + continue; + + if (nir_alu_srcs_equal(instr, instr, i, j)) { + if (!fixed[i]) + op[i] = fix_3src_operand(op[i]); + + op[j] = op[i]; + + fixed[i] = true; + fixed[j] = true; + } else if (nir_alu_srcs_negative_equal(instr, instr, i, j)) { + if (!fixed[i]) + op[i] = fix_3src_operand(op[i]); + + op[j] = op[i]; + op[j].negate = !op[j].negate; + + fixed[i] = true; + fixed[j] = true; + } + } + } + + for (unsigned i = 0; i < 3; i++) { + if (!fixed[i]) + op[i] = fix_3src_operand(op[i]); + } +} + +static bool +const_src_fits_in_16_bits(const nir_src &src, brw_reg_type type) +{ + assert(nir_src_is_const(src)); + if (type_is_unsigned_int(type)) { + return nir_src_comp_as_uint(src, 0) <= UINT16_MAX; + } else { + const int64_t c = nir_src_comp_as_int(src, 0); + return c <= INT16_MAX && c >= INT16_MIN; + } } void @@ -1079,21 +1129,25 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) dst_reg dst = get_nir_dest(instr->dest.dest, dst_type); dst.writemask = instr->dest.write_mask; + assert(!instr->dest.saturate); + src_reg op[4]; for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + /* We don't lower to source modifiers, so they shouldn't exist. */ + assert(!instr->src[i].abs); + assert(!instr->src[i].negate); + nir_alu_type src_type = (nir_alu_type) (nir_op_infos[instr->op].input_types[i] | nir_src_bit_size(instr->src[i].src)); op[i] = get_nir_src(instr->src[i].src, src_type, 4); op[i].swizzle = brw_swizzle_for_nir_swizzle(instr->src[i].swizzle); - op[i].abs = instr->src[i].abs; - op[i].negate = instr->src[i].negate; } switch (instr->op) { case nir_op_mov: + try_immediate_source(instr, &op[0], true, devinfo); inst = emit(MOV(dst, op[0])); - inst->saturate = instr->dest.saturate; break; case nir_op_vec2: @@ -1104,14 +1158,13 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_i2f32: case nir_op_u2f32: inst = emit(MOV(dst, op[0])); - inst->saturate = instr->dest.saturate; break; case nir_op_f2f32: case nir_op_f2i32: case nir_op_f2u32: if (nir_src_bit_size(instr->src[0].src) == 64) - emit_conversion_from_double(dst, op[0], instr->dest.saturate); + emit_conversion_from_double(dst, op[0]); else inst = emit(MOV(dst, op[0])); break; @@ -1119,7 +1172,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_f2f64: case nir_op_i2f64: case nir_op_u2f64: - emit_conversion_to_double(dst, op[0], instr->dest.saturate); + emit_conversion_to_double(dst, op[0]); break; case nir_op_fsat: @@ -1131,8 +1184,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ineg: op[0].negate = true; inst = emit(MOV(dst, op[0])); - if (instr->op == nir_op_fneg) - inst->saturate = instr->dest.saturate; break; case nir_op_fabs: @@ -1140,17 +1191,14 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0].negate = false; op[0].abs = true; inst = emit(MOV(dst, op[0])); - if (instr->op == nir_op_fabs) - inst->saturate = instr->dest.saturate; break; case nir_op_iadd: assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fadd: - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, true, devinfo); inst = emit(ADD(dst, op[0], op[1])); - inst->saturate = instr->dest.saturate; break; case nir_op_uadd_sat: @@ -1160,9 +1208,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fmul: - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, true, devinfo); inst = emit(MUL(dst, op[0], op[1])); - inst->saturate = instr->dest.saturate; break; case nir_op_imul: { @@ -1176,14 +1223,14 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) */ if (nir_src_is_const(instr->src[0].src) && nir_alu_instr_src_read_mask(instr, 0) == 1 && - nir_src_comp_as_uint(instr->src[0].src, 0) < (1 << 16)) { + const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) { if (devinfo->gen < 7) emit(MUL(dst, op[0], op[1])); else emit(MUL(dst, op[1], op[0])); } else if (nir_src_is_const(instr->src[1].src) && nir_alu_instr_src_read_mask(instr, 1) == 1 && - nir_src_comp_as_uint(instr->src[1].src, 0) < (1 << 16)) { + const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) { if (devinfo->gen < 7) emit(MUL(dst, op[1], op[0])); else @@ -1217,27 +1264,22 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_frcp: inst = emit_math(SHADER_OPCODE_RCP, dst, op[0]); - inst->saturate = instr->dest.saturate; break; case nir_op_fexp2: inst = emit_math(SHADER_OPCODE_EXP2, dst, op[0]); - inst->saturate = instr->dest.saturate; break; case nir_op_flog2: inst = emit_math(SHADER_OPCODE_LOG2, dst, op[0]); - inst->saturate = instr->dest.saturate; break; case nir_op_fsin: inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); - inst->saturate = instr->dest.saturate; break; case nir_op_fcos: inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); - inst->saturate = instr->dest.saturate; break; case nir_op_idiv: @@ -1292,17 +1334,14 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_fsqrt: inst = emit_math(SHADER_OPCODE_SQRT, dst, op[0]); - inst->saturate = instr->dest.saturate; break; case nir_op_frsq: inst = emit_math(SHADER_OPCODE_RSQ, dst, op[0]); - inst->saturate = instr->dest.saturate; break; case nir_op_fpow: inst = emit_math(SHADER_OPCODE_POW, dst, op[0], op[1]); - inst->saturate = instr->dest.saturate; break; case nir_op_uadd_carry: { @@ -1325,7 +1364,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ftrunc: inst = emit(RNDZ(dst, op[0])); - inst->saturate = instr->dest.saturate; + if (devinfo->gen < 6) { + inst->conditional_mod = BRW_CONDITIONAL_R; + inst = emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f))); + inst->predicate = BRW_PREDICATE_NORMAL; + inst = emit(MOV(dst, src_reg(dst))); /* for potential saturation */ + } break; case nir_op_fceil: { @@ -1339,23 +1383,25 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit(RNDD(dst_reg(tmp), op[0])); tmp.negate = true; inst = emit(MOV(dst, tmp)); - inst->saturate = instr->dest.saturate; break; } case nir_op_ffloor: inst = emit(RNDD(dst, op[0])); - inst->saturate = instr->dest.saturate; break; case nir_op_ffract: inst = emit(FRC(dst, op[0])); - inst->saturate = instr->dest.saturate; break; case nir_op_fround_even: inst = emit(RNDE(dst, op[0])); - inst->saturate = instr->dest.saturate; + if (devinfo->gen < 6) { + inst->conditional_mod = BRW_CONDITIONAL_R; + inst = emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f))); + inst->predicate = BRW_PREDICATE_NORMAL; + inst = emit(MOV(dst, src_reg(dst))); /* for potential saturation */ + } break; case nir_op_fquantize2f16: { @@ -1379,7 +1425,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) /* Select that or zero based on normal status */ inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32); inst->predicate = BRW_PREDICATE_NORMAL; - inst->saturate = instr->dest.saturate; break; } @@ -1388,9 +1433,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fmin: - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, true, devinfo); inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]); - inst->saturate = instr->dest.saturate; break; case nir_op_imax: @@ -1398,9 +1442,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fmax: - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, true, devinfo); inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]); - inst->saturate = instr->dest.saturate; break; case nir_op_fddx: @@ -1424,10 +1467,15 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_feq32: case nir_op_fne32: { enum brw_conditional_mod conditional_mod = - brw_conditional_for_nir_comparison(instr->op); + brw_cmod_for_nir_comparison(instr->op); if (nir_src_bit_size(instr->src[0].src) < 64) { - try_immediate_source(instr, &op[1], devinfo); + /* If the order of the sources is changed due to an immediate value, + * then the condition must also be changed. + */ + if (try_immediate_source(instr, op, true, devinfo) == 0) + conditional_mod = brw_swap_cmod(conditional_mod); + emit(CMP(dst, op[0], op[1], conditional_mod)); } else { /* Produce a 32-bit boolean result from the DF comparison by selecting @@ -1456,7 +1504,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), - brw_conditional_for_nir_comparison(instr->op))); + brw_cmod_for_nir_comparison(instr->op))); emit(MOV(dst, brw_imm_d(0))); inst = emit(MOV(dst, brw_imm_d(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; @@ -1475,7 +1523,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), - brw_conditional_for_nir_comparison(instr->op))); + brw_cmod_for_nir_comparison(instr->op))); emit(MOV(dst, brw_imm_d(0))); inst = emit(MOV(dst, brw_imm_d(~0))); @@ -1497,7 +1545,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, true, devinfo); emit(XOR(dst, op[0], op[1])); break; @@ -1507,7 +1555,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, true, devinfo); emit(OR(dst, op[0], op[1])); break; @@ -1517,7 +1565,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, true, devinfo); emit(AND(dst, op[0], op[1])); break; @@ -1526,7 +1574,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_b2f64: if (nir_dest_bit_size(instr->dest.dest) > 32) { assert(dst.type == BRW_REGISTER_TYPE_DF); - emit_conversion_to_double(dst, negate(op[0]), false); + emit_conversion_to_double(dst, negate(op[0])); } else { emit(MOV(dst, negate(op[0]))); } @@ -1557,24 +1605,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); break; - case nir_op_fnoise1_1: - case nir_op_fnoise1_2: - case nir_op_fnoise1_3: - case nir_op_fnoise1_4: - case nir_op_fnoise2_1: - case nir_op_fnoise2_2: - case nir_op_fnoise2_3: - case nir_op_fnoise2_4: - case nir_op_fnoise3_1: - case nir_op_fnoise3_2: - case nir_op_fnoise3_3: - case nir_op_fnoise3_4: - case nir_op_fnoise4_1: - case nir_op_fnoise4_2: - case nir_op_fnoise4_3: - case nir_op_fnoise4_4: - unreachable("not reached: should be handled by lower_noise"); - case nir_op_unpack_half_2x16_split_x: case nir_op_unpack_half_2x16_split_y: case nir_op_pack_half_2x16_split: @@ -1761,20 +1791,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) unreachable("not reached: should have been lowered"); case nir_op_fsign: - assert(!instr->dest.saturate); - if (op[0].abs) { - /* Straightforward since the source can be assumed to be either - * strictly >= 0 or strictly <= 0 depending on the setting of the - * negate flag. - */ - inst = emit(MOV(dst, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - inst = (op[0].negate) - ? emit(MOV(dst, brw_imm_f(-1.0f))) - : emit(MOV(dst, brw_imm_f(1.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - } else if (type_sz(op[0].type) < 8) { + if (type_sz(op[0].type) < 8) { /* AND(val, 0x80000000) gives the sign bit. * * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not @@ -1820,26 +1837,25 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) /* Now convert the result from float to double */ emit_conversion_to_double(dst, retype(src_reg(tmp), - BRW_REGISTER_TYPE_F), - false); + BRW_REGISTER_TYPE_F)); } break; case nir_op_ishl: assert(nir_dest_bit_size(instr->dest.dest) < 64); - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, false, devinfo); emit(SHL(dst, op[0], op[1])); break; case nir_op_ishr: assert(nir_dest_bit_size(instr->dest.dest) < 64); - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, false, devinfo); emit(ASR(dst, op[0], op[1])); break; case nir_op_ushr: assert(nir_dest_bit_size(instr->dest.dest) < 64); - try_immediate_source(instr, &op[1], devinfo); + try_immediate_source(instr, op, false, devinfo); emit(SHR(dst, op[0], op[1])); break; @@ -1848,20 +1864,15 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) dst_reg mul_dst = dst_reg(this, glsl_type::dvec4_type); emit(MUL(mul_dst, op[1], op[0])); inst = emit(ADD(dst, src_reg(mul_dst), op[2])); - inst->saturate = instr->dest.saturate; } else { - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - + fix_float_operands(op, instr); inst = emit(MAD(dst, op[2], op[1], op[0])); - inst->saturate = instr->dest.saturate; } break; case nir_op_flrp: - inst = emit_lrp(dst, op[0], op[1], op[2]); - inst->saturate = instr->dest.saturate; + fix_float_operands(op, instr); + inst = emit(LRP(dst, op[2], op[1], op[0])); break; case nir_op_b32csel: @@ -1891,23 +1902,23 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fdot_replicated2: + try_immediate_source(instr, op, true, devinfo); inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]); - inst->saturate = instr->dest.saturate; break; case nir_op_fdot_replicated3: + try_immediate_source(instr, op, true, devinfo); inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]); - inst->saturate = instr->dest.saturate; break; case nir_op_fdot_replicated4: + try_immediate_source(instr, op, true, devinfo); inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]); - inst->saturate = instr->dest.saturate; break; case nir_op_fdph_replicated: + try_immediate_source(instr, op, false, devinfo); inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]); - inst->saturate = instr->dest.saturate; break; case nir_op_fdiv: