X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_vec4_nir.cpp;h=3036abcb814be32affffee2b0d1266afd1bc4567;hb=24c0545b2d5d79132ee48643886cd3eb285beb6f;hp=98632b5af08411c6362411ec746d657884d42a97;hpb=04c2f12ab28d61d30f9cb008edb9039c610dfb5f;p=mesa.git diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 98632b5af08..3036abcb814 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -25,6 +25,7 @@ #include "brw_vec4.h" #include "brw_vec4_builder.h" #include "brw_vec4_surface_builder.h" +#include "brw_eu.h" using namespace brw; using namespace brw::surface_access; @@ -353,18 +354,18 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr) for (unsigned j = i; j < instr->def.num_components; j++) { if ((instr->def.bit_size == 32 && - instr->value.u32[i] == instr->value.u32[j]) || + instr->value[i].u32 == instr->value[j].u32) || (instr->def.bit_size == 64 && - instr->value.f64[i] == instr->value.f64[j])) { + instr->value[i].f64 == instr->value[j].f64)) { writemask |= 1 << j; } } reg.writemask = writemask; if (instr->def.bit_size == 64) { - emit(MOV(reg, setup_imm_df(ibld, instr->value.f64[i]))); + emit(MOV(reg, setup_imm_df(ibld, instr->value[i].f64))); } else { - emit(MOV(reg, brw_imm_d(instr->value.i32[i]))); + emit(MOV(reg, brw_imm_d(instr->value[i].i32))); } remaining &= ~writemask; @@ -406,6 +407,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_input: { + assert(nir_dest_bit_size(instr->dest) == 32); /* We set EmitNoIndirectInput for VS */ unsigned load_offset = nir_src_as_uint(instr->src[0]); @@ -416,57 +418,27 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) glsl_type::uvec4_type); src = retype(src, dest.type); - bool is_64bit = nir_dest_bit_size(instr->dest) == 64; - if (is_64bit) { - dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); - src.swizzle = BRW_SWIZZLE_XYZW; - shuffle_64bit_data(tmp, src, false); - emit(MOV(dest, src_reg(tmp))); - } else { - /* Swizzle source based on component layout qualifier */ - src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); - emit(MOV(dest, src)); - } + /* Swizzle source based on component layout qualifier */ + src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); + emit(MOV(dest, src)); break; } case nir_intrinsic_store_output: { + assert(nir_src_bit_size(instr->src[0]) == 32); unsigned store_offset = nir_src_as_uint(instr->src[1]); int varying = instr->const_index[0] + store_offset; - - bool is_64bit = nir_src_bit_size(instr->src[0]) == 64; - if (is_64bit) { - src_reg data; - src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_DF, - instr->num_components); - data = src_reg(this, glsl_type::dvec4_type); - shuffle_64bit_data(dst_reg(data), src, true); - src = retype(data, BRW_REGISTER_TYPE_F); - } else { - src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, - instr->num_components); - } + src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, + instr->num_components); unsigned c = nir_intrinsic_component(instr); output_reg[varying][c] = dst_reg(src); output_num_components[varying][c] = instr->num_components; - - unsigned num_components = instr->num_components; - if (is_64bit) - num_components *= 2; - - output_reg[varying][c] = dst_reg(src); - output_num_components[varying][c] = MIN2(4, num_components); - - if (is_64bit && num_components > 4) { - assert(num_components <= 8); - output_reg[varying + 1][c] = byte_offset(dst_reg(src), REG_SIZE); - output_num_components[varying + 1][c] = num_components - 4; - } break; } case nir_intrinsic_get_buffer_size: { + assert(nir_src_num_components(instr->src[0]) == 1); unsigned ssbo_index = nir_src_is_const(instr->src[0]) ? nir_src_as_uint(instr->src[0]) : 0; @@ -576,46 +548,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } - case nir_intrinsic_ssbo_atomic_add: { - int op = BRW_AOP_ADD; - - if (nir_src_is_const(instr->src[2])) { - int add_val = nir_src_as_int(instr->src[2]); - if (add_val == 1) - op = BRW_AOP_INC; - else if (add_val == -1) - op = BRW_AOP_DEC; - } - - nir_emit_ssbo_atomic(op, instr); - break; - } + case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_imin: - nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr); - break; case nir_intrinsic_ssbo_atomic_umin: - nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr); - break; case nir_intrinsic_ssbo_atomic_imax: - nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr); - break; case nir_intrinsic_ssbo_atomic_umax: - nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr); - break; case nir_intrinsic_ssbo_atomic_and: - nir_emit_ssbo_atomic(BRW_AOP_AND, instr); - break; case nir_intrinsic_ssbo_atomic_or: - nir_emit_ssbo_atomic(BRW_AOP_OR, instr); - break; case nir_intrinsic_ssbo_atomic_xor: - nir_emit_ssbo_atomic(BRW_AOP_XOR, instr); - break; case nir_intrinsic_ssbo_atomic_exchange: - nir_emit_ssbo_atomic(BRW_AOP_MOV, instr); - break; case nir_intrinsic_ssbo_atomic_comp_swap: - nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr); + nir_emit_ssbo_atomic(brw_aop_for_nir_intrinsic(instr), instr); break; case nir_intrinsic_load_vertex_id: @@ -684,6 +627,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_ubo: { src_reg surf_index; + prog_data->base.has_ubo_pull = true; + dest = get_nir_dest(instr->dest); if (nir_src_is_const(instr->src[0])) { @@ -759,7 +704,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) const vec4_builder bld = vec4_builder(this).at_end().annotate(current_annotation, base_ir); const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) + bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0)) ->size_written = 2 * REG_SIZE; break; } @@ -814,45 +759,6 @@ brw_swizzle_for_nir_swizzle(uint8_t swizzle[4]) return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); } -static enum brw_conditional_mod -brw_conditional_for_nir_comparison(nir_op op) -{ - switch (op) { - case nir_op_flt32: - case nir_op_ilt32: - case nir_op_ult32: - return BRW_CONDITIONAL_L; - - case nir_op_fge32: - case nir_op_ige32: - case nir_op_uge32: - return BRW_CONDITIONAL_GE; - - case nir_op_feq32: - case nir_op_ieq32: - case nir_op_b32all_fequal2: - case nir_op_b32all_iequal2: - case nir_op_b32all_fequal3: - case nir_op_b32all_iequal3: - case nir_op_b32all_fequal4: - case nir_op_b32all_iequal4: - return BRW_CONDITIONAL_Z; - - case nir_op_fne32: - case nir_op_ine32: - case nir_op_b32any_fnequal2: - case nir_op_b32any_inequal2: - case nir_op_b32any_fnequal3: - case nir_op_b32any_inequal3: - case nir_op_b32any_fnequal4: - case nir_op_b32any_inequal4: - return BRW_CONDITIONAL_NZ; - - default: - unreachable("not reached: bad operation for comparison"); - } -} - bool vec4_visitor::optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate) @@ -903,7 +809,7 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr, } emit(CMP(dst_null_d(), op[0], op[1], - brw_conditional_for_nir_comparison(cmp_instr->op))); + brw_cmod_for_nir_comparison(cmp_instr->op))); return true; } @@ -1008,6 +914,201 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src, inst->saturate = saturate; } +/** + * Try to use an immediate value for a source + * + * In cases of flow control, constant propagation is sometimes unable to + * determine that a register contains a constant value. To work around this, + * try to emit a literal as one of the sources. If \c try_src0_also is set, + * \c op[0] will also be tried for an immediate value. + * + * If \c op[0] is modified, the operands will be exchanged so that \c op[1] + * will always be the immediate value. + * + * \return The index of the source that was modified, 0 or 1, if successful. + * Otherwise, -1. + * + * \param op - Operands to the instruction + * \param try_src0_also - True if \c op[0] should also be a candidate for + * getting an immediate value. This should only be set + * for commutative operations. + */ +static int +try_immediate_source(const nir_alu_instr *instr, src_reg *op, + bool try_src0_also, + ASSERTED const gen_device_info *devinfo) +{ + unsigned idx; + + /* MOV should be the only single-source instruction passed to this + * function. Any other unary instruction with a constant source should + * have been constant-folded away! + */ + assert(nir_op_infos[instr->op].num_inputs > 1 || + instr->op == nir_op_mov); + + if (instr->op != nir_op_mov && + nir_src_bit_size(instr->src[1].src) == 32 && + nir_src_is_const(instr->src[1].src)) { + idx = 1; + } else if (try_src0_also && + nir_src_bit_size(instr->src[0].src) == 32 && + nir_src_is_const(instr->src[0].src)) { + idx = 0; + } else { + return -1; + } + + const enum brw_reg_type old_type = op[idx].type; + + switch (old_type) { + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_UD: { + int first_comp = -1; + int d = 0; + + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { + if (nir_alu_instr_channel_used(instr, idx, i)) { + if (first_comp < 0) { + first_comp = i; + d = nir_src_comp_as_int(instr->src[idx].src, + instr->src[idx].swizzle[i]); + } else if (d != nir_src_comp_as_int(instr->src[idx].src, + instr->src[idx].swizzle[i])) { + return -1; + } + } + } + + assert(first_comp >= 0); + + if (op[idx].abs) + d = MAX2(-d, d); + + if (op[idx].negate) { + /* On Gen8+ a negation source modifier on a logical operation means + * something different. Nothing should generate this, so assert that + * it does not occur. + */ + assert(devinfo->gen < 8 || (instr->op != nir_op_iand && + instr->op != nir_op_ior && + instr->op != nir_op_ixor)); + d = -d; + } + + op[idx] = retype(src_reg(brw_imm_d(d)), old_type); + break; + } + + case BRW_REGISTER_TYPE_F: { + int first_comp = -1; + float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + bool is_scalar = true; + + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { + if (nir_alu_instr_channel_used(instr, idx, i)) { + f[i] = nir_src_comp_as_float(instr->src[idx].src, + instr->src[idx].swizzle[i]); + if (first_comp < 0) { + first_comp = i; + } else if (f[first_comp] != f[i]) { + is_scalar = false; + } + } + } + + if (is_scalar) { + if (op[idx].abs) + f[first_comp] = fabs(f[first_comp]); + + if (op[idx].negate) + f[first_comp] = -f[first_comp]; + + op[idx] = src_reg(brw_imm_f(f[first_comp])); + assert(op[idx].type == old_type); + } else { + uint8_t vf_values[4] = { 0, 0, 0, 0 }; + + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { + if (op[idx].abs) + f[i] = fabs(f[i]); + + if (op[idx].negate) + f[i] = -f[i]; + + const int vf = brw_float_to_vf(f[i]); + if (vf == -1) + return -1; + + vf_values[i] = vf; + } + + op[idx] = src_reg(brw_imm_vf4(vf_values[0], vf_values[1], + vf_values[2], vf_values[3])); + } + break; + } + + default: + unreachable("Non-32bit type."); + } + + /* If the instruction has more than one source, the instruction format only + * allows source 1 to be an immediate value. If the immediate value was + * source 0, then the sources must be exchanged. + */ + if (idx == 0 && instr->op != nir_op_mov) { + src_reg tmp = op[0]; + op[0] = op[1]; + op[1] = tmp; + } + + return idx; +} + +void +vec4_visitor::fix_float_operands(src_reg op[3], nir_alu_instr *instr) +{ + bool fixed[3] = { false, false, false }; + + for (unsigned i = 0; i < 2; i++) { + if (!nir_src_is_const(instr->src[i].src)) + continue; + + for (unsigned j = i + 1; j < 3; j++) { + if (fixed[j]) + continue; + + if (!nir_src_is_const(instr->src[j].src)) + continue; + + if (nir_alu_srcs_equal(instr, instr, i, j)) { + if (!fixed[i]) + op[i] = fix_3src_operand(op[i]); + + op[j] = op[i]; + + fixed[i] = true; + fixed[j] = true; + } else if (nir_alu_srcs_negative_equal(instr, instr, i, j)) { + if (!fixed[i]) + op[i] = fix_3src_operand(op[i]); + + op[j] = op[i]; + op[j].negate = !op[j].negate; + + fixed[i] = true; + fixed[j] = true; + } + } + } + + for (unsigned i = 0; i < 3; i++) { + if (!fixed[i]) + op[i] = fix_3src_operand(op[i]); + } +} + void vec4_visitor::nir_emit_alu(nir_alu_instr *instr) { @@ -1030,8 +1131,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) } switch (instr->op) { - case nir_op_imov: - case nir_op_fmov: + case nir_op_mov: + try_immediate_source(instr, &op[0], true, devinfo); inst = emit(MOV(dst, op[0])); inst->saturate = instr->dest.saturate; break; @@ -1062,10 +1163,33 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit_conversion_to_double(dst, op[0], instr->dest.saturate); break; + case nir_op_fsat: + inst = emit(MOV(dst, op[0])); + inst->saturate = true; + break; + + case nir_op_fneg: + case nir_op_ineg: + op[0].negate = true; + inst = emit(MOV(dst, op[0])); + if (instr->op == nir_op_fneg) + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fabs: + case nir_op_iabs: + op[0].negate = false; + op[0].abs = true; + inst = emit(MOV(dst, op[0])); + if (instr->op == nir_op_fabs) + inst->saturate = instr->dest.saturate; + break; + case nir_op_iadd: assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fadd: + try_immediate_source(instr, op, true, devinfo); inst = emit(ADD(dst, op[0], op[1])); inst->saturate = instr->dest.saturate; break; @@ -1077,6 +1201,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fmul: + try_immediate_source(instr, op, true, devinfo); inst = emit(MUL(dst, op[0], op[1])); inst->saturate = instr->dest.saturate; break; @@ -1304,6 +1429,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fmin: + try_immediate_source(instr, op, true, devinfo); inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; @@ -1313,6 +1439,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fmax: + try_immediate_source(instr, op, true, devinfo); inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; @@ -1338,9 +1465,15 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_feq32: case nir_op_fne32: { enum brw_conditional_mod conditional_mod = - brw_conditional_for_nir_comparison(instr->op); + brw_cmod_for_nir_comparison(instr->op); if (nir_src_bit_size(instr->src[0].src) < 64) { + /* If the order of the sources is changed due to an immediate value, + * then the condition must also be changed. + */ + if (try_immediate_source(instr, op, true, devinfo) == 0) + conditional_mod = brw_swap_cmod(conditional_mod); + emit(CMP(dst, op[0], op[1], conditional_mod)); } else { /* Produce a 32-bit boolean result from the DF comparison by selecting @@ -1369,7 +1502,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), - brw_conditional_for_nir_comparison(instr->op))); + brw_cmod_for_nir_comparison(instr->op))); emit(MOV(dst, brw_imm_d(0))); inst = emit(MOV(dst, brw_imm_d(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; @@ -1388,7 +1521,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), - brw_conditional_for_nir_comparison(instr->op))); + brw_cmod_for_nir_comparison(instr->op))); emit(MOV(dst, brw_imm_d(0))); inst = emit(MOV(dst, brw_imm_d(~0))); @@ -1410,6 +1543,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } + try_immediate_source(instr, op, true, devinfo); emit(XOR(dst, op[0], op[1])); break; @@ -1419,6 +1553,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } + try_immediate_source(instr, op, true, devinfo); emit(OR(dst, op[0], op[1])); break; @@ -1428,6 +1563,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } + try_immediate_source(instr, op, true, devinfo); emit(AND(dst, op[0], op[1])); break; @@ -1735,30 +1871,21 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) } break; - case nir_op_isign: - /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). - * -> non-negative val generates 0x00000000. - * Predicated OR sets 1 if val is positive. - */ - assert(nir_dest_bit_size(instr->dest.dest) < 64); - emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G)); - emit(ASR(dst, op[0], brw_imm_d(31))); - inst = emit(OR(dst, src_reg(dst), brw_imm_d(1))); - inst->predicate = BRW_PREDICATE_NORMAL; - break; - case nir_op_ishl: assert(nir_dest_bit_size(instr->dest.dest) < 64); + try_immediate_source(instr, op, false, devinfo); emit(SHL(dst, op[0], op[1])); break; case nir_op_ishr: assert(nir_dest_bit_size(instr->dest.dest) < 64); + try_immediate_source(instr, op, false, devinfo); emit(ASR(dst, op[0], op[1])); break; case nir_op_ushr: assert(nir_dest_bit_size(instr->dest.dest) < 64); + try_immediate_source(instr, op, false, devinfo); emit(SHR(dst, op[0], op[1])); break; @@ -1769,17 +1896,15 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) inst = emit(ADD(dst, src_reg(mul_dst), op[2])); inst->saturate = instr->dest.saturate; } else { - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - + fix_float_operands(op, instr); inst = emit(MAD(dst, op[2], op[1], op[0])); inst->saturate = instr->dest.saturate; } break; case nir_op_flrp: - inst = emit_lrp(dst, op[0], op[1], op[2]); + fix_float_operands(op, instr); + inst = emit(LRP(dst, op[2], op[1], op[0])); inst->saturate = instr->dest.saturate; break; @@ -1810,34 +1935,29 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fdot_replicated2: + try_immediate_source(instr, op, true, devinfo); inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; case nir_op_fdot_replicated3: + try_immediate_source(instr, op, true, devinfo); inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; case nir_op_fdot_replicated4: + try_immediate_source(instr, op, true, devinfo); inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; case nir_op_fdph_replicated: + try_immediate_source(instr, op, false, devinfo); inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; - case nir_op_iabs: - case nir_op_ineg: - assert(nir_dest_bit_size(instr->dest.dest) < 64); - /* fall through */ - case nir_op_fabs: - case nir_op_fneg: - case nir_op_fsat: - unreachable("not reached: should be lowered by lower_source mods"); - case nir_op_fdiv: unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler"); @@ -2001,19 +2121,12 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) break; } - case nir_tex_src_offset: { - nir_const_value *const_offset = - nir_src_as_const_value(instr->src[i].src); - assert(nir_src_bit_size(instr->src[i].src) == 32); - if (!const_offset || - !brw_texture_offset(const_offset->i32, - nir_tex_instr_src_size(instr, i), - &constant_offset)) { + case nir_tex_src_offset: + if (!brw_texture_offset(instr, i, &constant_offset)) { offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2); } break; - } case nir_tex_src_texture_offset: { /* Emit code to evaluate the actual indexing expression */