#include "brw_vec4.h"
#include "brw_vec4_builder.h"
#include "brw_vec4_surface_builder.h"
+#include "brw_eu.h"
using namespace brw;
using namespace brw::surface_access;
for (unsigned j = i; j < instr->def.num_components; j++) {
if ((instr->def.bit_size == 32 &&
- instr->value.u32[i] == instr->value.u32[j]) ||
+ instr->value[i].u32 == instr->value[j].u32) ||
(instr->def.bit_size == 64 &&
- instr->value.f64[i] == instr->value.f64[j])) {
+ instr->value[i].f64 == instr->value[j].f64)) {
writemask |= 1 << j;
}
}
reg.writemask = writemask;
if (instr->def.bit_size == 64) {
- emit(MOV(reg, setup_imm_df(ibld, instr->value.f64[i])));
+ emit(MOV(reg, setup_imm_df(ibld, instr->value[i].f64)));
} else {
- emit(MOV(reg, brw_imm_d(instr->value.i32[i])));
+ emit(MOV(reg, brw_imm_d(instr->value[i].i32)));
}
remaining &= ~writemask;
switch (instr->intrinsic) {
case nir_intrinsic_load_input: {
+ assert(nir_dest_bit_size(instr->dest) == 32);
/* We set EmitNoIndirectInput for VS */
unsigned load_offset = nir_src_as_uint(instr->src[0]);
glsl_type::uvec4_type);
src = retype(src, dest.type);
- bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
- if (is_64bit) {
- dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
- src.swizzle = BRW_SWIZZLE_XYZW;
- shuffle_64bit_data(tmp, src, false);
- emit(MOV(dest, src_reg(tmp)));
- } else {
- /* Swizzle source based on component layout qualifier */
- src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
- emit(MOV(dest, src));
- }
+ /* Swizzle source based on component layout qualifier */
+ src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
+ emit(MOV(dest, src));
break;
}
case nir_intrinsic_store_output: {
+ assert(nir_src_bit_size(instr->src[0]) == 32);
unsigned store_offset = nir_src_as_uint(instr->src[1]);
int varying = instr->const_index[0] + store_offset;
-
- bool is_64bit = nir_src_bit_size(instr->src[0]) == 64;
- if (is_64bit) {
- src_reg data;
- src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_DF,
- instr->num_components);
- data = src_reg(this, glsl_type::dvec4_type);
- shuffle_64bit_data(dst_reg(data), src, true);
- src = retype(data, BRW_REGISTER_TYPE_F);
- } else {
- src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
- instr->num_components);
- }
+ src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
+ instr->num_components);
unsigned c = nir_intrinsic_component(instr);
output_reg[varying][c] = dst_reg(src);
output_num_components[varying][c] = instr->num_components;
-
- unsigned num_components = instr->num_components;
- if (is_64bit)
- num_components *= 2;
-
- output_reg[varying][c] = dst_reg(src);
- output_num_components[varying][c] = MIN2(4, num_components);
-
- if (is_64bit && num_components > 4) {
- assert(num_components <= 8);
- output_reg[varying + 1][c] = byte_offset(dst_reg(src), REG_SIZE);
- output_num_components[varying + 1][c] = num_components - 4;
- }
break;
}
case nir_intrinsic_get_buffer_size: {
+ assert(nir_src_num_components(instr->src[0]) == 1);
unsigned ssbo_index = nir_src_is_const(instr->src[0]) ?
nir_src_as_uint(instr->src[0]) : 0;
break;
}
- case nir_intrinsic_ssbo_atomic_add: {
- int op = BRW_AOP_ADD;
-
- if (nir_src_is_const(instr->src[2])) {
- int add_val = nir_src_as_int(instr->src[2]);
- if (add_val == 1)
- op = BRW_AOP_INC;
- else if (add_val == -1)
- op = BRW_AOP_DEC;
- }
-
- nir_emit_ssbo_atomic(op, instr);
- break;
- }
+ case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_imin:
- nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
- break;
case nir_intrinsic_ssbo_atomic_umin:
- nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
- break;
case nir_intrinsic_ssbo_atomic_imax:
- nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
- break;
case nir_intrinsic_ssbo_atomic_umax:
- nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
- break;
case nir_intrinsic_ssbo_atomic_and:
- nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
- break;
case nir_intrinsic_ssbo_atomic_or:
- nir_emit_ssbo_atomic(BRW_AOP_OR, instr);
- break;
case nir_intrinsic_ssbo_atomic_xor:
- nir_emit_ssbo_atomic(BRW_AOP_XOR, instr);
- break;
case nir_intrinsic_ssbo_atomic_exchange:
- nir_emit_ssbo_atomic(BRW_AOP_MOV, instr);
- break;
case nir_intrinsic_ssbo_atomic_comp_swap:
- nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr);
+ nir_emit_ssbo_atomic(brw_aop_for_nir_intrinsic(instr), instr);
break;
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_ubo: {
src_reg surf_index;
+ prog_data->base.has_ubo_pull = true;
+
dest = get_nir_dest(instr->dest);
if (nir_src_is_const(instr->src[0])) {
const vec4_builder bld =
vec4_builder(this).at_end().annotate(current_annotation, base_ir);
const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
- bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
+ bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0))
->size_written = 2 * REG_SIZE;
break;
}
return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
}
-static enum brw_conditional_mod
-brw_conditional_for_nir_comparison(nir_op op)
-{
- switch (op) {
- case nir_op_flt32:
- case nir_op_ilt32:
- case nir_op_ult32:
- return BRW_CONDITIONAL_L;
-
- case nir_op_fge32:
- case nir_op_ige32:
- case nir_op_uge32:
- return BRW_CONDITIONAL_GE;
-
- case nir_op_feq32:
- case nir_op_ieq32:
- case nir_op_b32all_fequal2:
- case nir_op_b32all_iequal2:
- case nir_op_b32all_fequal3:
- case nir_op_b32all_iequal3:
- case nir_op_b32all_fequal4:
- case nir_op_b32all_iequal4:
- return BRW_CONDITIONAL_Z;
-
- case nir_op_fne32:
- case nir_op_ine32:
- case nir_op_b32any_fnequal2:
- case nir_op_b32any_inequal2:
- case nir_op_b32any_fnequal3:
- case nir_op_b32any_inequal3:
- case nir_op_b32any_fnequal4:
- case nir_op_b32any_inequal4:
- return BRW_CONDITIONAL_NZ;
-
- default:
- unreachable("not reached: bad operation for comparison");
- }
-}
-
bool
vec4_visitor::optimize_predicate(nir_alu_instr *instr,
enum brw_predicate *predicate)
}
emit(CMP(dst_null_d(), op[0], op[1],
- brw_conditional_for_nir_comparison(cmp_instr->op)));
+ brw_cmod_for_nir_comparison(cmp_instr->op)));
return true;
}
inst->saturate = saturate;
}
+/**
+ * Try to use an immediate value for a source
+ *
+ * In cases of flow control, constant propagation is sometimes unable to
+ * determine that a register contains a constant value. To work around this,
+ * try to emit a literal as one of the sources. If \c try_src0_also is set,
+ * \c op[0] will also be tried for an immediate value.
+ *
+ * If \c op[0] is modified, the operands will be exchanged so that \c op[1]
+ * will always be the immediate value.
+ *
+ * \return The index of the source that was modified, 0 or 1, if successful.
+ * Otherwise, -1.
+ *
+ * \param op - Operands to the instruction
+ * \param try_src0_also - True if \c op[0] should also be a candidate for
+ * getting an immediate value. This should only be set
+ * for commutative operations.
+ */
+static int
+try_immediate_source(const nir_alu_instr *instr, src_reg *op,
+ bool try_src0_also,
+ ASSERTED const gen_device_info *devinfo)
+{
+ unsigned idx;
+
+ /* MOV should be the only single-source instruction passed to this
+ * function. Any other unary instruction with a constant source should
+ * have been constant-folded away!
+ */
+ assert(nir_op_infos[instr->op].num_inputs > 1 ||
+ instr->op == nir_op_mov);
+
+ if (instr->op != nir_op_mov &&
+ nir_src_bit_size(instr->src[1].src) == 32 &&
+ nir_src_is_const(instr->src[1].src)) {
+ idx = 1;
+ } else if (try_src0_also &&
+ nir_src_bit_size(instr->src[0].src) == 32 &&
+ nir_src_is_const(instr->src[0].src)) {
+ idx = 0;
+ } else {
+ return -1;
+ }
+
+ const enum brw_reg_type old_type = op[idx].type;
+
+ switch (old_type) {
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_UD: {
+ int first_comp = -1;
+ int d = 0;
+
+ for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
+ if (nir_alu_instr_channel_used(instr, idx, i)) {
+ if (first_comp < 0) {
+ first_comp = i;
+ d = nir_src_comp_as_int(instr->src[idx].src,
+ instr->src[idx].swizzle[i]);
+ } else if (d != nir_src_comp_as_int(instr->src[idx].src,
+ instr->src[idx].swizzle[i])) {
+ return -1;
+ }
+ }
+ }
+
+ assert(first_comp >= 0);
+
+ if (op[idx].abs)
+ d = MAX2(-d, d);
+
+ if (op[idx].negate) {
+ /* On Gen8+ a negation source modifier on a logical operation means
+ * something different. Nothing should generate this, so assert that
+ * it does not occur.
+ */
+ assert(devinfo->gen < 8 || (instr->op != nir_op_iand &&
+ instr->op != nir_op_ior &&
+ instr->op != nir_op_ixor));
+ d = -d;
+ }
+
+ op[idx] = retype(src_reg(brw_imm_d(d)), old_type);
+ break;
+ }
+
+ case BRW_REGISTER_TYPE_F: {
+ int first_comp = -1;
+ float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
+ bool is_scalar = true;
+
+ for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
+ if (nir_alu_instr_channel_used(instr, idx, i)) {
+ f[i] = nir_src_comp_as_float(instr->src[idx].src,
+ instr->src[idx].swizzle[i]);
+ if (first_comp < 0) {
+ first_comp = i;
+ } else if (f[first_comp] != f[i]) {
+ is_scalar = false;
+ }
+ }
+ }
+
+ if (is_scalar) {
+ if (op[idx].abs)
+ f[first_comp] = fabs(f[first_comp]);
+
+ if (op[idx].negate)
+ f[first_comp] = -f[first_comp];
+
+ op[idx] = src_reg(brw_imm_f(f[first_comp]));
+ assert(op[idx].type == old_type);
+ } else {
+ uint8_t vf_values[4] = { 0, 0, 0, 0 };
+
+ for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
+ if (op[idx].abs)
+ f[i] = fabs(f[i]);
+
+ if (op[idx].negate)
+ f[i] = -f[i];
+
+ const int vf = brw_float_to_vf(f[i]);
+ if (vf == -1)
+ return -1;
+
+ vf_values[i] = vf;
+ }
+
+ op[idx] = src_reg(brw_imm_vf4(vf_values[0], vf_values[1],
+ vf_values[2], vf_values[3]));
+ }
+ break;
+ }
+
+ default:
+ unreachable("Non-32bit type.");
+ }
+
+ /* If the instruction has more than one source, the instruction format only
+ * allows source 1 to be an immediate value. If the immediate value was
+ * source 0, then the sources must be exchanged.
+ */
+ if (idx == 0 && instr->op != nir_op_mov) {
+ src_reg tmp = op[0];
+ op[0] = op[1];
+ op[1] = tmp;
+ }
+
+ return idx;
+}
+
+void
+vec4_visitor::fix_float_operands(src_reg op[3], nir_alu_instr *instr)
+{
+ bool fixed[3] = { false, false, false };
+
+ for (unsigned i = 0; i < 2; i++) {
+ if (!nir_src_is_const(instr->src[i].src))
+ continue;
+
+ for (unsigned j = i + 1; j < 3; j++) {
+ if (fixed[j])
+ continue;
+
+ if (!nir_src_is_const(instr->src[j].src))
+ continue;
+
+ if (nir_alu_srcs_equal(instr, instr, i, j)) {
+ if (!fixed[i])
+ op[i] = fix_3src_operand(op[i]);
+
+ op[j] = op[i];
+
+ fixed[i] = true;
+ fixed[j] = true;
+ } else if (nir_alu_srcs_negative_equal(instr, instr, i, j)) {
+ if (!fixed[i])
+ op[i] = fix_3src_operand(op[i]);
+
+ op[j] = op[i];
+ op[j].negate = !op[j].negate;
+
+ fixed[i] = true;
+ fixed[j] = true;
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < 3; i++) {
+ if (!fixed[i])
+ op[i] = fix_3src_operand(op[i]);
+ }
+}
+
void
vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
{
}
switch (instr->op) {
- case nir_op_imov:
- case nir_op_fmov:
+ case nir_op_mov:
+ try_immediate_source(instr, &op[0], true, devinfo);
inst = emit(MOV(dst, op[0]));
inst->saturate = instr->dest.saturate;
break;
emit_conversion_to_double(dst, op[0], instr->dest.saturate);
break;
+ case nir_op_fsat:
+ inst = emit(MOV(dst, op[0]));
+ inst->saturate = true;
+ break;
+
+ case nir_op_fneg:
+ case nir_op_ineg:
+ op[0].negate = true;
+ inst = emit(MOV(dst, op[0]));
+ if (instr->op == nir_op_fneg)
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fabs:
+ case nir_op_iabs:
+ op[0].negate = false;
+ op[0].abs = true;
+ inst = emit(MOV(dst, op[0]));
+ if (instr->op == nir_op_fabs)
+ inst->saturate = instr->dest.saturate;
+ break;
+
case nir_op_iadd:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fadd:
+ try_immediate_source(instr, op, true, devinfo);
inst = emit(ADD(dst, op[0], op[1]));
inst->saturate = instr->dest.saturate;
break;
break;
case nir_op_fmul:
+ try_immediate_source(instr, op, true, devinfo);
inst = emit(MUL(dst, op[0], op[1]));
inst->saturate = instr->dest.saturate;
break;
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fmin:
+ try_immediate_source(instr, op, true, devinfo);
inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fmax:
+ try_immediate_source(instr, op, true, devinfo);
inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_feq32:
case nir_op_fne32: {
enum brw_conditional_mod conditional_mod =
- brw_conditional_for_nir_comparison(instr->op);
+ brw_cmod_for_nir_comparison(instr->op);
if (nir_src_bit_size(instr->src[0].src) < 64) {
+ /* If the order of the sources is changed due to an immediate value,
+ * then the condition must also be changed.
+ */
+ if (try_immediate_source(instr, op, true, devinfo) == 0)
+ conditional_mod = brw_swap_cmod(conditional_mod);
+
emit(CMP(dst, op[0], op[1], conditional_mod));
} else {
/* Produce a 32-bit boolean result from the DF comparison by selecting
brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
- brw_conditional_for_nir_comparison(instr->op)));
+ brw_cmod_for_nir_comparison(instr->op)));
emit(MOV(dst, brw_imm_d(0)));
inst = emit(MOV(dst, brw_imm_d(~0)));
inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
- brw_conditional_for_nir_comparison(instr->op)));
+ brw_cmod_for_nir_comparison(instr->op)));
emit(MOV(dst, brw_imm_d(0)));
inst = emit(MOV(dst, brw_imm_d(~0)));
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
+ try_immediate_source(instr, op, true, devinfo);
emit(XOR(dst, op[0], op[1]));
break;
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
+ try_immediate_source(instr, op, true, devinfo);
emit(OR(dst, op[0], op[1]));
break;
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
+ try_immediate_source(instr, op, true, devinfo);
emit(AND(dst, op[0], op[1]));
break;
}
break;
- case nir_op_isign:
- /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
- * -> non-negative val generates 0x00000000.
- * Predicated OR sets 1 if val is positive.
- */
- assert(nir_dest_bit_size(instr->dest.dest) < 64);
- emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G));
- emit(ASR(dst, op[0], brw_imm_d(31)));
- inst = emit(OR(dst, src_reg(dst), brw_imm_d(1)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- break;
-
case nir_op_ishl:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
+ try_immediate_source(instr, op, false, devinfo);
emit(SHL(dst, op[0], op[1]));
break;
case nir_op_ishr:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
+ try_immediate_source(instr, op, false, devinfo);
emit(ASR(dst, op[0], op[1]));
break;
case nir_op_ushr:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
+ try_immediate_source(instr, op, false, devinfo);
emit(SHR(dst, op[0], op[1]));
break;
inst = emit(ADD(dst, src_reg(mul_dst), op[2]));
inst->saturate = instr->dest.saturate;
} else {
- op[0] = fix_3src_operand(op[0]);
- op[1] = fix_3src_operand(op[1]);
- op[2] = fix_3src_operand(op[2]);
-
+ fix_float_operands(op, instr);
inst = emit(MAD(dst, op[2], op[1], op[0]));
inst->saturate = instr->dest.saturate;
}
break;
case nir_op_flrp:
- inst = emit_lrp(dst, op[0], op[1], op[2]);
+ fix_float_operands(op, instr);
+ inst = emit(LRP(dst, op[2], op[1], op[0]));
inst->saturate = instr->dest.saturate;
break;
break;
case nir_op_fdot_replicated2:
+ try_immediate_source(instr, op, true, devinfo);
inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fdot_replicated3:
+ try_immediate_source(instr, op, true, devinfo);
inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fdot_replicated4:
+ try_immediate_source(instr, op, true, devinfo);
inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fdph_replicated:
+ try_immediate_source(instr, op, false, devinfo);
inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
- case nir_op_iabs:
- case nir_op_ineg:
- assert(nir_dest_bit_size(instr->dest.dest) < 64);
- /* fall through */
- case nir_op_fabs:
- case nir_op_fneg:
- case nir_op_fsat:
- unreachable("not reached: should be lowered by lower_source mods");
-
case nir_op_fdiv:
unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler");
break;
}
- case nir_tex_src_offset: {
- nir_const_value *const_offset =
- nir_src_as_const_value(instr->src[i].src);
- assert(nir_src_bit_size(instr->src[i].src) == 32);
- if (!const_offset ||
- !brw_texture_offset(const_offset->i32,
- nir_tex_instr_src_size(instr, i),
- &constant_offset)) {
+ case nir_tex_src_offset:
+ if (!brw_texture_offset(instr, i, &constant_offset)) {
offset_value =
get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
}
break;
- }
case nir_tex_src_texture_offset: {
/* Emit code to evaluate the actual indexing expression */