if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT)
return;
+ unsigned vec4s[VARYING_SLOT_TESS_MAX] = { 0, };
+
+ /* Calculate the size of output registers in a separate pass, before
+ * allocating them. With ARB_enhanced_layouts, multiple output variables
+ * may occupy the same slot, but have different type sizes.
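+ * For example (hypothetical GLSL), these two outputs share location 0
+ * but need different numbers of vec4 slots:
+ *
+ *    layout(location = 0, component = 0) out vec3  a;    // 1 slot
+ *    layout(location = 0, component = 3) out float b[2]; // 2 slots
+ *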
+ */
nir_foreach_variable(var, &nir->outputs) {
- const unsigned vec4s =
+ const int loc = var->data.driver_location;
+ const unsigned var_vec4s =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
: type_size_vec4(var->type);
- fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s);
- for (unsigned i = 0; i < vec4s; i++) {
- if (outputs[var->data.driver_location + i].file == BAD_FILE)
- outputs[var->data.driver_location + i] = offset(reg, bld, 4 * i);
+ vec4s[loc] = MAX2(vec4s[loc], var_vec4s);
+ }
+
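+ /* Allocate the registers in a second pass, now that the size of each
+ * slot is known; variables that share a slot reuse the same VGRF.
+ */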
+ nir_foreach_variable(var, &nir->outputs) {
+ const int loc = var->data.driver_location;
+ if (outputs[loc].file == BAD_FILE) {
+ fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s[loc]);
+ for (unsigned i = 0; i < vec4s[loc]; i++) {
+ outputs[loc + i] = offset(reg, bld, 4 * i);
+ }
}
}
}
unreachable("should be lowered by lower_vertex_id().");
case nir_intrinsic_load_vertex_id_zero_base:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
- break;
-
case nir_intrinsic_load_base_vertex:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX);
- break;
-
case nir_intrinsic_load_instance_id:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
- break;
-
case nir_intrinsic_load_base_instance:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
- break;
-
case nir_intrinsic_load_draw_id:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID);
- break;
+ unreachable("should be lowered by brw_nir_lower_vs_inputs().");
case nir_intrinsic_load_invocation_id:
if (v->stage == MESA_SHADER_TESS_CTRL)
}
}
+/*
+ * Returns a type based on a reference_type (word, float, half-float) and a
+ * given bit_size.
+ *
+ * Reference BRW_REGISTER_TYPEs are HF, F, DF, W, D, UW, and UD.
+ *
+ * @FIXME: 64-bit return types are always DF, even for integer reference
+ * types, to maintain compatibility with uses of DF that predate int64
+ * support.
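+ *
+ * For example, brw_reg_type_from_bit_size(64, BRW_REGISTER_TYPE_D)
+ * returns BRW_REGISTER_TYPE_DF rather than BRW_REGISTER_TYPE_Q.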
+ */
+static brw_reg_type
+brw_reg_type_from_bit_size(const unsigned bit_size,
+ const brw_reg_type reference_type)
+{
+ switch (reference_type) {
+ case BRW_REGISTER_TYPE_HF:
+ case BRW_REGISTER_TYPE_F:
+ case BRW_REGISTER_TYPE_DF:
+ switch (bit_size) {
+ case 16:
+ return BRW_REGISTER_TYPE_HF;
+ case 32:
+ return BRW_REGISTER_TYPE_F;
+ case 64:
+ return BRW_REGISTER_TYPE_DF;
+ default:
+ unreachable("Invalid bit size");
+ }
+ case BRW_REGISTER_TYPE_W:
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_Q:
+ switch (bit_size) {
+ case 16:
+ return BRW_REGISTER_TYPE_W;
+ case 32:
+ return BRW_REGISTER_TYPE_D;
+ case 64:
+ return BRW_REGISTER_TYPE_DF;
+ default:
+ unreachable("Invalid bit size");
+ }
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_UD:
+ case BRW_REGISTER_TYPE_UQ:
+ switch (bit_size) {
+ case 16:
+ return BRW_REGISTER_TYPE_UW;
+ case 32:
+ return BRW_REGISTER_TYPE_UD;
+ case 64:
+ return BRW_REGISTER_TYPE_DF;
+ default:
+ unreachable("Invalid bit size");
+ }
+ default:
+ unreachable("Unknown type");
+ }
+}
+
void
fs_visitor::nir_emit_impl(nir_function_impl *impl)
{
reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
unsigned size = array_elems * reg->num_components;
const brw_reg_type reg_type =
- reg->bit_size == 32 ? BRW_REGISTER_TYPE_F : BRW_REGISTER_TYPE_DF;
+ brw_reg_type_from_bit_size(reg->bit_size, BRW_REGISTER_TYPE_F);
nir_locals[reg->index] = bld.vgrf(reg_type, size);
}
*/
if (nir_dest_bit_size(instr->dest.dest) == 64 &&
nir_src_bit_size(instr->src[0].src) == 32 &&
- (devinfo->is_cherryview || devinfo->is_broxton)) {
+ (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
fs_reg tmp = bld.vgrf(result.type, 1);
tmp = subscript(tmp, op[0].type, 0);
inst = bld.MOV(tmp, op[0]);
*
* - 2-src instructions can't operate with 64-bit immediates
* - The sign is encoded in the high 32-bit of each DF
- * - CMP with DF requires special handling in SIMD16
* - We need to produce a DF result.
*/
- /* 2-src instructions can't have 64-bit immediates, so put 0.0 in
- * a register and compare with that.
- */
- fs_reg tmp = vgrf(glsl_type::double_type);
- bld.MOV(tmp, setup_imm_df(bld, 0.0));
-
- /* A direct DF CMP using the flag register (null dst) won't work in
- * SIMD16 because the CMP will be split in two by lower_simd_width,
- * resulting in two CMP instructions with the same dst (NULL),
- * leading to dead code elimination of the first one. In SIMD8,
- * however, there is no need to split the CMP and we can save some
- * work.
- */
- fs_reg dst_tmp = vgrf(glsl_type::double_type);
- bld.CMP(dst_tmp, op[0], tmp, BRW_CONDITIONAL_NZ);
-
- /* In SIMD16 we want to avoid using a NULL dst register with DF CMP,
- * so we store the result of the comparison in a vgrf instead and
- * then we generate a UD comparison from that that won't have to
- * be split by lower_simd_width. This is what NIR does to handle
- * double comparisons in the general case.
- */
- if (bld.dispatch_width() == 16 ) {
- fs_reg dst_tmp_ud = retype(dst_tmp, BRW_REGISTER_TYPE_UD);
- bld.MOV(dst_tmp_ud, subscript(dst_tmp, BRW_REGISTER_TYPE_UD, 0));
- bld.CMP(bld.null_reg_ud(),
- dst_tmp_ud, brw_imm_ud(0), BRW_CONDITIONAL_NZ);
- }
-
- /* Get the high 32-bit of each double component where the sign is */
- fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
- bld.MOV(result_int, subscript(op[0], BRW_REGISTER_TYPE_UD, 1));
+ fs_reg zero = vgrf(glsl_type::double_type);
+ bld.MOV(zero, setup_imm_df(bld, 0.0));
+ bld.CMP(bld.null_reg_df(), op[0], zero, BRW_CONDITIONAL_NZ);
- /* Get the sign bit */
- bld.AND(result_int, result_int, brw_imm_ud(0x80000000u));
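+ /* Start from +0.0 so that a zero input, which leaves the predicate
+ * below unset, produces a zero result.
+ */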
+ bld.MOV(result, zero);
- /* Add 1.0 to the sign, predicated to skip the case of op[0] == 0.0 */
- inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
- inst->predicate = BRW_PREDICATE_NORMAL;
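+ /* Copy the sign bit from the high dword of each DF component of op[0]
+ * into the (currently zero) high dword of the result.
+ */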
+ fs_reg r = subscript(result, BRW_REGISTER_TYPE_UD, 1);
+ bld.AND(r, subscript(op[0], BRW_REGISTER_TYPE_UD, 1),
+ brw_imm_ud(0x80000000u));
- /* Convert from 32-bit float to 64-bit double */
- result.type = BRW_REGISTER_TYPE_DF;
- inst = bld.MOV(result, retype(result_int, BRW_REGISTER_TYPE_F));
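+ /* OR in the exponent bits of 1.0 (0x3ff00000 in the high dword of a
+ * DF), turning the signed zero into +/-1.0. The predicate from the CMP
+ * above skips this when op[0] == 0.0.
+ */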
+ set_predicate(BRW_PREDICATE_NORMAL,
+ bld.OR(r, r, brw_imm_ud(0x3ff00000u)));
if (instr->dest.saturate) {
inst = bld.MOV(result, result);
unreachable("not reached: should have been lowered");
case nir_op_ishl:
- bld.SHL(result, op[0], op[1]);
- break;
case nir_op_ishr:
- bld.ASR(result, op[0], op[1]);
- break;
- case nir_op_ushr:
- bld.SHR(result, op[0], op[1]);
+ case nir_op_ushr: {
+ fs_reg shift_count = op[1];
+
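+ /* CHV and the gen9 LP parts cannot source a packed 32-bit shift count
+ * for Q/UQ shifts due to their 64-bit regioning restrictions, so copy
+ * it into a stride-2 UD temporary where each count element lines up
+ * with its 64-bit data element.
+ */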
+ if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
+ if (op[1].file == VGRF &&
+ (result.type == BRW_REGISTER_TYPE_Q ||
+ result.type == BRW_REGISTER_TYPE_UQ)) {
+ shift_count = fs_reg(VGRF, alloc.allocate(dispatch_width / 4),
+ BRW_REGISTER_TYPE_UD);
+ shift_count.stride = 2;
+ bld.MOV(shift_count, op[1]);
+ }
+ }
+
+ switch (instr->op) {
+ case nir_op_ishl:
+ bld.SHL(result, op[0], shift_count);
+ break;
+ case nir_op_ishr:
+ bld.ASR(result, op[0], shift_count);
+ break;
+ case nir_op_ushr:
+ bld.SHR(result, op[0], shift_count);
+ break;
+ default:
+ unreachable("not reached");
+ }
break;
+ }
case nir_op_pack_half_2x16_split:
bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
nir_load_const_instr *instr)
{
const brw_reg_type reg_type =
- instr->def.bit_size == 32 ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF;
+ brw_reg_type_from_bit_size(instr->def.bit_size, BRW_REGISTER_TYPE_D);
fs_reg reg = bld.vgrf(reg_type, instr->def.num_components);
switch (instr->def.bit_size) {
fs_reg reg;
if (src.is_ssa) {
if (src.ssa->parent_instr->type == nir_instr_type_ssa_undef) {
- const brw_reg_type reg_type = src.ssa->bit_size == 32 ?
- BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF;
+ const brw_reg_type reg_type =
+ brw_reg_type_from_bit_size(src.ssa->bit_size, BRW_REGISTER_TYPE_D);
reg = bld.vgrf(reg_type, src.ssa->num_components);
} else {
reg = nir_ssa_values[src.ssa->index];
{
if (dest.is_ssa) {
const brw_reg_type reg_type =
- dest.ssa.bit_size == 32 ? BRW_REGISTER_TYPE_F : BRW_REGISTER_TYPE_DF;
+ brw_reg_type_from_bit_size(dest.ssa.bit_size, BRW_REGISTER_TYPE_F);
nir_ssa_values[dest.ssa.index] =
bld.vgrf(reg_type, dest.ssa.num_components);
return nir_ssa_values[dest.ssa.index];
assert(gs_compile->control_data_bits_per_vertex == 2);
/* Must be a valid stream */
- assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);
+ assert(stream_id < MAX_VERTEX_STREAMS);
/* Control data bits are initialized to 0 so we don't have to set any
* bits when sending vertices to stream 0.
nir_const_value *offset_const = nir_src_as_const_value(offset_src);
const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
- /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y],
- * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only
- * gl_PointSize is available as a GS input, however, so it must be that.
- */
- const bool is_point_size = (base_offset == 0);
-
/* TODO: figure out push input layout for invocations == 1 */
+ /* TODO: make this work with 64-bit inputs */
if (gs_prog_data->invocations == 1 &&
+ type_sz(dst.type) <= 4 &&
offset_const != NULL && vertex_const != NULL &&
4 * (base_offset + offset_const->u32[0]) < push_reg_count) {
int imm_offset = (base_offset + offset_const->u32[0]) * 4 +
vertex_const->u32[0] * push_reg_count;
- /* This input was pushed into registers. */
- if (is_point_size) {
- /* gl_PointSize comes in .w */
- bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type));
- } else {
- for (unsigned i = 0; i < num_components; i++) {
- bld.MOV(offset(dst, bld, i),
- fs_reg(ATTR, imm_offset + i + first_component, dst.type));
- }
+ for (unsigned i = 0; i < num_components; i++) {
+ bld.MOV(offset(dst, bld, i),
+ fs_reg(ATTR, imm_offset + i + first_component, dst.type));
}
return;
}
/* Resort to the pull model. Ensure the VUE handles are provided. */
- gs_prog_data->base.include_vue_handles = true;
+ assert(gs_prog_data->base.include_vue_handles);
unsigned first_icp_handle = gs_prog_data->include_primitive_id ? 3 : 2;
fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
}
}
}
-
- if (is_point_size) {
- /* Read the whole VUE header (because of alignment) and read .w. */
- fs_reg tmp = bld.vgrf(dst.type, 4);
- inst->dst = tmp;
- inst->size_written = 4 * REG_SIZE;
- bld.MOV(dst, offset(tmp, bld, 3));
- }
}
fs_reg
}
case nir_intrinsic_load_input: {
- fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
+ fs_reg src = fs_reg(ATTR, nir_intrinsic_base(instr) * 4, dest.type);
unsigned first_component = nir_intrinsic_component(instr);
unsigned num_components = instr->num_components;
enum brw_reg_type type = dest.type;
/* Arbitrarily only push up to 32 vec4 slots worth of data,
* which is 16 registers (since each holds 2 vec4 slots).
*/
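+ /* A 64-bit dvec3/dvec4 is 24/32 bytes, but one vec4 slot holds only
+ * 16, so such inputs occupy a second slot.
+ */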
+ unsigned slot_count = 1;
+ if (type_sz(dest.type) == 8 && instr->num_components > 2)
+ slot_count++;
+
const unsigned max_push_slots = 32;
- if (imm_offset < max_push_slots) {
+ if (imm_offset + slot_count <= max_push_slots) {
fs_reg src = fs_reg(ATTR, imm_offset / 2, dest.type);
for (int i = 0; i < instr->num_components; i++) {
unsigned comp = 16 / type_sz(dest.type) * (imm_offset % 2) +
i + first_component;
bld.MOV(offset(dest, bld, i), component(src, comp));
}
+
tes_prog_data->base.urb_read_length =
MAX2(tes_prog_data->base.urb_read_length,
- DIV_ROUND_UP(imm_offset + 1, 2));
+ DIV_ROUND_UP(imm_offset + slot_count, 2));
} else {
/* Replicate the patch handle to all enabled channels */
const fs_reg srcs[] = {
* expected by our 32-bit write messages.
*/
unsigned type_size = 4;
- unsigned bit_size = instr->src[0].is_ssa ?
- instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
- if (bit_size == 64) {
+ if (nir_src_bit_size(instr->src[0]) == 64) {
type_size = 8;
fs_reg tmp =
fs_reg(VGRF, alloc.allocate(alloc.sizes[val_reg.nr]), val_reg.type);
(instr->num_components - 1) * type_sz(dest.type);
bool supports_64bit_indirects =
- !devinfo->is_cherryview && !devinfo->is_broxton;
+ !devinfo->is_cherryview && !gen_device_info_is_9lp(devinfo);
if (type_sz(dest.type) != 8 || supports_64bit_indirects) {
for (unsigned j = 0; j < instr->num_components; j++) {
* and we have to split it if necessary.
*/
const unsigned type_size = type_sz(dest.type);
+
+ /* See if we've selected this as a push constant candidate */
+ if (const_index) {
+ const unsigned ubo_block = const_index->u32[0];
+ const unsigned offset_256b = const_offset->u32[0] / 32;
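+ /* prog_data->ubo_ranges stores start and length in units of 32-byte
+ * (256-bit) blocks, hence the division above.
+ */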
+
+ fs_reg push_reg;
+ for (int i = 0; i < 4; i++) {
+ const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+ if (range->block == ubo_block &&
+ offset_256b >= range->start &&
+ offset_256b < range->start + range->length) {
+
+ push_reg = fs_reg(UNIFORM, UBO_START + i, dest.type);
+ push_reg.offset = const_offset->u32[0] - 32 * range->start;
+ break;
+ }
+ }
+
+ if (push_reg.file != BAD_FILE) {
+ for (unsigned i = 0; i < instr->num_components; i++) {
+ bld.MOV(offset(dest, bld, i),
+ byte_offset(push_reg, i * type_size));
+ }
+ break;
+ }
+ }
+
const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0);
const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD);
* expected by our 32-bit write messages.
*/
unsigned type_size = 4;
- unsigned bit_size = instr->src[0].is_ssa ?
- instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
- if (bit_size == 64) {
+ if (nir_src_bit_size(instr->src[0]) == 64) {
type_size = 8;
fs_reg tmp =
fs_reg(VGRF, alloc.allocate(alloc.sizes[val_reg.nr]), val_reg.type);
unsigned num_components = instr->num_components;
unsigned first_component = nir_intrinsic_component(instr);
- unsigned bit_size = instr->src[0].is_ssa ?
- instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
- if (bit_size == 64) {
+ if (nir_src_bit_size(instr->src[0]) == 64) {
fs_reg tmp =
fs_reg(VGRF, alloc.allocate(2 * num_components),
BRW_REGISTER_TYPE_F);
break;
}
- case nir_intrinsic_load_channel_num: {
+ case nir_intrinsic_load_subgroup_size:
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(dispatch_width));
+ break;
+
+ case nir_intrinsic_load_subgroup_invocation: {
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
dest = retype(dest, BRW_REGISTER_TYPE_UD);
const fs_builder allbld8 = bld.group(8, 0).exec_all();
break;
}
+ case nir_intrinsic_load_subgroup_eq_mask:
+ case nir_intrinsic_load_subgroup_ge_mask:
+ case nir_intrinsic_load_subgroup_gt_mask:
+ case nir_intrinsic_load_subgroup_le_mask:
+ case nir_intrinsic_load_subgroup_lt_mask:
+ unreachable("not reached");
+
+ case nir_intrinsic_vote_any: {
+ const fs_builder ubld = bld.exec_all();
+
+ /* The any/all predicates do not consider channel enables. To prevent
+ * dead channels from affecting the result, we initialize the flag
+ * with the identity value for the logical operation.
+ */
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+ }
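+ /* CMP sets one flag bit per enabled channel with a true (nonzero)
+ * source; the ANY*H predicate on the SEL below then reduces those
+ * bits to a single any() result for the whole dispatch.
+ */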
+ bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0),
+         BRW_CONDITIONAL_NZ);
+ bld.MOV(dest, brw_imm_d(-1));
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ANY8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ANY16H :
+ BRW_PREDICATE_ALIGN1_ANY32H,
+ bld.SEL(dest, dest, brw_imm_d(0)));
+ break;
+ }
+ case nir_intrinsic_vote_all: {
+ const fs_builder ubld = bld.exec_all();
+
+ /* The any/all predicates do not consider channel enables. To prevent
+ * dead channels from affecting the result, we initialize the flag
+ * with the identity value for the logical operation.
+ */
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0xffffffff));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ }
+ bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0),
+         BRW_CONDITIONAL_NZ);
+ bld.MOV(dest, brw_imm_d(-1));
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
+ BRW_PREDICATE_ALIGN1_ALL32H,
+ bld.SEL(dest, dest, brw_imm_d(0)));
+ break;
+ }
+ case nir_intrinsic_vote_eq: {
+ fs_reg value = get_nir_src(instr->src[0]);
+ fs_reg uniformized = bld.emit_uniformize(value);
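+ /* emit_uniformize broadcasts the value of the first enabled channel,
+ * so the CMP below effectively compares every channel against it.
+ */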
+ const fs_builder ubld = bld.exec_all();
+
+ /* The any/all predicates do not consider channel enables. To prevent
+ * dead channels from affecting the result, we initialize the flag
+ * with the identity value for the logical operation.
+ */
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0xffffffff));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ }
+ bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z);
+ bld.MOV(dest, brw_imm_d(-1));
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
+ BRW_PREDICATE_ALIGN1_ALL32H,
+ bld.SEL(dest, dest, brw_imm_d(0)));
+ break;
+ }
+
+ case nir_intrinsic_ballot: {
+ const fs_reg value = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+ struct brw_reg flag = brw_flag_reg(0, 0);
+ /* FIXME: For SIMD32 programs, this causes us to stomp on f0.1 as well
+ * as f0.0. This is a problem for fragment programs as we currently use
+ * f0.1 for discards. Fortunately, we don't support SIMD32 fragment
+ * programs yet so this isn't a problem. When we do, something will
+ * have to change.
+ */
+ if (dispatch_width == 32)
+ flag.type = BRW_REGISTER_TYPE_UD;
+
+ bld.exec_all().MOV(flag, brw_imm_ud(0u));
+ bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
+
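+ /* After the CMP, the flag register contains one bit for each enabled
+ * channel whose input was true; read it back as UD (or UQ for a 64-bit
+ * destination) to form the ballot mask.
+ */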
+ if (instr->dest.ssa.bit_size > 32) {
+ dest.type = BRW_REGISTER_TYPE_UQ;
+ } else {
+ dest.type = BRW_REGISTER_TYPE_UD;
+ }
+ bld.MOV(dest, flag);
+ break;
+ }
+
+ case nir_intrinsic_read_invocation: {
+ const fs_reg value = get_nir_src(instr->src[0]);
+ const fs_reg invocation = get_nir_src(instr->src[1]);
+ fs_reg tmp = bld.vgrf(value.type);
+
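+ /* SHADER_OPCODE_BROADCAST copies the channel of 'value' selected by
+ * the (scalar) invocation index into tmp; component 0 of tmp is then
+ * splatted to the destination.
+ */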
+ bld.exec_all().emit(SHADER_OPCODE_BROADCAST, tmp, value,
+ component(invocation, 0));
+
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+ fs_reg(component(tmp, 0)));
+ break;
+ }
+
+ case nir_intrinsic_read_first_invocation: {
+ const fs_reg value = get_nir_src(instr->src[0]);
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+ bld.emit_uniformize(value));
+ break;
+ }
+
default:
unreachable("unknown intrinsic");
}
unreachable("unknown texture opcode");
}
- /* TXS and TXL require a LOD but not everything we implement using those
- * two opcodes provides one. Provide a default LOD of 0.
- */
- if ((opcode == SHADER_OPCODE_TXS_LOGICAL ||
- opcode == SHADER_OPCODE_TXL_LOGICAL) &&
- srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE) {
- srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);
- }
-
if (instr->op == nir_texop_tg4) {
if (instr->component == 1 &&
key_tex->gather_channel_quirk_mask & (1 << texture)) {