unreachable("should be lowered by lower_vertex_id().");
case nir_intrinsic_load_vertex_id_zero_base:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
- break;
-
case nir_intrinsic_load_base_vertex:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX);
- break;
-
case nir_intrinsic_load_instance_id:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
- break;
-
case nir_intrinsic_load_base_instance:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
- break;
-
case nir_intrinsic_load_draw_id:
- assert(v->stage == MESA_SHADER_VERTEX);
- reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID];
- if (reg->file == BAD_FILE)
- *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID);
- break;
+ unreachable("should be lowered by brw_nir_lower_vs_inputs().");
case nir_intrinsic_load_invocation_id:
if (v->stage == MESA_SHADER_TESS_CTRL)
}
switch (instr->op) {
- case nir_op_i2f:
- case nir_op_u2f:
- case nir_op_i642d:
- case nir_op_u642d:
+ case nir_op_i2f32:
+ case nir_op_u2f32:
if (optimize_extract_to_float(instr, result))
return;
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
- case nir_op_f2d:
- case nir_op_i2d:
- case nir_op_u2d:
+ case nir_op_f2f64:
+ case nir_op_i2f64:
+ case nir_op_u2f64:
/* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
*
* "When source or destination is 64b (...), regioning in Align1
*/
if (nir_dest_bit_size(instr->dest.dest) == 64 &&
nir_src_bit_size(instr->src[0].src) == 32 &&
- (devinfo->is_cherryview || devinfo->is_broxton)) {
+ (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
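+ /* Stage the 32-bit source in a temporary allocated with the 64-bit
+ * destination type and viewed at 32 bits, so each element lands
+ * qword-aligned before the actual conversion below.
+ */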
fs_reg tmp = bld.vgrf(result.type, 1);
tmp = subscript(tmp, op[0].type, 0);
inst = bld.MOV(tmp, op[0]);
inst->saturate = instr->dest.saturate;
bld.MOV(result, tmp);
break;
}
/* fallthrough */
+ case nir_op_f2f32:
+ case nir_op_f2i32:
+ case nir_op_f2u32:
case nir_op_f2i64:
case nir_op_f2u64:
- case nir_op_i2i64:
- case nir_op_i2u64:
- case nir_op_u2i64:
- case nir_op_u2u64:
- case nir_op_b2i64:
- case nir_op_d2f:
- case nir_op_d2i:
- case nir_op_d2u:
- case nir_op_i642f:
- case nir_op_u642f:
- case nir_op_u2i32:
case nir_op_i2i32:
+ case nir_op_i2i64:
case nir_op_u2u32:
- case nir_op_i2u32:
- if (instr->op == nir_op_b2i64) {
- bld.MOV(result, negate(op[0]));
- } else {
- inst = bld.MOV(result, op[0]);
- inst->saturate = instr->dest.saturate;
- }
- break;
-
- case nir_op_f2i:
- case nir_op_f2u:
- bld.MOV(result, op[0]);
+ case nir_op_u2u64:
+ inst = bld.MOV(result, op[0]);
+ inst->saturate = instr->dest.saturate;
break;
case nir_op_b2i:
case nir_op_b2f:
/* Booleans are stored as 0/-1, so negating yields 0/1 and the MOV
 * converts it to the destination type.
 */
bld.MOV(result, negate(op[0]));
break;
+ case nir_op_i2b:
case nir_op_f2b:
- bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
- break;
-
- case nir_op_i642b:
- case nir_op_d2b: {
- /* two-argument instructions can't take 64-bit immediates */
- fs_reg zero;
- fs_reg tmp;
+ if (nir_src_bit_size(instr->src[0].src) == 64) {
+ /* two-argument instructions can't take 64-bit immediates */
+ fs_reg zero;
+ fs_reg tmp;
+
+ if (instr->op == nir_op_f2b) {
+ zero = vgrf(glsl_type::double_type);
+ tmp = vgrf(glsl_type::double_type);
+ } else {
+ zero = vgrf(glsl_type::int64_t_type);
+ tmp = vgrf(glsl_type::int64_t_type);
+ }
- if (instr->op == nir_op_d2b) {
- zero = vgrf(glsl_type::double_type);
- tmp = vgrf(glsl_type::double_type);
+ bld.MOV(zero, setup_imm_df(bld, 0.0));
+ /* A SIMD16 execution needs to be split into two instructions, so use
+ * a vgrf instead of the flag register as the destination so that
+ * instruction splitting works.
+ */
+ bld.CMP(tmp, op[0], zero, BRW_CONDITIONAL_NZ);
+ bld.MOV(result, subscript(tmp, BRW_REGISTER_TYPE_UD, 0));
} else {
- zero = vgrf(glsl_type::int64_t_type);
- tmp = vgrf(glsl_type::int64_t_type);
+ if (instr->op == nir_op_f2b) {
+ bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
+ } else {
+ bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
+ }
}
-
- bld.MOV(zero, setup_imm_df(bld, 0.0));
- /* A SIMD16 execution needs to be split in two instructions, so use
- * a vgrf instead of the flag register as dst so instruction splitting
- * works
- */
- bld.CMP(tmp, op[0], zero, BRW_CONDITIONAL_NZ);
- bld.MOV(result, subscript(tmp, BRW_REGISTER_TYPE_UD, 0));
- break;
- }
- case nir_op_i2b:
- bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
break;
case nir_op_ftrunc:
assert(gs_compile->control_data_bits_per_vertex == 2);
/* Must be a valid stream */
- assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);
+ assert(stream_id < MAX_VERTEX_STREAMS);
/* Control data bits are initialized to 0 so we don't have to set any
* bits when sending vertices to stream 0.
*/

/* Since the only purpose of primitives sent to non-zero streams is to
* be recorded by transform feedback, we can simply discard all geometry
* bound to these streams when transform feedback is disabled.
*/
- if (stream_id > 0 && !nir->info->has_transform_feedback_varyings)
+ if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
return;
/* If we're outputting 32 control data bits or less, then we can wait
nir_const_value *offset_const = nir_src_as_const_value(offset_src);
const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
- /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y],
- * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only
- * gl_PointSize is available as a GS input, however, so it must be that.
- */
- const bool is_point_size = (base_offset == 0);
-
/* TODO: figure out push input layout for invocations == 1 */
if (gs_prog_data->invocations == 1 &&
offset_const != NULL && vertex_const != NULL &&
4 * (base_offset + offset_const->u32[0]) < push_reg_count) {
int imm_offset = (base_offset + offset_const->u32[0]) * 4 +
vertex_const->u32[0] * push_reg_count;
- /* This input was pushed into registers. */
- if (is_point_size) {
- /* gl_PointSize comes in .w */
- bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type));
- } else {
- for (unsigned i = 0; i < num_components; i++) {
- bld.MOV(offset(dst, bld, i),
- fs_reg(ATTR, imm_offset + i + first_component, dst.type));
- }
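+ /* This input was pushed into registers: each vec4 slot occupies four
+ * ATTR components, and each vertex's inputs span push_reg_count
+ * components.
+ */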
+ for (unsigned i = 0; i < num_components; i++) {
+ bld.MOV(offset(dst, bld, i),
+ fs_reg(ATTR, imm_offset + i + first_component, dst.type));
}
return;
}
/* Use first_icp_handle as the base offset. There is one register
* of URB handles per vertex, so inform the register allocator that
- * we might read up to nir->info->gs.vertices_in registers.
+ * we might read up to nir->info.gs.vertices_in registers.
*/
bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type),
fs_reg(icp_offset_bytes),
- brw_imm_ud(nir->info->gs.vertices_in * REG_SIZE));
+ brw_imm_ud(nir->info.gs.vertices_in * REG_SIZE));
}
} else {
assert(gs_prog_data->invocations > 1);
/* Use first_icp_handle as the base offset. There is one DWord
* of URB handles per vertex, so inform the register allocator that
- * we might read up to ceil(nir->info->gs.vertices_in / 8) registers.
+ * we might read up to ceil(nir->info.gs.vertices_in / 8) registers.
*/
bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type),
fs_reg(icp_offset_bytes),
- brw_imm_ud(DIV_ROUND_UP(nir->info->gs.vertices_in, 8) *
+ brw_imm_ud(DIV_ROUND_UP(nir->info.gs.vertices_in, 8) *
REG_SIZE));
}
}
}
}
}
-
- if (is_point_size) {
- /* Read the whole VUE header (because of alignment) and read .w. */
- fs_reg tmp = bld.vgrf(dst.type, 4);
- inst->dst = tmp;
- inst->size_written = 4 * REG_SIZE;
- bld.MOV(dst, offset(tmp, bld, 3));
- }
}
fs_reg
}
case nir_intrinsic_load_input: {
- fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
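+ /* nir_intrinsic_base() counts vec4 input slots, while the ATTR file
+ * is addressed in scalar components, hence the multiply by 4.
+ */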
+ fs_reg src = fs_reg(ATTR, nir_intrinsic_base(instr) * 4, dest.type);
unsigned first_component = nir_intrinsic_component(instr);
unsigned num_components = instr->num_components;
enum brw_reg_type type = dest.type;
(instr->num_components - 1) * type_sz(dest.type);
bool supports_64bit_indirects =
- !devinfo->is_cherryview && !devinfo->is_broxton;
+ !devinfo->is_cherryview && !gen_device_info_is_9lp(devinfo);
if (type_sz(dest.type) != 8 || supports_64bit_indirects) {
for (unsigned j = 0; j < instr->num_components; j++) {
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- nir->info->num_ubos - 1);
+ nir->info.num_ubos - 1);
}
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
* and we have to split it if necessary.
*/
const unsigned type_size = type_sz(dest.type);
+
+ /* See if we've selected this as a push constant candidate */
+ if (const_index) {
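+ /* brw_ubo_range stores start/length in units of 32-byte registers,
+ * so compare against the constant offset in the same units.
+ */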
+ const unsigned ubo_block = const_index->u32[0];
+ const unsigned offset_256b = const_offset->u32[0] / 32;
+
+ fs_reg push_reg;
+ for (int i = 0; i < 4; i++) {
+ const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+ if (range->block == ubo_block &&
+ offset_256b >= range->start &&
+ offset_256b < range->start + range->length) {
+
+ push_reg = fs_reg(UNIFORM, UBO_START + i, dest.type);
+ push_reg.offset = const_offset->u32[0] - 32 * range->start;
+ break;
+ }
+ }
+
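+ /* If the offset landed in one of the four pushed ranges, read it
+ * straight from the UNIFORM file; otherwise fall through to the
+ * pull-constant path below.
+ */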
+ if (push_reg.file != BAD_FILE) {
+ for (unsigned i = 0; i < instr->num_components; i++) {
+ bld.MOV(offset(dest, bld, i),
+ byte_offset(push_reg, i * type_size));
+ }
+ break;
+ }
+ }
+
const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0);
const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD);
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
- nir->info->num_ssbos - 1);
+ nir->info.num_ssbos - 1);
}
fs_reg offset_reg;
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
- nir->info->num_ssbos - 1);
+ nir->info.num_ssbos - 1);
}
/* Value */
break;
}
- case nir_intrinsic_load_channel_num: {
+ case nir_intrinsic_load_subgroup_size:
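+ /* In the scalar backend a subgroup is a SIMD thread, so its size is
+ * simply the dispatch width.
+ */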
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(dispatch_width));
+ break;
+
+ case nir_intrinsic_load_subgroup_invocation: {
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
dest = retype(dest, BRW_REGISTER_TYPE_UD);
const fs_builder allbld8 = bld.group(8, 0).exec_all();
break;
}
+ case nir_intrinsic_load_subgroup_eq_mask:
+ case nir_intrinsic_load_subgroup_ge_mask:
+ case nir_intrinsic_load_subgroup_gt_mask:
+ case nir_intrinsic_load_subgroup_le_mask:
+ case nir_intrinsic_load_subgroup_lt_mask:
+ unreachable("not reached");
+
+ case nir_intrinsic_vote_any: {
+ const fs_builder ubld = bld.exec_all();
+
+ /* The any/all predicates do not consider channel enables. To prevent
+ * dead channels from affecting the result, we initialize the flag with
+ * the identity value for the logical operation.
+ */
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+ bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0),
+ BRW_CONDITIONAL_NZ);
+ bld.MOV(dest, brw_imm_d(-1));
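+ /* dest is preloaded with ~0; the predicated SEL keeps that value when
+ * the ANY predicate fires and writes 0 otherwise.
+ */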
+ set_predicate(dispatch_width == 8 ?
+ BRW_PREDICATE_ALIGN1_ANY8H :
+ BRW_PREDICATE_ALIGN1_ANY16H,
+ bld.SEL(dest, dest, brw_imm_d(0)));
+ break;
+ }
+ case nir_intrinsic_vote_all: {
+ const fs_builder ubld = bld.exec_all();
+
+ /* The any/all predicates do not consider channel enables. To prevent
+ * dead channels from affecting the result, we initialize the flag with
+ * the identity value for the logical operation.
+ */
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0),
+ BRW_CONDITIONAL_NZ);
+ bld.MOV(dest, brw_imm_d(-1));
+ set_predicate(dispatch_width == 8 ?
+ BRW_PREDICATE_ALIGN1_ALL8H :
+ BRW_PREDICATE_ALIGN1_ALL16H,
+ bld.SEL(dest, dest, brw_imm_d(0)));
+ break;
+ }
+ case nir_intrinsic_vote_eq: {
+ fs_reg value = get_nir_src(instr->src[0]);
+ fs_reg uniformized = bld.emit_uniformize(value);
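+ /* emit_uniformize reads the value from the first live channel; the
+ * ALL predicate below then fires only if every channel matches it.
+ */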
+ const fs_builder ubld = bld.exec_all();
+
+ /* The any/all predicates do not consider channel enables. To prevent
+ * dead channels from affecting the result, we initialize the flag with
+ * the identity value for the logical operation.
+ */
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z);
+ bld.MOV(dest, brw_imm_d(-1));
+ set_predicate(dispatch_width == 8 ?
+ BRW_PREDICATE_ALIGN1_ALL8H :
+ BRW_PREDICATE_ALIGN1_ALL16H,
+ bld.SEL(dest, dest, brw_imm_d(0)));
+ break;
+ }
+
+ case nir_intrinsic_ballot: {
+ const fs_reg value = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+ const struct brw_reg flag = retype(brw_flag_reg(0, 0),
+ BRW_REGISTER_TYPE_UD);
+
+ bld.exec_all().MOV(flag, brw_imm_ud(0u));
+ bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
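+ /* The CMP leaves one bit per live channel in the flag register,
+ * which is exactly the ballot mask.
+ */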
+
+ if (instr->dest.ssa.bit_size > 32) {
+ dest.type = BRW_REGISTER_TYPE_UQ;
+ } else {
+ dest.type = BRW_REGISTER_TYPE_UD;
+ }
+ bld.MOV(dest, flag);
+ break;
+ }
+
+ case nir_intrinsic_read_invocation: {
+ const fs_reg value = get_nir_src(instr->src[0]);
+ const fs_reg invocation = get_nir_src(instr->src[1]);
+ fs_reg tmp = bld.vgrf(value.type);
+
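+ /* SHADER_OPCODE_BROADCAST copies the channel of 'value' selected by
+ * the first component of 'invocation' into tmp; the result is then
+ * read back from component 0.
+ */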
+ bld.exec_all().emit(SHADER_OPCODE_BROADCAST, tmp, value,
+ component(invocation, 0));
+
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+ fs_reg(component(tmp, 0)));
+ break;
+ }
+
+ case nir_intrinsic_read_first_invocation: {
+ const fs_reg value = get_nir_src(instr->src[0]);
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+ bld.emit_uniformize(value));
+ break;
+ }
+
default:
unreachable("unknown intrinsic");
}
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
- nir->info->num_ssbos - 1);
+ nir->info.num_ssbos - 1);
}
fs_reg offset = get_nir_src(instr->src[1]);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
- if (instr->op == nir_texop_query_levels ||
- (instr->op == nir_texop_tex && stage != MESA_SHADER_FRAGMENT)) {
- /* textureQueryLevels() and texture() are implemented in terms of TXS
- * and TXL respectively, so we need to pass a valid LOD argument.
- */
- assert(srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE);
- srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);
- }
-
enum opcode opcode;
switch (instr->op) {
case nir_texop_tex:
unreachable("unknown texture opcode");
}
+ /* TXS and TXL require a LOD but not everything we implement using those
+ * two opcodes provides one. Provide a default LOD of 0.
+ */
+ if ((opcode == SHADER_OPCODE_TXS_LOGICAL ||
+ opcode == SHADER_OPCODE_TXL_LOGICAL) &&
+ srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE) {
+ srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);
+ }
+
if (instr->op == nir_texop_tg4) {
if (instr->component == 1 &&
key_tex->gather_channel_quirk_mask & (1 << texture)) {