return !failed;
}
+/**
+ * Report whether an SSA value has any consumer other than a
+ * load_interpolated_input intrinsic whose base is VARYING_SLOT_POS.
+ *
+ * Returns true as soon as one such "other" consumer is found: an if-use,
+ * a use by a non-intrinsic instruction, a use by any other intrinsic, or a
+ * use by load_interpolated_input with a different base.  Returns false when
+ * every use (possibly none) is a load_interpolated_input of
+ * VARYING_SLOT_POS.
+ */
+static bool
+is_used_in_not_interp_frag_coord(nir_ssa_def *def)
+{
+   /* Being consumed as an if-condition always counts as another use. */
+   nir_foreach_if_use(if_use, def)
+      return true;
+
+   nir_foreach_use(use, def) {
+      if (use->parent_instr->type != nir_instr_type_intrinsic)
+         return true;
+
+      nir_intrinsic_instr *consumer =
+         nir_instr_as_intrinsic(use->parent_instr);
+
+      /* The base index is only queried once we know the consumer is a
+       * load_interpolated_input, exactly as a chain of early-outs would.
+       */
+      const bool reads_frag_coord =
+         consumer->intrinsic == nir_intrinsic_load_interpolated_input &&
+         nir_intrinsic_base(consumer) == VARYING_SLOT_POS;
+      if (!reads_frag_coord)
+         return true;
+   }
+
+   return false;
+}
+
/**
* Return a bitfield where bit n is set if barycentric interpolation mode n
* (see enum brw_barycentric_mode) is needed by the fragment shader.
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- if (intrin->intrinsic != nir_intrinsic_load_interpolated_input)
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_barycentric_pixel:
+ case nir_intrinsic_load_barycentric_centroid:
+ case nir_intrinsic_load_barycentric_sample:
+ break;
+ default:
continue;
+ }
/* Ignore WPOS; it doesn't require interpolation. */
- if (nir_intrinsic_base(intrin) == VARYING_SLOT_POS)
+ assert(intrin->dest.is_ssa);
+ if (!is_used_in_not_interp_frag_coord(&intrin->dest.ssa))
continue;
- intrin = nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
enum glsl_interp_mode interp = (enum glsl_interp_mode)
nir_intrinsic_interp_mode(intrin);
nir_intrinsic_op bary_op = intrin->intrinsic;
unreachable("Invalid location");
}
+/* Annoyingly, the barycentrics arrive in the shader in a layout that is
+ * optimized for PLN, and that layout does not work nearly as well as one
+ * would like for manual interpolation.
+ *
+ * Copy barycentric data out of the PLN layout into dest.  Both registers
+ * are treated as float.  For each group of 8 channels g, component c
+ * (0 or 1) is read from register (g * 2 + c) of pln_data and written to
+ * channels [g * 8, g * 8 + 8) of component c of dest.
+ */
+static void
+shuffle_from_pln_layout(const fs_builder &bld, fs_reg dest, fs_reg pln_data)
+{
+   dest.type = BRW_REGISTER_TYPE_F;
+   pln_data.type = BRW_REGISTER_TYPE_F;
+
+   const unsigned num_groups = bld.dispatch_width() / 8;
+   for (unsigned grp = 0; grp < num_groups; grp++) {
+      const fs_builder grp_bld = bld.group(8, grp);
+
+      /* Component 0 first, then component 1, for every 8-wide group. */
+      for (unsigned comp = 0; comp < 2; comp++) {
+         grp_bld.MOV(horiz_offset(offset(dest, bld, comp), grp * 8),
+                     byte_offset(pln_data, (grp * 2 + comp) * REG_SIZE));
+      }
+   }
+}
+
+/* Copy a two-component value from src into pln_data, arranging it in the
+ * per-group interleaved layout: for each group of 8 channels g, channels
+ * [g * 8, g * 8 + 8) of component c (0 or 1) of src are written to
+ * register (g * 2 + c) of pln_data.  Both registers are treated as float.
+ */
+static void
+shuffle_to_pln_layout(const fs_builder &bld, fs_reg pln_data, fs_reg src)
+{
+   pln_data.type = BRW_REGISTER_TYPE_F;
+   src.type = BRW_REGISTER_TYPE_F;
+
+   const unsigned num_groups = bld.dispatch_width() / 8;
+   for (unsigned grp = 0; grp < num_groups; grp++) {
+      const fs_builder grp_bld = bld.group(8, grp);
+
+      /* Component 0 first, then component 1, for every 8-wide group. */
+      for (unsigned comp = 0; comp < 2; comp++) {
+         grp_bld.MOV(byte_offset(pln_data, (grp * 2 + comp) * REG_SIZE),
+                     horiz_offset(offset(src, bld, comp), grp * 8));
+      }
+   }
+}
+
void
fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
case nir_intrinsic_load_barycentric_pixel:
case nir_intrinsic_load_barycentric_centroid:
- case nir_intrinsic_load_barycentric_sample:
- /* Do nothing - load_interpolated_input handling will handle it later. */
+ case nir_intrinsic_load_barycentric_sample: {
+ /* Use the delta_xy values computed from the payload */
+ const glsl_interp_mode interp_mode =
+ (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
+ enum brw_barycentric_mode bary =
+ brw_barycentric_mode(interp_mode, instr->intrinsic);
+
+ shuffle_from_pln_layout(bld, dest, this->delta_xy[bary]);
break;
+ }
case nir_intrinsic_load_barycentric_at_sample: {
const glsl_interp_mode interpolation =
(enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
+ fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
if (nir_src_is_const(instr->src[0])) {
unsigned msg_data = nir_src_as_uint(instr->src[0]) << 4;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dest,
+ tmp,
fs_reg(), /* src */
brw_imm_ud(msg_data),
interpolation);
.SHL(msg_data, sample_id, brw_imm_ud(4u));
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dest,
+ tmp,
fs_reg(), /* src */
msg_data,
interpolation);
fs_inst *inst =
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dest,
+ tmp,
fs_reg(), /* src */
component(msg_data, 0),
interpolation);
bld.emit(BRW_OPCODE_WHILE));
}
}
+ shuffle_from_pln_layout(bld, dest, tmp);
break;
}
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
if (const_offset) {
assert(nir_src_bit_size(instr->src[0]) == 32);
unsigned off_x = MIN2((int)(const_offset[0].f32 * 16), 7) & 0xf;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
- dest,
+ tmp,
fs_reg(), /* src */
brw_imm_ud(off_x | (off_y << 4)),
interpolation);
const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
emit_pixel_interpolater_send(bld,
opcode,
- dest,
+ tmp,
src,
brw_imm_ud(0u),
interpolation);
}
+ shuffle_from_pln_layout(bld, dest, tmp);
break;
}
if (bary_intrin == nir_intrinsic_load_barycentric_at_offset ||
bary_intrin == nir_intrinsic_load_barycentric_at_sample) {
- /* Use the result of the PI message */
- dst_xy = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F);
+ /* Use the result of the PI message. Because the load_barycentric
+ * intrinsics return a regular vec2 and we need it in PLN layout, we
+ * have to do a translation. Fortunately, copy-prop cleans this up
+ * reliably.
+ */
+ dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
+ shuffle_to_pln_layout(bld, dst_xy, get_nir_src(instr->src[0]));
} else {
/* Use the delta_xy values computed from the payload */
enum brw_barycentric_mode bary =