const fs_builder ubld = ibld.exec_all().group(8, 0);
switch (inst->opcode) {
+ case FS_OPCODE_LINTERP : { /* Rewrites src[0] of a LINTERP instruction through a temporary built by LOAD_PAYLOAD. */
+ assert(inst->exec_size == 16); /* This case only handles instructions whose exec_size is 16. */
+ const fs_reg tmp = ibld.vgrf(inst->src[0].type, 2); /* Temporary allocated with src[0]'s type and a size argument of 2. */
+ fs_reg srcs[4]; /* The four pieces of src[0], in the order they are handed to LOAD_PAYLOAD. */
+ /* Piece i is offset(src[0], ibld, i % 2) moved horizontally by 8 * (i / 2): (0, +0), (1, +0), (0, +8), (1, +8). */
+ for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++)
+ srcs[i] = horiz_offset(offset(inst->src[0], ibld, i % 2),
+ 8 * (i / 2));
+ /* ubld is ibld.exec_all().group(8, 0), so the copy below is emitted through that builder. */
+ ubld.LOAD_PAYLOAD(tmp, srcs, ARRAY_SIZE(srcs), ARRAY_SIZE(srcs)); /* NOTE(review): the last argument is presumably the header size, here equal to the source count; confirm against LOAD_PAYLOAD's declaration. */
+ /* The instruction now reads the rearranged copy instead of its original source. */
+ inst->src[0] = tmp;
+ progress = true; /* Record that this case changed the program. */
+ break;
+ }
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: {
return fs_reg();
const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
- const brw::fs_builder hbld = bld.exec_all().group(16, 0);
+ const brw::fs_builder hbld = bld.exec_all().group(8, 0);
const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
fs_reg *const components = new fs_reg[2 * m];
for (unsigned c = 0; c < 2; c++) {
for (unsigned g = 0; g < m; g++)
- components[c * m + g] = offset(brw_vec8_grf(regs[g], 0), hbld, c);
+ components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
+ hbld, c + 2 * (g % 2));
}
hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
unreachable("Invalid location");
}
-/* Annoyingly, we get the barycentrics into the shader in a layout that's
- * optimized for PLN but it doesn't work nearly as well as one would like for
- * manual interpolation.
- */
-static void
-shuffle_from_pln_layout(const fs_builder &bld, fs_reg dest, fs_reg pln_data)
-{
- dest.type = BRW_REGISTER_TYPE_F;
- pln_data.type = BRW_REGISTER_TYPE_F;
- const fs_reg dest_u = offset(dest, bld, 0);
- const fs_reg dest_v = offset(dest, bld, 1);
-
- for (unsigned g = 0; g < bld.dispatch_width() / 8; g++) {
- const fs_builder gbld = bld.group(8, g);
- gbld.MOV(horiz_offset(dest_u, g * 8),
- byte_offset(pln_data, (g * 2 + 0) * REG_SIZE));
- gbld.MOV(horiz_offset(dest_v, g * 8),
- byte_offset(pln_data, (g * 2 + 1) * REG_SIZE));
- }
-}
-
-static void
-shuffle_to_pln_layout(const fs_builder &bld, fs_reg pln_data, fs_reg src)
-{
- pln_data.type = BRW_REGISTER_TYPE_F;
- src.type = BRW_REGISTER_TYPE_F;
- const fs_reg src_u = offset(src, bld, 0);
- const fs_reg src_v = offset(src, bld, 1);
-
- for (unsigned g = 0; g < bld.dispatch_width() / 8; g++) {
- const fs_builder gbld = bld.group(8, g);
- gbld.MOV(byte_offset(pln_data, (g * 2 + 0) * REG_SIZE),
- horiz_offset(src_u, g * 8));
- gbld.MOV(byte_offset(pln_data, (g * 2 + 1) * REG_SIZE),
- horiz_offset(src_v, g * 8));
- }
-}
-
void
fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
(enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
enum brw_barycentric_mode bary =
brw_barycentric_mode(interp_mode, instr->intrinsic);
-
- shuffle_from_pln_layout(bld, dest, this->delta_xy[bary]);
+ const fs_reg srcs[] = { offset(this->delta_xy[bary], bld, 0),
+ offset(this->delta_xy[bary], bld, 1) };
+ bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0);
break;
}
if (bary_intrin == nir_intrinsic_load_barycentric_at_offset ||
bary_intrin == nir_intrinsic_load_barycentric_at_sample) {
- /* Use the result of the PI message. Because the load_barycentric
- * intrinsics return a regular vec2 and we need it in PLN layout, we
- * have to do a translation. Fortunately, copy-prop cleans this up
- * reliably.
- */
- dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
- shuffle_to_pln_layout(bld, dst_xy, get_nir_src(instr->src[0]));
+ /* Use the result of the PI message. */
+ dst_xy = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F);
} else {
/* Use the delta_xy values computed from the payload */
enum brw_barycentric_mode bary =
brw_barycentric_mode(interp_mode, bary_intrin);
-
dst_xy = this->delta_xy[bary];
}
const fs_reg xstart(negate(brw_vec1_grf(1, 0)));
const fs_reg ystart(negate(brw_vec1_grf(1, 1)));
- if (devinfo->has_pln && dispatch_width == 16) {
- for (unsigned i = 0; i < 2; i++) {
- abld.half(i).ADD(half(offset(delta_xy, abld, i), 0),
+ if (devinfo->has_pln) {
+ for (unsigned i = 0; i < dispatch_width / 8; i++) {
+ abld.half(i).ADD(half(offset(delta_xy, abld, 0), i),
half(this->pixel_x, i), xstart);
- abld.half(i).ADD(half(offset(delta_xy, abld, i), 1),
+ abld.half(i).ADD(half(offset(delta_xy, abld, 1), i),
half(this->pixel_y, i), ystart);
}
} else {
for (unsigned c = 0; c < 2; c++) {
for (unsigned q = 0; q < dispatch_width / 8; q++) {
- const unsigned idx = c + (q & 2) + (q & 1) * dispatch_width / 8;
set_predicate(BRW_PREDICATE_NORMAL,
- bld.half(q).SEL(horiz_offset(delta_xy[i], idx * 8),
- horiz_offset(centroid_delta_xy, idx * 8),
- horiz_offset(pixel_delta_xy, idx * 8)));
+ bld.half(q).SEL(half(offset(delta_xy[i], bld, c), q),
+ half(offset(centroid_delta_xy, bld, c), q),
+ half(offset(pixel_delta_xy, bld, c), q)));
}
}
}