intel/fs: Switch to standard vector layout for barycentrics at optimization time.

author Francisco Jerez <currojerez@riseup.net>

Sat, 4 Jan 2020 01:08:51 +0000 (17:08 -0800)

committer Francisco Jerez <currojerez@riseup.net>

Fri, 17 Jan 2020 21:23:12 +0000 (13:23 -0800)
author Francisco Jerez <currojerez@riseup.net>
Sat, 4 Jan 2020 01:08:51 +0000 (17:08 -0800)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 17 Jan 2020 21:23:12 +0000 (13:23 -0800)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp

index f10df1dcbeb93f7f0d21457bfc04fc3ce37df4e1..97f47ab92c2e9f081d9bba8980a98817304ddac9 100644 (file)
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6766,6 +6766,21 @@ fs_visitor::lower_barycentrics()
        const fs_builder ubld = ibld.exec_all().group(8, 0);
  
        switch (inst->opcode) {
+      case FS_OPCODE_LINTERP : {
+         assert(inst->exec_size == 16);
+         const fs_reg tmp = ibld.vgrf(inst->src[0].type, 2);
+         fs_reg srcs[4];
+
+         for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++)
+            srcs[i] = horiz_offset(offset(inst->src[0], ibld, i % 2),
+                                   8 * (i / 2));
+
+         ubld.LOAD_PAYLOAD(tmp, srcs, ARRAY_SIZE(srcs), ARRAY_SIZE(srcs));
+
+         inst->src[0] = tmp;
+         progress = true;
+         break;
+      }
        case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
        case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
        case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h

index d84f99db036fb8e800d861a8f35e7fd569757815..a682fac9aa611bba76e99685412c075f86f7db6e 100644 (file)
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -571,13 +571,14 @@ namespace brw {
           return fs_reg();
  
        const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
-      const brw::fs_builder hbld = bld.exec_all().group(16, 0);
+      const brw::fs_builder hbld = bld.exec_all().group(8, 0);
        const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
        fs_reg *const components = new fs_reg[2 * m];
  
        for (unsigned c = 0; c < 2; c++) {
           for (unsigned g = 0; g < m; g++)
-            components[c * m + g] = offset(brw_vec8_grf(regs[g], 0), hbld, c);
+            components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
+                                           hbld, c + 2 * (g % 2));
        }
  
        hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp

index ffaf90764f5c92f76b5ddeb625d5d46cf4b58693..3bed5406576b672b2ce096506706a24a0c1b0072 100644 (file)
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3313,44 +3313,6 @@ alloc_frag_output(fs_visitor *v, unsigned location)
        unreachable("Invalid location");
  }
  
-/* Annoyingly, we get the barycentrics into the shader in a layout that's
- * optimized for PLN but it doesn't work nearly as well as one would like for
- * manual interpolation.
- */
-static void
-shuffle_from_pln_layout(const fs_builder &bld, fs_reg dest, fs_reg pln_data)
-{
-   dest.type = BRW_REGISTER_TYPE_F;
-   pln_data.type = BRW_REGISTER_TYPE_F;
-   const fs_reg dest_u = offset(dest, bld, 0);
-   const fs_reg dest_v = offset(dest, bld, 1);
-
-   for (unsigned g = 0; g < bld.dispatch_width() / 8; g++) {
-      const fs_builder gbld = bld.group(8, g);
-      gbld.MOV(horiz_offset(dest_u, g * 8),
-               byte_offset(pln_data, (g * 2 + 0) * REG_SIZE));
-      gbld.MOV(horiz_offset(dest_v, g * 8),
-               byte_offset(pln_data, (g * 2 + 1) * REG_SIZE));
-   }
-}
-
-static void
-shuffle_to_pln_layout(const fs_builder &bld, fs_reg pln_data, fs_reg src)
-{
-   pln_data.type = BRW_REGISTER_TYPE_F;
-   src.type = BRW_REGISTER_TYPE_F;
-   const fs_reg src_u = offset(src, bld, 0);
-   const fs_reg src_v = offset(src, bld, 1);
-
-   for (unsigned g = 0; g < bld.dispatch_width() / 8; g++) {
-      const fs_builder gbld = bld.group(8, g);
-      gbld.MOV(byte_offset(pln_data, (g * 2 + 0) * REG_SIZE),
-               horiz_offset(src_u, g * 8));
-      gbld.MOV(byte_offset(pln_data, (g * 2 + 1) * REG_SIZE),
-               horiz_offset(src_v, g * 8));
-   }
-}
-
  void
  fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                    nir_intrinsic_instr *instr)
@@ -3565,8 +3527,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
           (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
        enum brw_barycentric_mode bary =
           brw_barycentric_mode(interp_mode, instr->intrinsic);
-
-      shuffle_from_pln_layout(bld, dest, this->delta_xy[bary]);
+      const fs_reg srcs[] = { offset(this->delta_xy[bary], bld, 0),
+                              offset(this->delta_xy[bary], bld, 1) };
+      bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0);
        break;
     }
  
@@ -3711,18 +3674,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
  
        if (bary_intrin == nir_intrinsic_load_barycentric_at_offset ||
            bary_intrin == nir_intrinsic_load_barycentric_at_sample) {
-         /* Use the result of the PI message.  Because the load_barycentric
-          * intrinsics return a regular vec2 and we need it in PLN layout, we
-          * have to do a translation.  Fortunately, copy-prop cleans this up
-          * reliably.
-          */
-         dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
-         shuffle_to_pln_layout(bld, dst_xy, get_nir_src(instr->src[0]));
+         /* Use the result of the PI message. */
+         dst_xy = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F);
        } else {
           /* Use the delta_xy values computed from the payload */
           enum brw_barycentric_mode bary =
              brw_barycentric_mode(interp_mode, bary_intrin);
-
           dst_xy = this->delta_xy[bary];
        }
  
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp

index 476a9c64a5b7fe8efc42e2fd797a151aafa6a4d2..81d0e466cc7b353ee3ff634ca26961e9839cd70f 100644 (file)
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -176,11 +176,11 @@ fs_visitor::emit_interpolation_setup_gen4()
     const fs_reg xstart(negate(brw_vec1_grf(1, 0)));
     const fs_reg ystart(negate(brw_vec1_grf(1, 1)));
  
-   if (devinfo->has_pln && dispatch_width == 16) {
-      for (unsigned i = 0; i < 2; i++) {
-         abld.half(i).ADD(half(offset(delta_xy, abld, i), 0),
+   if (devinfo->has_pln) {
+      for (unsigned i = 0; i < dispatch_width / 8; i++) {
+         abld.half(i).ADD(half(offset(delta_xy, abld, 0), i),
                            half(this->pixel_x, i), xstart);
-         abld.half(i).ADD(half(offset(delta_xy, abld, i), 1),
+         abld.half(i).ADD(half(offset(delta_xy, abld, 1), i),
                            half(this->pixel_y, i), ystart);
        }
     } else {
@@ -358,11 +358,10 @@ fs_visitor::emit_interpolation_setup_gen6()
  
           for (unsigned c = 0; c < 2; c++) {
              for (unsigned q = 0; q < dispatch_width / 8; q++) {
-               const unsigned idx = c + (q & 2) + (q & 1) * dispatch_width / 8;
                 set_predicate(BRW_PREDICATE_NORMAL,
-                  bld.half(q).SEL(horiz_offset(delta_xy[i], idx * 8),
-                                  horiz_offset(centroid_delta_xy, idx * 8),
-                                  horiz_offset(pixel_delta_xy, idx * 8)));
+                  bld.half(q).SEL(half(offset(delta_xy[i], bld, c), q),
+                                  half(offset(centroid_delta_xy, bld, c), q),
+                                  half(offset(pixel_delta_xy, bld, c), q)));
              }
           }
        }
author	Francisco Jerez <currojerez@riseup.net>
	Sat, 4 Jan 2020 01:08:51 +0000 (17:08 -0800)
committer	Francisco Jerez <currojerez@riseup.net>
	Fri, 17 Jan 2020 21:23:12 +0000 (13:23 -0800)
src/intel/compiler/brw_fs.cpp		patch | blob | history
src/intel/compiler/brw_fs.h		patch | blob | history
src/intel/compiler/brw_fs_nir.cpp		patch | blob | history
src/intel/compiler/brw_fs_visitor.cpp		patch | blob | history