i965/fs: Handle non-const sample number in interpolateAtSample
authorNeil Roberts <neil@linux.intel.com>
Fri, 17 Jul 2015 13:40:03 +0000 (14:40 +0100)
committerNeil Roberts <neil@linux.intel.com>
Fri, 9 Oct 2015 13:13:40 +0000 (15:13 +0200)
If a non-const sample number is given to interpolateAtSample it will
now generate an indirect send message with the sample ID similar to
how non-const sampler array indexing works. Previously non-const
values were ignored and instead it ended up using a constant 0 value.

The generator will try to determine if the sample ID is dynamically
uniform via nir_src_is_dynamically_uniform. If not it will query the
pixel interpolator in a loop, once for each different live sample
number. The next live sample number is found using emit_uniformize. If
multiple live channels have the same sample number then they will be
handled in a single iteration of the loop. The loop is necessary
because the indirect send message doesn't seem to have a way to
specify a different value for each fragment.

This fixes the following two Piglit tests:

arb_gpu_shader5-interpolateAtSample-nonconst
arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform

v2: Handle dynamically non-uniform sample ids.
v3: Remove the BREAK instruction and predicate the WHILE directly.
    Make the tokens arrays const. (Matt Turner)
v4: Iterate over the live channels instead of each possible sample
    number.
v5: Don't special case immediate values in
    brw_pixel_interpolator_query. Make a better wrapper for the
    function to set up the PI send instruction. Ensure that the SHL
    instructions are scalar. (Francisco Jerez).

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 761aa0ec5fa892aa5115588be8b0c3058f2bd9dd..0ac1ad9378bba242269b165f21aad8ed4bd70193 100644 (file)
@@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg mrf,
                              bool noperspective,
                              unsigned mode,
-                             unsigned data,
+                             struct brw_reg data,
                              unsigned msg_length,
                              unsigned response_length);
 
index dc699bb63210384bebef1caefa156ed1ab48c06a..bf2fee9ed4818c933ef08a37dca5eeb187b05e5e 100644 (file)
@@ -3212,26 +3212,29 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg mrf,
                              bool noperspective,
                              unsigned mode,
-                             unsigned data,
+                             struct brw_reg data,
                              unsigned msg_length,
                              unsigned response_length)
 {
    const struct brw_device_info *devinfo = p->devinfo;
-   struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
-
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, mrf);
-   brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
-                              msg_length, response_length,
-                              false /* header is never present for PI */,
-                              false);
+   struct brw_inst *insn;
+   const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current);
 
-   brw_inst_set_pi_simd_mode(
-         devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
+   /* brw_send_indirect_message will automatically use a direct send message
+    * if data is actually immediate.
+    */
+   insn = brw_send_indirect_message(p,
+                                    GEN7_SFID_PIXEL_INTERPOLATOR,
+                                    dest,
+                                    mrf,
+                                    vec1(data));
+   brw_inst_set_mlen(devinfo, insn, msg_length);
+   brw_inst_set_rlen(devinfo, insn, response_length);
+
+   brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
    brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
    brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
    brw_inst_set_pi_message_type(devinfo, insn, mode);
-   brw_inst_set_pi_message_data(devinfo, insn, data);
 }
 
 void
index 6f8b75e339f1f68e923cc1617d4b4b957481cf97..17e19cf807ac6aef14552cb025d19ea4c6da893d 100644 (file)
@@ -1377,15 +1377,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
                                                 struct brw_reg msg_data,
                                                 unsigned msg_type)
 {
-   assert(msg_data.file == BRW_IMMEDIATE_VALUE &&
-          msg_data.type == BRW_REGISTER_TYPE_UD);
+   assert(msg_data.type == BRW_REGISTER_TYPE_UD);
 
    brw_pixel_interpolator_query(p,
          retype(dst, BRW_REGISTER_TYPE_UW),
          src,
          inst->pi_noperspective,
          msg_type,
-         msg_data.dw1.ud,
+         msg_data,
          inst->mlen,
          inst->regs_written);
 }
index 03fe680470100dd731d7e0ecac1746ea2a9c0604..bc0df6850c41a05b0aced99b2ecbab6ddca1dfec 100644 (file)
@@ -1180,6 +1180,36 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
    }
 }
 
+static fs_inst *
+emit_pixel_interpolater_send(const fs_builder &bld,
+                             enum opcode opcode,
+                             const fs_reg &dst,
+                             const fs_reg &src,
+                             const fs_reg &desc,
+                             glsl_interp_qualifier interpolation)
+{
+   fs_inst *inst;
+   fs_reg payload;
+   int mlen;
+
+   if (src.file == BAD_FILE) {
+      /* Dummy payload */
+      payload = bld.vgrf(BRW_REGISTER_TYPE_F, 1);
+      mlen = 1;
+   } else {
+      payload = src;
+      mlen = 2 * bld.dispatch_width() / 8;
+   }
+
+   inst = bld.emit(opcode, dst, payload, desc);
+   inst->mlen = mlen;
+   /* 2 floats per slot returned */
+   inst->regs_written = 2 * bld.dispatch_width() / 8;
+   inst->pi_noperspective = interpolation == INTERP_QUALIFIER_NOPERSPECTIVE;
+
+   return inst;
+}
+
 void
 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
 {
@@ -1583,28 +1613,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
 
       fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
-
-      /* For most messages, we need one reg of ignored data; the hardware
-       * requires mlen==1 even when there is no payload. in the per-slot
-       * offset case, we'll replace this with the proper source data.
-       */
-      fs_reg src = vgrf(glsl_type::float_type);
-      int mlen = 1;     /* one reg unless overriden */
-      fs_inst *inst;
+      const glsl_interp_qualifier interpolation =
+         (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
 
       switch (instr->intrinsic) {
       case nir_intrinsic_interp_var_at_centroid:
-         inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
-                         dst_xy, src, fs_reg(0u));
+         emit_pixel_interpolater_send(bld,
+                                      FS_OPCODE_INTERPOLATE_AT_CENTROID,
+                                      dst_xy,
+                                      fs_reg(), /* src */
+                                      fs_reg(0u),
+                                      interpolation);
          break;
 
       case nir_intrinsic_interp_var_at_sample: {
-         /* XXX: We should probably handle non-constant sample id's */
          nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
-         assert(const_sample);
-         unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
-         inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
-                         fs_reg(msg_data));
+
+         if (const_sample) {
+            unsigned msg_data = const_sample->i[0] << 4;
+
+            emit_pixel_interpolater_send(bld,
+                                         FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+                                         dst_xy,
+                                         fs_reg(), /* src */
+                                         fs_reg(msg_data),
+                                         interpolation);
+         } else {
+            const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+                                             BRW_REGISTER_TYPE_UD);
+
+            if (nir_src_is_dynamically_uniform(instr->src[0])) {
+               const fs_reg sample_id = bld.emit_uniformize(sample_src);
+               const fs_reg msg_data = vgrf(glsl_type::uint_type);
+               bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+               emit_pixel_interpolater_send(bld,
+                                            FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+                                            dst_xy,
+                                            fs_reg(), /* src */
+                                            msg_data,
+                                            interpolation);
+            } else {
+               /* Make a loop that sends a message to the pixel interpolater
+                * for the sample number in each live channel. If there are
+                * multiple channels with the same sample number then these
+                * will be handled simultaneously with a single interation of
+                * the loop.
+                */
+               bld.emit(BRW_OPCODE_DO);
+
+               /* Get the next live sample number into sample_id_reg */
+               const fs_reg sample_id = bld.emit_uniformize(sample_src);
+
+               /* Set the flag register so that we can perform the send
+                * message on all channels that have the same sample number
+                */
+               bld.CMP(bld.null_reg_ud(),
+                       sample_src, sample_id,
+                       BRW_CONDITIONAL_EQ);
+               const fs_reg msg_data = vgrf(glsl_type::uint_type);
+               bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+               fs_inst *inst =
+                  emit_pixel_interpolater_send(bld,
+                                               FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+                                               dst_xy,
+                                               fs_reg(), /* src */
+                                               msg_data,
+                                               interpolation);
+               set_predicate(BRW_PREDICATE_NORMAL, inst);
+
+               /* Continue the loop if there are any live channels left */
+               set_predicate_inv(BRW_PREDICATE_NORMAL,
+                                 true, /* inverse */
+                                 bld.emit(BRW_OPCODE_WHILE));
+            }
+         }
+
          break;
       }
 
@@ -1615,10 +1698,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
             unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
             unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
 
-            inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
-                            fs_reg(off_x | (off_y << 4)));
+            emit_pixel_interpolater_send(bld,
+                                         FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+                                         dst_xy,
+                                         fs_reg(), /* src */
+                                         fs_reg(off_x | (off_y << 4)),
+                                         interpolation);
          } else {
-            src = vgrf(glsl_type::ivec2_type);
+            fs_reg src = vgrf(glsl_type::ivec2_type);
             fs_reg offset_src = retype(get_nir_src(instr->src[0]),
                                        BRW_REGISTER_TYPE_F);
             for (int i = 0; i < 2; i++) {
@@ -1646,9 +1733,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                            bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
             }
 
-            mlen = 2 * dispatch_width / 8;
-            inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
-                            fs_reg(0u));
+            const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
+            emit_pixel_interpolater_send(bld,
+                                         opcode,
+                                         dst_xy,
+                                         src,
+                                         fs_reg(0u),
+                                         interpolation);
          }
          break;
       }
@@ -1657,12 +1748,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          unreachable("Invalid intrinsic");
       }
 
-      inst->mlen = mlen;
-      /* 2 floats per slot returned */
-      inst->regs_written = 2 * dispatch_width / 8;
-      inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
-                               INTERP_QUALIFIER_NOPERSPECTIVE;
-
       for (unsigned j = 0; j < instr->num_components; j++) {
          fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
          src.type = dest.type;