intel/fs: Add a couple of simple helper opcodes

author Jason Ekstrand <jason.ekstrand@intel.com>

Fri, 1 Sep 2017 04:45:30 +0000 (21:45 -0700)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Wed, 7 Mar 2018 20:13:47 +0000 (12:13 -0800)
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 1 Sep 2017 04:45:30 +0000 (21:45 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 7 Mar 2018 20:13:47 +0000 (12:13 -0800)
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h

index 3449c73d771a45452ee2fa72b048208d86683dd7..3c4c538ac17a8f9a89ded8b83e73ceccda4d5afc 100644 (file)
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -460,6 +460,19 @@ enum opcode {
      */
     SHADER_OPCODE_SHUFFLE,
  
+   /* Select between src0 and src1 based on channel enables.
+    *
+    * This instruction copies src0 into the enabled channels of the
+    * destination and copies src1 into the disabled channels.
+    */
+   SHADER_OPCODE_SEL_EXEC,
+
+   /* Take every Nth element in src0 and broadcast it to the group of N
+    * channels in which it lives in the destination.  The offset within the
+    * cluster is given by src1 and the cluster size is given by src2.
+    */
+   SHADER_OPCODE_CLUSTER_BROADCAST,
+
     SHADER_OPCODE_GET_BUFFER_SIZE,
  
     VEC4_OPCODE_MOV_BYTES,
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp

index 47f1f6e9c9f0ca31d74164fe83f310c1fbb21b22..9f1b8d0b184707e492d4b52793dd4ff0d7c8854f 100644 (file)
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -316,6 +316,15 @@ fs_inst::has_source_and_destination_hazard() const
         * that one of the instructions will read from a channel corresponding
         * to an earlier instruction.
         */
+   case SHADER_OPCODE_SEL_EXEC:
+      /* This is implemented as
+       *
+       * mov(16)      g4<1>D      0D            { align1 WE_all 1H };
+       * mov(16)      g4<1>D      g5<8,8,1>D    { align1 1H }
+       *
+       * Because the source is only read in the second instruction, the first
+       * may stomp all over it.
+       */
        return true;
     default:
        /* The SIMD16 compressed instruction
@@ -5038,6 +5047,8 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
     case BRW_OPCODE_MAD:
     case BRW_OPCODE_LRP:
     case FS_OPCODE_PACK:
+   case SHADER_OPCODE_SEL_EXEC:
+   case SHADER_OPCODE_CLUSTER_BROADCAST:
        return get_fpu_lowered_simd_width(devinfo, inst);
  
     case BRW_OPCODE_CMP: {
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp

index 9b8f8ce683e0160f54c19cbf5aca755f575f576a..382548f5c36b0466e9d3f25b7c492a4652d1911a 100644 (file)
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2293,6 +2293,53 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
           generate_shuffle(inst, dst, src[0], src[1]);
           break;
  
+      case SHADER_OPCODE_SEL_EXEC:
+         assert(inst->force_writemask_all);
+         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+         brw_MOV(p, dst, src[1]);
+         brw_set_default_mask_control(p, BRW_MASK_ENABLE);
+         brw_MOV(p, dst, src[0]);
+         break;
+
+      case SHADER_OPCODE_CLUSTER_BROADCAST: {
+         assert(src[0].type == dst.type);
+         assert(!src[0].negate && !src[0].abs);
+         assert(src[1].file == BRW_IMMEDIATE_VALUE);
+         assert(src[1].type == BRW_REGISTER_TYPE_UD);
+         assert(src[2].file == BRW_IMMEDIATE_VALUE);
+         assert(src[2].type == BRW_REGISTER_TYPE_UD);
+         const unsigned component = src[1].ud;
+         const unsigned cluster_size = src[2].ud;
+         struct brw_reg strided = stride(suboffset(src[0], component),
+                                         cluster_size, cluster_size, 0);
+         if (type_sz(src[0].type) > 4 &&
+             (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+            /* IVB has an issue (which we found empirically) where it reads
+             * two address register components per channel for indirectly
+             * addressed 64-bit sources.
+             *
+             * From the Cherryview PRM Vol 7. "Register Region Restrictions":
+             *
+             *    "When source or destination datatype is 64b or operation is
+             *    integer DWord multiply, indirect addressing must not be
+             *    used."
+             *
+            * To work around both of these, we do two integer MOVs instead of
+             * one 64-bit MOV.  Because no double value should ever cross a
+             * register boundary, it's safe to use the immediate offset in the
+             * indirect here to handle adding 4 bytes to the offset and avoid
+             * the extra ADD to the register file.
+             */
+            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+                       subscript(strided, BRW_REGISTER_TYPE_D, 0));
+            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+                       subscript(strided, BRW_REGISTER_TYPE_D, 1));
+         } else {
+            brw_MOV(p, dst, strided);
+         }
+         break;
+      }
+
        case FS_OPCODE_SET_SAMPLE_ID:
           generate_set_sample_id(inst, dst, src[0], src[1]);
           break;
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp

index b1227e17e2c7c38db542ee8f46b33099a817dc14..e822c100e9f14fd9e72d5ef74fbe11cee05d46e1 100644 (file)
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -332,6 +332,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
        return "broadcast";
     case SHADER_OPCODE_SHUFFLE:
        return "shuffle";
+   case SHADER_OPCODE_SEL_EXEC:
+      return "sel_exec";
+   case SHADER_OPCODE_CLUSTER_BROADCAST:
+      return "cluster_broadcast";
  
     case SHADER_OPCODE_GET_BUFFER_SIZE:
        return "get_buffer_size";
@@ -847,6 +851,7 @@ backend_instruction::can_do_source_mods() const
     case BRW_OPCODE_FBL:
     case BRW_OPCODE_SUBB:
     case SHADER_OPCODE_BROADCAST:
+   case SHADER_OPCODE_CLUSTER_BROADCAST:
     case SHADER_OPCODE_MOV_INDIRECT:
        return false;
     default:
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Fri, 1 Sep 2017 04:45:30 +0000 (21:45 -0700)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Wed, 7 Mar 2018 20:13:47 +0000 (12:13 -0800)
src/intel/compiler/brw_eu_defines.h		patch \| blob \| history
src/intel/compiler/brw_fs.cpp		patch \| blob \| history
src/intel/compiler/brw_fs_generator.cpp		patch \| blob \| history
src/intel/compiler/brw_shader.cpp		patch \| blob \| history