i965: Introduce the FIND_LIVE_CHANNEL pseudo-opcode.
author: Francisco Jerez <currojerez@riseup.net>
Thu, 23 Apr 2015 11:42:53 +0000 (14:42 +0300)
committer: Francisco Jerez <currojerez@riseup.net>
Mon, 4 May 2015 14:44:17 +0000 (17:44 +0300)
This instruction calculates the index of an arbitrary channel enabled
in the current execution mask.  It's expected to be used as input for
the BROADCAST opcode, but it's implemented as a separate instruction
rather than being baked into BROADCAST because FIND_LIVE_CHANNEL has
no dependencies so it can always be CSE'ed with other instances of the
same instruction within a basic block.

v2: Whitespace fixes.

Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp

index 5072e680db1af54772248c3b857de0c739e956a9..7b5dd45edc56cbefb8fb029a3969961a778d4886 100644 (file)
@@ -918,6 +918,14 @@ enum opcode {
 
    SHADER_OPCODE_URB_WRITE_SIMD8,
 
+   /**
+    * Return the index of an arbitrary live channel (i.e. one of the channels
+    * enabled in the current execution mask) and assign it to the first
+    * component of the destination.  Expected to be used as input for the
+    * BROADCAST pseudo-opcode.
+    */
+   SHADER_OPCODE_FIND_LIVE_CHANNEL,
+
    /**
     * Pick the channel from its first source register given by the index
     * specified as second source.  Useful for variable indexing of surfaces.
index a0c938a142bde250781f89052d6c513ce9309d31..0e7be1e1ea068d7736912ee9b054a5accf67b69b 100644 (file)
@@ -461,6 +461,10 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              unsigned msg_length,
                              unsigned response_length);
 
+void
+brw_find_live_channel(struct brw_codegen *p,
+                      struct brw_reg dst);
+
 void
 brw_broadcast(struct brw_codegen *p,
               struct brw_reg dst,
index 73bed49ecee201ef8e768a1ed47bd59e50b4375f..e78d0bec268111443e2b3ab8803d91eaedbb4f63 100644 (file)
@@ -3212,6 +3212,78 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
    brw_inst_set_pi_message_data(devinfo, insn, data);
 }
 
+void
+brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   brw_inst *inst;
+
+   assert(devinfo->gen >= 7); /* Only Gen7+ paths are implemented below. */
+
+   brw_push_insn_state(p);
+
+   if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+      if (devinfo->gen >= 8) {
+         /* Getting the first active channel index is easy on Gen8: Just find
+          * the first bit set in the mask register.  The same register exists
+          * on HSW already but it reads back as all ones when the current
+          * instruction has execution masking disabled, so it's kind of
+          * useless.
+          */
+         inst = brw_FBL(p, vec1(dst),
+                        retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
+
+         /* Quarter control has the effect of magically shifting the value of
+          * this register.  Make sure it's set to zero.
+          */
+         brw_inst_set_qtr_control(devinfo, inst, GEN6_COMPRESSION_1Q);
+      } else {
+         const struct brw_reg flag = retype(brw_flag_reg(1, 0),
+                                            BRW_REGISTER_TYPE_UD);
+
+         brw_MOV(p, flag, brw_imm_ud(0));
+
+         /* With the flag register zeroed above, run a 16-wide instruction
+          * returning zero with execution masking and a conditional modifier
+          * enabled in order to get the current execution mask in f1.0.
+          */
+         inst = brw_MOV(p, brw_null_reg(), brw_imm_ud(0));
+         brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_16);
+         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
+         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
+         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
+
+         brw_FBL(p, vec1(dst), flag);
+      }
+   } else {
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+      if (devinfo->gen >= 8) {
+         /* In SIMD4x2 mode the first active channel index is !(mask & 1).
+          * NOTE(review): assumes negate is a bitwise NOT on Gen8+ AND.
+          */
+         inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
+                        negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
+                        brw_imm_ud(1));
+
+      } else {
+         /* Write 1 to X with execution masking disabled, then 0 with it
+          * enabled, so X reads 0 where the channel is live and 1 elsewhere.
+          */
+         brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
+                 brw_imm_ud(1));
+
+         inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
+                        brw_imm_ud(0));
+         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
+      }
+   }
+
+   brw_pop_insn_state(p);
+}
+
 void
 brw_broadcast(struct brw_codegen *p,
               struct brw_reg dst,
index 2c6a12e4becac93bb1f007e47257e7902d4fe645..d476c92b73a0b7d28be730a9106faad369d539f5 100644 (file)
@@ -2061,6 +2061,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          generate_set_simd4x2_offset(inst, dst, src[0]);
          break;
 
+      case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+         brw_find_live_channel(p, dst);
+         break;
+
       case SHADER_OPCODE_BROADCAST:
          brw_broadcast(p, dst, src[0], src[1]);
          break;
index 1944c2648c878b69ad66c926aa30889d79e9c8dc..c1fd859fef5316154eb5f3cbf76c01e480fc39c5 100644 (file)
@@ -517,6 +517,8 @@ brw_instruction_name(enum opcode op)
    case SHADER_OPCODE_URB_WRITE_SIMD8:
       return "gen8_urb_write_simd8";
 
+   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+      return "find_live_channel";
    case SHADER_OPCODE_BROADCAST:
       return "broadcast";
 
index 3dc808c85eeca5b423e1b7b30cec8c8ca1b22fb0..9d37c93bfa4c4614429508d4758310145fefc8d3 100644 (file)
@@ -1512,6 +1512,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
          brw_memory_fence(p, dst);
          break;
 
+      case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+         brw_find_live_channel(p, dst);
+         break;
+
       case SHADER_OPCODE_BROADCAST:
          brw_broadcast(p, dst, src[0], src[1]);
          break;