*/
SHADER_OPCODE_SHUFFLE,
+ /* Select between src0 and src1 based on channel enables.
+ *
+ * This instruction copies src0 into the enabled channels of the
+ * destination and copies src1 into the disabled channels.
+ */
+ SHADER_OPCODE_SEL_EXEC,
+
+ /* Take every Nth element in src0 and broadcast it to the group of N
+ * channels in which it lives in the destination. The offset within the
+ * cluster is given by src1 and the cluster size is given by src2.
+ */
+ SHADER_OPCODE_CLUSTER_BROADCAST,
+
SHADER_OPCODE_GET_BUFFER_SIZE,
VEC4_OPCODE_MOV_BYTES,
* that one of the instructions will read from a channel corresponding
* to an earlier instruction.
*/
+ case SHADER_OPCODE_SEL_EXEC:
+ /* This is implemented as
+ *
+ * mov(16) g4<1>D 0D { align1 WE_all 1H };
+ * mov(16) g4<1>D g5<8,8,1>D { align1 1H }
+ *
+ * Because the source is only read in the second instruction, the first
+ * may stomp all over it.
+ */
return true;
default:
/* The SIMD16 compressed instruction
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
case FS_OPCODE_PACK:
+ case SHADER_OPCODE_SEL_EXEC:
+ case SHADER_OPCODE_CLUSTER_BROADCAST:
return get_fpu_lowered_simd_width(devinfo, inst);
case BRW_OPCODE_CMP: {
generate_shuffle(inst, dst, src[0], src[1]);
break;
+ case SHADER_OPCODE_SEL_EXEC:
+ assert(inst->force_writemask_all);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, dst, src[1]);
+ brw_set_default_mask_control(p, BRW_MASK_ENABLE);
+ brw_MOV(p, dst, src[0]);
+ break;
+
+ case SHADER_OPCODE_CLUSTER_BROADCAST: {
+ assert(src[0].type == dst.type);
+ assert(!src[0].negate && !src[0].abs);
+ assert(src[1].file == BRW_IMMEDIATE_VALUE);
+ assert(src[1].type == BRW_REGISTER_TYPE_UD);
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
+ assert(src[2].type == BRW_REGISTER_TYPE_UD);
+ const unsigned component = src[1].ud;
+ const unsigned cluster_size = src[2].ud;
+ struct brw_reg strided = stride(suboffset(src[0], component),
+ cluster_size, cluster_size, 0);
+ if (type_sz(src[0].type) > 4 &&
+ (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+ /* IVB has an issue (which we found empirically) where it reads
+ * two address register components per channel for indirectly
+ * addressed 64-bit sources.
+ *
+ * From the Cherryview PRM Vol 7. "Register Region Restrictions":
+ *
+ * "When source or destination datatype is 64b or operation is
+ * integer DWord multiply, indirect addressing must not be
+ * used."
+ *
+ * To work around both of these, we do two integer MOVs instead of
+ * one 64-bit MOV. Because no double value should ever cross a
+ * register boundary, it's safe to use the immediate offset in the
+ * indirect here to handle adding 4 bytes to the offset and avoid
+ * the extra ADD to the register file.
+ */
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+ subscript(strided, BRW_REGISTER_TYPE_D, 0));
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+ subscript(strided, BRW_REGISTER_TYPE_D, 1));
+ } else {
+ brw_MOV(p, dst, strided);
+ }
+ break;
+ }
+
case FS_OPCODE_SET_SAMPLE_ID:
generate_set_sample_id(inst, dst, src[0], src[1]);
break;