/* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
* or 16 (SIMD16), as that's normally correct. However, when dealing with
- * small registers, we automatically reduce it to match the register size.
- *
- * In platforms that support fp64 we can emit instructions with a width of
- * 4 that need two SIMD8 registers and an exec_size of 8 or 16. In these
- * cases we need to make sure that these instructions have their exec sizes
- * set properly when they are emitted and we can't rely on this code to fix
- * it.
+ * small registers, it can be useful for us to automatically reduce it to
+ * match the register size.
*/
- bool fix_exec_size;
- if (devinfo->gen >= 6)
- fix_exec_size = dest.width < BRW_EXECUTE_4;
- else
- fix_exec_size = dest.width < BRW_EXECUTE_8;
+ if (p->automatic_exec_sizes) {
+ /*
+ * In platforms that support fp64 we can emit instructions with a width
+ * of 4 that need two SIMD8 registers and an exec_size of 8 or 16. In
+ * these cases we need to make sure that these instructions have their
+ * exec sizes set properly when they are emitted and we can't rely on
+ * this code to fix it.
+ */
+ bool fix_exec_size;
+ if (devinfo->gen >= 6)
+ fix_exec_size = dest.width < BRW_EXECUTE_4;
+ else
+ fix_exec_size = dest.width < BRW_EXECUTE_8;
- if (fix_exec_size)
- brw_inst_set_exec_size(devinfo, inst, dest.width);
+ if (fix_exec_size)
+ brw_inst_set_exec_size(devinfo, inst, dest.width);
+ }
}
void
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
brw_pop_insn_state(p);
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
BRW_REGISTER_TYPE_UD),
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
/* Load the indirect descriptor to an address register using OR so the
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
/* Mask out invalid bits from the surface index to avoid hangs e.g. when
p, insn, num_channels);
}
+/**
+ * Maps an access width given in bits onto the data element encoding used in
+ * the message control of byte scattered messages.
+ *
+ * Only widths of 8, 16 and 32 bits have an encoding; any other value ends up
+ * in unreachable().
+ */
+static unsigned
+brw_byte_scattered_data_element_from_bit_size(unsigned bit_size)
+{
+   if (bit_size == 8)
+      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
+
+   if (bit_size == 16)
+      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
+
+   if (bit_size == 32)
+      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
+
+   unreachable("Unsupported bit_size for byte scattered messages");
+}
+
+
+/**
+ * Emits a byte scattered read as an indirect surface message sent to
+ * GEN7_SFID_DATAPORT_DATA_CACHE.
+ *
+ * The asserts require Haswell or a later generation and a default access
+ * mode of BRW_ALIGN_1. dst, payload, surface and msg_length are forwarded
+ * unchanged to brw_send_indirect_surface_message(); bit_size selects the
+ * data element size and must be 8, 16 or 32.
+ */
+void
+brw_byte_scattered_read(struct brw_codegen *p,
+                        struct brw_reg dst,
+                        struct brw_reg payload,
+                        struct brw_reg surface,
+                        unsigned msg_length,
+                        unsigned bit_size)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   assert(devinfo->gen > 7 || devinfo->is_haswell);
+   assert(brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
+
+   const unsigned response_length =
+      brw_surface_payload_size(p, 1, true, true);
+
+   struct brw_inst *send = brw_send_indirect_surface_message(
+      p, GEN7_SFID_DATAPORT_DATA_CACHE, dst, payload, surface, msg_length,
+      response_length, false);
+
+   /* The message control carries the data element size starting at bit 2
+    * and the SIMD mode in bit 0.
+    */
+   const unsigned data_element =
+      brw_byte_scattered_data_element_from_bit_size(bit_size);
+
+   /* 1 selects SIMD16 mode, 0 selects SIMD8 mode. */
+   const unsigned simd_mode =
+      brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16 ? 1 : 0;
+
+   brw_inst_set_dp_msg_type(devinfo, send,
+                            HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ);
+   brw_inst_set_dp_msg_control(devinfo, send,
+                               (data_element << 2) | simd_mode);
+}
+
+/**
+ * Emits a byte scattered write as an indirect surface message sent to
+ * GEN7_SFID_DATAPORT_DATA_CACHE.
+ *
+ * The asserts require Haswell or a later generation and a default access
+ * mode of BRW_ALIGN_1. The message is sent with a null destination register
+ * and a response length of zero; payload, surface and msg_length are
+ * forwarded unchanged to brw_send_indirect_surface_message(). bit_size
+ * selects the data element size and must be 8, 16 or 32.
+ */
+void
+brw_byte_scattered_write(struct brw_codegen *p,
+                         struct brw_reg payload,
+                         struct brw_reg surface,
+                         unsigned msg_length,
+                         unsigned bit_size)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   assert(devinfo->gen > 7 || devinfo->is_haswell);
+   assert(brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
+
+   const struct brw_reg null_dst =
+      brw_writemask(brw_null_reg(), WRITEMASK_XYZW);
+
+   struct brw_inst *send = brw_send_indirect_surface_message(
+      p, GEN7_SFID_DATAPORT_DATA_CACHE, null_dst, payload, surface,
+      msg_length, 0, true);
+
+   /* The message control carries the data element size starting at bit 2;
+    * bit 0 is set only when the default execution size is
+    * BRW_EXECUTE_16.
+    */
+   const unsigned data_element =
+      brw_byte_scattered_data_element_from_bit_size(bit_size);
+   const unsigned simd_mode =
+      brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16 ? 1 : 0;
+
+   brw_inst_set_dp_msg_type(devinfo, send,
+                            HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE);
+   brw_inst_set_dp_msg_control(devinfo, send,
+                               (data_element << 2) | simd_mode);
+}
+
static void
brw_set_dp_typed_atomic_message(struct brw_codegen *p,
struct brw_inst *insn,
struct brw_reg exec_mask =
retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) {
/* Unfortunately, ce0 does not take into account the thread
* dispatch mask, which may be a problem in cases where it's not
} else {
const struct brw_reg flag = brw_flag_reg(1, 0);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
/* Run enough instructions returning zero with execution masking and
* instructions.
*/
const enum brw_reg_type type = brw_int_type(exec_size / 8, false);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
}
} else {
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
+
+/**
+ * Switches the floating point rounding mode by rewriting the control
+ * register field defined at cr0.0[5-6] bits.
+ *
+ * The field is updated with bitwise operations only: an AND clears it and an
+ * OR then sets the bits of the requested mode, which covers RTNE (00),
+ * RU (01), RD (10) and RTZ (11). Either step is omitted when it would not
+ * change the final value of the field. Only RTNE and RTZ rounding are
+ * enabled at nir.
+ */
+void
+brw_rounding_mode(struct brw_codegen *p,
+                  enum brw_rnd_mode mode)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   const unsigned mode_bits = mode << BRW_CR0_RND_MODE_SHIFT;
+
+   /* Clearing is pointless when every bit of the field is about to be set. */
+   if (mode_bits != BRW_CR0_RND_MODE_MASK) {
+      brw_inst *clear_insn = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
+                                     brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
+
+      /* From the Skylake PRM, Volume 7, page 760:
+       *  "Implementation Restriction on Register Access: When the control
+       *   register is used as an explicit source and/or destination, hardware
+       *   does not ensure execution pipeline coherency. Software must set the
+       *   thread control field to ‘switch’ for an instruction that uses
+       *   control register as an explicit operand."
+       */
+      brw_inst_set_thread_control(devinfo, clear_insn, BRW_THREAD_SWITCH);
+   }
+
+   /* Nothing needs to be ORed in when the requested mode is all zeroes. */
+   if (mode_bits != 0) {
+      brw_inst *set_insn = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
+                                  brw_imm_ud(mode_bits));
+
+      /* This instruction also uses cr0 as an explicit operand, so it gets
+       * the same 'switch' thread control.
+       */
+      brw_inst_set_thread_control(devinfo, set_insn, BRW_THREAD_SWITCH);
+   }
+}