return;
if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
+ assert(devinfo->gen < 12);
brw_push_insn_state(p);
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
else if (dest.file == BRW_GENERAL_REGISTER_FILE)
assert(dest.nr < 128);
- /* The hardware has a restriction where if the destination is Byte,
- * the instruction needs to have a stride of 2 (except for packed byte
- * MOV). This seems to be required even if the destination is the NULL
- * register.
+ /* The hardware has a restriction where a destination of size Byte with
+ * a stride of 1 is only allowed for a packed byte MOV. For any other
+ * instruction, the stride must be at least 2, even when the destination
+ * is the NULL register.
*/
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
dest.nr == BRW_ARF_NULL &&
- type_sz(dest.type) == 1) {
+ type_sz(dest.type) == 1 &&
+ dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
dest.hstride = BRW_HORIZONTAL_STRIDE_2;
}
assert(reg.file != BRW_IMMEDIATE_VALUE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
- assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
assert(reg.file == BRW_GENERAL_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr % 16 == 0);
- assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
- reg.vstride == reg.width + 1);
+ assert(has_scalar_region(reg) ||
+ (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
- assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
const struct gen_device_info *devinfo = p->devinfo;
assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
(devinfo->gen >= 8 && num_regs == 8));
- const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) :
+ const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) :
num_regs - 1);
brw_set_desc(p, inst, brw_message_desc(
brw_inst_set_compression(devinfo, insn, state->compressed);
brw_inst_set_access_mode(devinfo, insn, state->access_mode);
brw_inst_set_mask_control(devinfo, insn, state->mask_control);
+ if (devinfo->gen >= 12)
+ brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(state->swsb));
brw_inst_set_saturate(devinfo, insn, state->saturate);
brw_inst_set_pred_control(devinfo, insn, state->predicate);
brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);
gen7_convert_mrf_to_grf(p, &dest);
assert(dest.nr < 128);
+
+ if (devinfo->gen >= 10)
+ assert(!(src0.file == BRW_IMMEDIATE_VALUE &&
+ src2.file == BRW_IMMEDIATE_VALUE));
+
assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128);
assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128);
assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128);
brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
- brw_inst_set_3src_a1_src0_vstride(
- devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
+ if (src0.file == BRW_IMMEDIATE_VALUE) {
+ brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
+ } else {
+ brw_inst_set_3src_a1_src0_vstride(
+ devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
+ brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
+ to_3src_align1_hstride(src0.hstride));
+ brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr);
+ if (src0.type == BRW_REGISTER_TYPE_NF) {
+ brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
+ } else {
+ brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
+ }
+ brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
+ brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
+ }
brw_inst_set_3src_a1_src1_vstride(
devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
- /* no vstride on src2 */
-
- brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
- to_3src_align1_hstride(src0.hstride));
brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
to_3src_align1_hstride(src1.hstride));
- brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
- to_3src_align1_hstride(src2.hstride));
-
- brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr);
- if (src0.type == BRW_REGISTER_TYPE_NF) {
- brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
- } else {
- brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
- }
- brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
- brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr);
if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) {
brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
- brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr);
- brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
- brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
- brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
+ if (src2.file == BRW_IMMEDIATE_VALUE) {
+ brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
+ } else {
+ brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
+ to_3src_align1_hstride(src2.hstride));
+ /* no vstride on src2 */
+ brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr);
+ brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
+ brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
+ brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
+ }
assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
src0.file == BRW_IMMEDIATE_VALUE ||
src2.file == BRW_IMMEDIATE_VALUE);
if (devinfo->gen >= 12) {
- brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
+ if (src0.file == BRW_IMMEDIATE_VALUE) {
+ brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
+ } else {
+ brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
+ }
+
brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
- brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
+
+ if (src2.file == BRW_IMMEDIATE_VALUE) {
+ brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
+ } else {
+ brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
+ }
} else {
brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
src0.file == BRW_GENERAL_REGISTER_FILE ?
dest.file == BRW_MESSAGE_REGISTER_FILE);
}
brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
- brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
-/* Rounding operations (other than RNDD) require two instructions - the first
- * stores a rounded value (possibly the wrong way) in the dest register, but
- * also sets a per-channel "increment bit" in the flag register. A predicated
- * add of 1.0 fixes dest to contain the desired result.
- *
- * Sandybridge and later appear to round correctly without an ADD.
- */
-#define ROUND(OP) \
-void brw_##OP(struct brw_codegen *p, \
- struct brw_reg dest, \
- struct brw_reg src) \
-{ \
- const struct gen_device_info *devinfo = p->devinfo; \
- brw_inst *rnd, *add; \
- rnd = next_insn(p, BRW_OPCODE_##OP); \
- brw_set_dest(p, rnd, dest); \
- brw_set_src0(p, rnd, src); \
- \
- if (devinfo->gen < 6) { \
- /* turn on round-increments */ \
- brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R); \
- add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
- brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL); \
- } \
-}
-
-
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
+ALU1(RNDE)
+ALU1(RNDU)
+ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(ADDC)
ALU2(SUBB)
-ROUND(RNDZ)
-ROUND(RNDE)
-
brw_inst *
brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
{
}
if (needs_zero_fill) {
- brw_inst_set_no_dd_clear(devinfo, inst, true);
+ if (devinfo->gen < 12)
+ brw_inst_set_no_dd_clear(devinfo, inst, true);
+ brw_set_default_swsb(p, tgl_swsb_null());
inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
- brw_inst_set_no_dd_check(devinfo, inst, true);
+ if (devinfo->gen < 12)
+ brw_inst_set_no_dd_check(devinfo, inst, true);
}
brw_pop_insn_state(p);
brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP);
}
-
-
-
+void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func) /* Builds a BRW_OPCODE_SYNC instruction via next_insn() and tags it with the requested sync function. */
+{
+ brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
+ brw_inst_set_cond_modifier(p->devinfo, insn, func); /* func is stored in the instruction's cond-modifier field */
+}
/***********************************************************************
* Comparisons, if/else/endif
}
brw_inst *
-gen6_HALT(struct brw_codegen *p)
+brw_HALT(struct brw_codegen *p)
{
const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- if (devinfo->gen < 8) {
+ if (devinfo->gen < 6) {
+ /* From the Gen4 PRM:
+ *
+ * "IP register must be put (for example, by the assembler) at <dst>
+ * and <src0> locations."
+ */
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
+ } else if (devinfo->gen < 8) {
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
} else if (devinfo->gen < 12) {
(devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
BRW_SFID_DATAPORT_WRITE);
+ const struct tgl_swsb swsb = brw_get_default_swsb(p);
uint32_t msg_type;
if (devinfo->gen >= 6)
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
brw_imm_ud(offset));
brw_pop_insn_state(p);
+ brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
{
unsigned offset)
{
const struct gen_device_info *devinfo = p->devinfo;
+ const struct tgl_swsb swsb = brw_get_default_swsb(p);
if (devinfo->gen >= 6)
offset /= 16;
{
brw_push_insn_state(p);
+ brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
/* set message header global offset field (reg 0, element 2) */
brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
brw_pop_insn_state(p);
+ brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
{
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
BRW_SFID_DATAPORT_READ);
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
+ const struct tgl_swsb swsb = brw_get_default_swsb(p);
/* On newer hardware, offset is in units of owords. */
if (devinfo->gen >= 6)
brw_push_insn_state(p);
brw_set_default_exec_size(p, BRW_EXECUTE_8);
+ brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
brw_imm_ud(offset));
brw_pop_insn_state(p);
+ brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
+
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
brw_inst_set_sfid(devinfo, insn, target_cache);
struct brw_reg temp = get_element_ud(header, 3);
+ brw_push_insn_state(p);
brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
+ brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_SHL(p, temp, temp, brw_imm_ud(4));
brw_ADD(p,
get_element_ud(header, 3),
get_element_ud(brw_vec8_grf(0, 0), 3),
temp);
+ brw_pop_insn_state(p);
}
}
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
brw_set_desc(p, send, desc.ud | desc_imm);
} else {
+ const struct tgl_swsb swsb = brw_get_default_swsb(p);
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Load the indirect descriptor to an address register using OR so the
* caller can specify additional descriptor bits with the desc_imm
brw_pop_insn_state(p);
+ brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
send = next_insn(p, BRW_OPCODE_SEND);
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
if (desc.file == BRW_IMMEDIATE_VALUE) {
desc.ud |= desc_imm;
} else {
+ const struct tgl_swsb swsb = brw_get_default_swsb(p);
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Load the indirect descriptor to an address register using OR so the
* caller can specify additional descriptor bits with the desc_imm
brw_pop_insn_state(p);
desc = addr;
+
+ brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
- (ex_desc.ud & INTEL_MASK(15, 12)) == 0) {
+ (devinfo->gen >= 12 || (ex_desc.ud & INTEL_MASK(15, 12)) == 0)) {
ex_desc.ud |= ex_desc_imm;
} else {
+ const struct tgl_swsb swsb = brw_get_default_swsb(p);
struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Load the indirect extended descriptor to an address register using OR
* so the caller can specify additional descriptor bits with the
unsigned imm_part = ex_desc_imm | sfid | eot << 5;
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
- /* ex_desc bits 15:12 don't exist in the instruction encoding, so
- * we may have fallen back to an indirect extended descriptor.
+ /* ex_desc bits 15:12 don't exist in the instruction encoding prior
+ * to Gen12, so we may have fallen back to an indirect extended
+ * descriptor.
*/
brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
} else {
brw_pop_insn_state(p);
ex_desc = addr;
+
+ brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
- send = next_insn(p, BRW_OPCODE_SENDS);
+ send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
unsigned desc_imm)
{
if (surface.file != BRW_IMMEDIATE_VALUE) {
+ const struct tgl_swsb swsb = brw_get_default_swsb(p);
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Mask out invalid bits from the surface index to avoid hangs e.g. when
* some surface array is accessed out of bounds.
brw_pop_insn_state(p);
surface = addr;
+ brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
struct brw_reg dst,
struct brw_reg src,
enum opcode send_op,
- bool stall,
+ enum brw_message_target sfid,
+ bool commit_enable,
unsigned bti)
{
const struct gen_device_info *devinfo = p->devinfo;
- const bool commit_enable = stall ||
- devinfo->gen >= 10 || /* HSD ES # 1404612949 */
- (devinfo->gen == 7 && !devinfo->is_haswell);
- struct brw_inst *insn;
- brw_push_insn_state(p);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_set_default_exec_size(p, BRW_EXECUTE_1);
dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
* message doesn't write anything back.
*/
- insn = next_insn(p, send_op);
+ struct brw_inst *insn = next_insn(p, send_op);
+ brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+ brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, src);
- brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
- commit_enable, bti);
-
- if (devinfo->gen == 7 && !devinfo->is_haswell) {
- /* IVB does typed surface access through the render cache, so we need to
- * flush it too. Use a different register so both flushes can be
- * pipelined by the hardware.
- */
- insn = next_insn(p, send_op);
- brw_set_dest(p, insn, offset(dst, 1));
- brw_set_src0(p, insn, src);
- brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
- commit_enable, bti);
-
- /* Now write the response of the second message into the response of the
- * first to trigger a pipeline stall -- This way future render and data
- * cache messages will be properly ordered with respect to past data and
- * render cache messages.
- */
- brw_MOV(p, dst, offset(dst, 1));
- }
-
- if (stall)
- brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
-
- brw_pop_insn_state(p);
+ brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
void
* hardware.
*/
brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8));
+ brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_AND(p, vec1(dst), exec_mask, vec1(dst));
exec_mask = vec1(dst);
}
/* Take into account the component size and horizontal stride. */
assert(src.vstride == src.hstride + src.width);
brw_SHL(p, addr, vec1(idx),
- brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
+ brw_imm_ud(util_logbase2(type_sz(src.type)) +
src.hstride - 1));
/* We can only address up to limit bytes using the indirect
* register is above this limit.
*/
if (offset >= limit) {
+ brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
offset = offset % limit;
}
brw_pop_insn_state(p);
+ brw_set_default_swsb(p, tgl_swsb_regdist(1));
+
/* Use indirect addressing to fetch the specified component. */
if (type_sz(src.type) > 4 &&
(devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
retype(brw_vec1_indirect(addr.subnr, offset),
BRW_REGISTER_TYPE_D));
+ brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
retype(brw_vec1_indirect(addr.subnr, offset + 4),
BRW_REGISTER_TYPE_D));
brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false));
brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
- brw_inst_set_gateway_notify(devinfo, inst, 1);
brw_inst_set_gateway_subfuncid(devinfo, inst,
BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
brw_float_controls_mode(struct brw_codegen *p,
unsigned mode, unsigned mask)
{
- brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
- brw_imm_ud(~mask));
- brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
-
/* From the Skylake PRM, Volume 7, page 760:
* "Implementation Restriction on Register Access: When the control
* register is used as an explicit source and/or destination, hardware
* does not ensure execution pipeline coherency. Software must set the
* thread control field to ‘switch’ for an instruction that uses
* control register as an explicit operand."
+ *
+ * On Gen12+ this is implemented in terms of SWSB annotations instead.
*/
- brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+ brw_set_default_swsb(p, tgl_swsb_regdist(1));
+
+ brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
+ brw_imm_ud(~mask));
+ brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
+ if (p->devinfo->gen < 12)
+ brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
if (mode) {
brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(mode));
brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
- brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
+ if (p->devinfo->gen < 12)
+ brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
}
+
+ if (p->devinfo->gen >= 12)
+ brw_SYNC(p, TGL_SYNC_NOP);
}