const struct gen_device_info *devinfo = p->devinfo;
assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
(devinfo->gen >= 8 && num_regs == 8));
- const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) :
+ const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) :
num_regs - 1);
brw_set_desc(p, inst, brw_message_desc(
return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
-/* Rounding operations (other than RNDD) require two instructions - the first
- * stores a rounded value (possibly the wrong way) in the dest register, but
- * also sets a per-channel "increment bit" in the flag register. A predicated
- * add of 1.0 fixes dest to contain the desired result.
- *
- * Sandybridge and later appear to round correctly without an ADD.
- */
-#define ROUND(OP) \
-void brw_##OP(struct brw_codegen *p, \
- struct brw_reg dest, \
- struct brw_reg src) \
-{ \
- const struct gen_device_info *devinfo = p->devinfo; \
- brw_inst *rnd, *add; \
- rnd = next_insn(p, BRW_OPCODE_##OP); \
- brw_set_dest(p, rnd, dest); \
- brw_set_src0(p, rnd, src); \
- \
- if (devinfo->gen < 6) { \
- /* turn on round-increments */ \
- brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R); \
- add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
- brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL); \
- } \
-}
-
-
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
+ALU1(RNDE)
+ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(ADDC)
ALU2(SUBB)
-ROUND(RNDZ)
-ROUND(RNDE)
-
brw_inst *
brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
{
}
brw_inst *
-gen6_HALT(struct brw_codegen *p)
+brw_HALT(struct brw_codegen *p)
{
const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- if (devinfo->gen < 8) {
+ if (devinfo->gen < 6) {
+ /* From the Gen4 PRM:
+ *
+ * "IP register must be put (for example, by the assembler) at <dst>
+ * and <src0> locations."
+ */
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
+ } else if (devinfo->gen < 8) {
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
} else if (devinfo->gen < 12) {
}
if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
- (ex_desc.ud & INTEL_MASK(15, 12)) == 0) {
+ (devinfo->gen >= 12 || (ex_desc.ud & INTEL_MASK(15, 12)) == 0)) {
ex_desc.ud |= ex_desc_imm;
} else {
const struct tgl_swsb swsb = brw_get_default_swsb(p);
unsigned imm_part = ex_desc_imm | sfid | eot << 5;
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
- /* ex_desc bits 15:12 don't exist in the instruction encoding, so
- * we may have fallen back to an indirect extended descriptor.
+ /* ex_desc bits 15:12 don't exist in the instruction encoding prior
+ * to Gen12, so we may have fallen back to an indirect extended
+ * descriptor.
*/
brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
} else {
struct brw_reg dst,
struct brw_reg src,
enum opcode send_op,
- bool stall,
+ enum brw_message_target sfid,
+ bool commit_enable,
unsigned bti)
{
const struct gen_device_info *devinfo = p->devinfo;
- const bool commit_enable = stall ||
- devinfo->gen >= 10 || /* HSD ES # 1404612949 */
- (devinfo->gen == 7 && !devinfo->is_haswell);
- struct brw_inst *insn;
- brw_push_insn_state(p);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_set_default_exec_size(p, BRW_EXECUTE_1);
dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
* message doesn't write anything back.
*/
- insn = next_insn(p, send_op);
+ struct brw_inst *insn = next_insn(p, send_op);
+ brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+ brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, src);
- brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
- commit_enable, bti);
-
- if (devinfo->gen == 7 && !devinfo->is_haswell) {
- /* IVB does typed surface access through the render cache, so we need to
- * flush it too. Use a different register so both flushes can be
- * pipelined by the hardware.
- */
- insn = next_insn(p, send_op);
- brw_set_dest(p, insn, offset(dst, 1));
- brw_set_src0(p, insn, src);
- brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
- commit_enable, bti);
-
- /* Now write the response of the second message into the response of the
- * first to trigger a pipeline stall -- This way future render and data
- * cache messages will be properly ordered with respect to past data and
- * render cache messages.
- */
- brw_MOV(p, dst, offset(dst, 1));
- }
-
- if (stall) {
- brw_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_DST,
- brw_get_default_swsb(p).sbid));
-
- brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
- }
-
- brw_pop_insn_state(p);
+ brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
void
/* Take into account the component size and horizontal stride. */
assert(src.vstride == src.hstride + src.width);
brw_SHL(p, addr, vec1(idx),
- brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
+ brw_imm_ud(util_logbase2(type_sz(src.type)) +
src.hstride - 1));
/* We can only address up to limit bytes using the indirect