assert(reg.file != BRW_IMMEDIATE_VALUE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
- assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
assert(reg.file == BRW_GENERAL_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr % 16 == 0);
- assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
- reg.vstride == reg.width + 1);
+ assert(has_scalar_region(reg) ||
+ (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
- assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}
-#define next_insn brw_next_insn
-brw_inst *
-brw_next_insn(struct brw_codegen *p, unsigned opcode)
+/**
+ * Reserve room for \p nr_insn instructions at the end of the program store.
+ *
+ * The store is first padded so that the reserved range starts on an
+ * \p align byte boundary; an alignment smaller than one instruction is
+ * treated as one instruction.  Only that alignment padding is zeroed here,
+ * the reserved instructions themselves are left for the caller to fill in.
+ *
+ * Returns a pointer to the first reserved instruction.  The store may be
+ * reallocated by this call, so instruction pointers obtained earlier must
+ * not be used afterwards.
+ */
+static brw_inst *
+brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned align)
{
- const struct gen_device_info *devinfo = p->devinfo;
- brw_inst *insn;
+ assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
+ assert(util_is_power_of_two_or_zero(align));
+ const unsigned align_insn = MAX2(align / sizeof(brw_inst), 1);
+ const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
+ const unsigned new_nr_insn = start_insn + nr_insn;
- if (p->nr_insn + 1 > p->store_size) {
- p->store_size <<= 1;
+ if (p->store_size < new_nr_insn) {
+ /* store_size counts instructions, not bytes: it is compared against an
+ * instruction count above and handed to reralloc() as an element count
+ * below.  Growing to the next power of two keeps reallocation amortized.
+ */
+ p->store_size = util_next_power_of_two(new_nr_insn);
p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
}
- p->next_insn_offset += 16;
- insn = &p->store[p->nr_insn++];
+ /* Memset any padding due to alignment to 0. We don't want to be hashing
+ * or caching a bunch of random bits we got from a memory allocation.
+ */
+ if (p->nr_insn < start_insn) {
+ memset(&p->store[p->nr_insn], 0,
+ (start_insn - p->nr_insn) * sizeof(brw_inst));
+ }
+
+ /* The byte offset and the instruction count must stay in lock-step. */
+ assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
+ p->nr_insn = new_nr_insn;
+ p->next_insn_offset = new_nr_insn * sizeof(brw_inst);
+
+ return &p->store[start_insn];
+}
+
+/**
+ * Pad the instruction stream so that the next instruction offset is a
+ * multiple of \p align bytes.
+ */
+void
+brw_realign(struct brw_codegen *p, unsigned align)
+{
+   /* Appending zero instructions performs only the alignment step; the
+    * returned pointer is of no use here.
+    */
+   (void) brw_append_insns(p, 0, align);
+}
+
+/**
+ * Copy \p size bytes from \p data into the instruction store, starting on an
+ * \p align byte boundary.
+ *
+ * The space consumed is rounded up to a whole number of instructions and
+ * the unused tail of the last instruction is zeroed.
+ *
+ * Returns the byte offset of the copied data from the start of the store.
+ */
+int
+brw_append_data(struct brw_codegen *p, void *data,
+                unsigned size, unsigned align)
+{
+   const unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
+   const size_t padded_size = nr_insn * sizeof(brw_inst);
+
+   /* Work with a byte pointer: arithmetic on void * is a GNU extension,
+    * not ISO C.
+    */
+   char *dst = (char *)brw_append_insns(p, nr_insn, align);
+
+   /* memcpy() must not be handed a null source, even for zero bytes. */
+   if (size > 0)
+      memcpy(dst, data, size);
+
+   /* If it's not a whole number of instructions, memset the end */
+   if (size < padded_size)
+      memset(dst + size, 0, padded_size - size);
+
+   return dst - (char *)p->store;
+}
+
+#define next_insn brw_next_insn
+brw_inst *
+brw_next_insn(struct brw_codegen *p, unsigned opcode)
+{
+ const struct gen_device_info *devinfo = p->devinfo;
+ brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));
memset(insn, 0, sizeof(*insn));
brw_inst_set_opcode(devinfo, insn, opcode);
dest.file == BRW_MESSAGE_REGISTER_FILE);
}
brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
- brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
+ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
}
brw_inst *
-gen6_HALT(struct brw_codegen *p)
+brw_HALT(struct brw_codegen *p)
{
const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- if (devinfo->gen < 8) {
+ if (devinfo->gen < 6) {
+ /* From the Gen4 PRM:
+ *
+ * "IP register must be put (for example, by the assembler) at <dst>
+ * and <src0> locations.
+ */
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
+ } else if (devinfo->gen < 8) {
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
} else if (devinfo->gen < 12) {
* asserting would be mean.
*/
const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
- brw_MOV(p, dst,
- (align1 ? stride(suboffset(src, i), 0, 1, 0) :
- stride(suboffset(src, 4 * i), 0, 4, 1)));
+ src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
+ stride(suboffset(src, 4 * i), 0, 4, 1);
+
+ if (type_sz(src.type) > 4 && !devinfo->has_64bit_float) {
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+ subscript(src, BRW_REGISTER_TYPE_D, 0));
+ brw_set_default_swsb(p, tgl_swsb_null());
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+ subscript(src, BRW_REGISTER_TYPE_D, 1));
+ } else {
+ brw_MOV(p, dst, src);
+ }
} else {
/* From the Haswell PRM section "Register Region Restrictions":
*
/* Use indirect addressing to fetch the specified component. */
if (type_sz(src.type) > 4 &&
- (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+ (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
+ !devinfo->has_64bit_float)) {
/* From the Cherryview PRM Vol 7. "Register Region Restrictions":
*
* "When source or destination datatype is 64b or operation is
if (p->devinfo->gen >= 12)
brw_SYNC(p, TGL_SYNC_NOP);
}
+
+/**
+ * Overwrite the immediate of a MOV-immediate instruction with \p value.
+ *
+ * \p inst must be an uncompacted MOV whose src0 is an immediate; these
+ * preconditions are checked with assertions only.
+ */
+void
+brw_update_reloc_imm(const struct gen_device_info *devinfo,
+                     brw_inst *inst, uint32_t value)
+{
+   /* The target has to be a MOV with an immediate source... */
+   assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV);
+   assert(brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE);
+
+   /* ...and must not have been compacted, otherwise rewriting the
+    * immediate is not safe.
+    */
+   assert(brw_inst_cmpt_control(devinfo, inst) == 0);
+
+   brw_inst_set_imm_ud(devinfo, inst, value);
+}
+
+/* A default value for constants that will be patched at run-time.
+ * We pick an arbitrary value that prevents instruction compaction.
+ */
+#define DEFAULT_PATCH_IMM 0x4a7cc037
+
+/**
+ * Emit a MOV of a placeholder immediate into \p dst and record a relocation
+ * for it.
+ *
+ * The relocation entry stores \p id together with the value of
+ * p->next_insn_offset at the time of the call, i.e. the byte offset at
+ * which the next instruction will be placed.  Both \p src_type and the
+ * destination type must be 4 bytes wide.
+ */
+void
+brw_MOV_reloc_imm(struct brw_codegen *p,
+                  struct brw_reg dst,
+                  enum brw_reg_type src_type,
+                  uint32_t id)
+{
+   assert(type_sz(src_type) == 4);
+   assert(type_sz(dst.type) == 4);
+
+   /* Make room for one more entry: start at 16 entries, double after that. */
+   if (p->num_relocs + 1 > p->reloc_array_size) {
+      p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
+      p->relocs = reralloc(p->mem_ctx, p->relocs,
+                           struct brw_shader_reloc, p->reloc_array_size);
+   }
+
+   /* Capture the offset before the MOV is emitted and advances it. */
+   const struct brw_shader_reloc reloc = {
+      .id = id,
+      .offset = p->next_insn_offset,
+   };
+   p->relocs[p->num_relocs] = reloc;
+   p->num_relocs++;
+
+   const struct brw_reg placeholder =
+      retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type);
+   brw_MOV(p, dst, placeholder);
+}