reg.file == BRW_ARF_NULL)
return;
- assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
+ assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
hstride = hstride_for_reg[reg.hstride];
if (reg.vstride == 0xf) {
vstride = -1;
} else {
- assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
+ assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
vstride = vstride_for_reg[reg.vstride];
}
- assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
+ assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
width = width_for_reg[reg.width];
assert(brw_inst_exec_size(brw, inst) >= 0 &&
- brw_inst_exec_size(brw, inst) < Elements(execsize_for_reg));
+ brw_inst_exec_size(brw, inst) < ARRAY_SIZE(execsize_for_reg));
execsize = execsize_for_reg[brw_inst_exec_size(brw, inst)];
/* Restrictions from 3.3.10: Register Region Restrictions. */
/* Emit a float32 -> float16 conversion of src into dst.
 *
 * Gen7 uses the F32TO16 opcode; Gen8+ uses a MOV to an HF-typed view of dst.
 * When dst is UD and the conversion is not expected to zero the upper half
 * by itself (see needs_zero_fill), an extra MOV of immediate 0 is emitted
 * after the conversion.
 *
 * Returns the last instruction emitted: the zero-fill MOV when
 * needs_zero_fill is set, otherwise the conversion instruction.
 */
brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
{
const struct brw_context *brw = p->brw;
- bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+ const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+ /* The F32TO16 instruction doesn't support 32-bit destination types in
+ * Align1 mode, and neither does the Gen8 implementation in terms of a
+ * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
+ * an undocumented feature.
+ */
+ const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
+ (!align16 || brw->gen >= 8));
+ brw_inst *inst;
/* Align16 accepts only a UD destination; Align1 accepts UD, W, UW or HF. */
if (align16) {
assert(dst.type == BRW_REGISTER_TYPE_UD);
} else {
- assert(dst.type == BRW_REGISTER_TYPE_W ||
+ assert(dst.type == BRW_REGISTER_TYPE_UD ||
+ dst.type == BRW_REGISTER_TYPE_W ||
dst.type == BRW_REGISTER_TYPE_UW ||
dst.type == BRW_REGISTER_TYPE_HF);
}
/* Paired with brw_pop_insn_state() before the return.
 * NOTE(review): presumably this saves the default instruction state so the
 * Align1 override below does not leak to later instructions -- confirm.
 */
+ brw_push_insn_state(p);
+
+ if (needs_zero_fill) {
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
/* Reinterpret the UD destination as words.
 * NOTE(review): spread(..., 2) presumably selects every other word, i.e.
 * one half of each dword -- verify against spread().
 */
+ dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
+ }
+
if (brw->gen >= 8) {
- if (align16) {
- /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's
- * emit_pack_half_2x16 method.)
- */
- brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
- }
- return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+ inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
} else {
assert(brw->gen == 7);
- return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+ inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+ }
+
/* Zero-fill: a second MOV writes immediate 0 through suboffset(dst, 1);
 * no_dd_clear is set on the conversion and no_dd_check on the zeroing MOV.
 * NOTE(review): suboffset(dst, 1) presumably addresses the word next to
 * each converted value, and the no_dd_* bits look like dependency-control
 * hints for the two partial writes -- confirm both.
 * NOTE(review): the immediate is UD while dst was retyped to W above --
 * confirm the mixed types are intended.
 */
+ if (needs_zero_fill) {
+ brw_inst_set_no_dd_clear(brw, inst, true);
+ inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
+ brw_inst_set_no_dd_check(brw, inst, true);
}
+
+ brw_pop_insn_state(p);
+ return inst;
}
brw_inst *
if (align16) {
assert(src.type == BRW_REGISTER_TYPE_UD);
} else {
+ /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
+ *
+ * Because this instruction does not have a 16-bit floating-point
+ * type, the source data type must be Word (W). The destination type
+ * must be F (Float).
+ */
+ if (src.type == BRW_REGISTER_TYPE_UD)
+ src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
+
assert(src.type == BRW_REGISTER_TYPE_W ||
src.type == BRW_REGISTER_TYPE_UW ||
src.type == BRW_REGISTER_TYPE_HF);
} else if (brw->gen == 7) {
brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- brw_set_src1(p, insn, brw_imm_ud(0));
+ brw_set_src1(p, insn, brw_imm_w(0));
brw_inst_set_jip(brw, insn, 0);
brw_inst_set_uip(brw, insn, 0);
} else {
} else if (brw->gen == 7) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
+ brw_set_src1(p, insn, brw_imm_w(0));
brw_inst_set_jip(brw, insn, 0);
brw_inst_set_uip(brw, insn, 0);
} else {
} else if (brw->gen == 7) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
+ brw_set_src1(p, insn, brw_imm_w(0));
} else {
brw_set_src0(p, insn, brw_imm_d(0));
}
} else if (brw->gen == 7) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
+ brw_set_src1(p, insn, brw_imm_w(0));
brw_inst_set_jip(brw, insn, br * (do_insn - insn));
} else {
brw_set_dest(p, insn, brw_imm_w(0));
struct brw_context *brw = p->brw;
brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
- if (brw->gen >= 8) {
- /* The CMP instruction appears to behave erratically for floating point
- * sources unless the destination type is also float. Overriding it to
- * match src0 makes it work in all cases.
- */
- dest.type = src0.type;
- }
-
brw_inst_set_cond_modifier(brw, insn, conditional);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
unsigned msg_length,
unsigned response_length,
bool eot,
+ bool last_render_target,
bool header_present)
{
struct brw_context *brw = p->brw;
msg_type,
msg_length,
header_present,
- eot, /* last render target write */
+ last_render_target,
response_length,
eot,
0 /* send_commit_msg */);