static void
validate_reg(const struct brw_context *brw, brw_inst *inst, struct brw_reg reg)
{
- int hstride_for_reg[] = {0, 1, 2, 4};
- int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
- int width_for_reg[] = {1, 2, 4, 8, 16};
- int execsize_for_reg[] = {1, 2, 4, 8, 16};
+ const int hstride_for_reg[] = {0, 1, 2, 4};
+ const int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32};
+ const int width_for_reg[] = {1, 2, 4, 8, 16};
+ const int execsize_for_reg[] = {1, 2, 4, 8, 16};
int width, hstride, vstride, execsize;
if (reg.file == BRW_IMMEDIATE_VALUE) {
reg.file == BRW_ARF_NULL)
return;
- assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
+ assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
hstride = hstride_for_reg[reg.hstride];
if (reg.vstride == 0xf) {
vstride = -1;
} else {
- assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
+ assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
vstride = vstride_for_reg[reg.vstride];
}
- assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
+ assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
width = width_for_reg[reg.width];
assert(brw_inst_exec_size(brw, inst) >= 0 &&
- brw_inst_exec_size(brw, inst) < Elements(execsize_for_reg));
+ brw_inst_exec_size(brw, inst) < ARRAY_SIZE(execsize_for_reg));
execsize = execsize_for_reg[brw_inst_exec_size(brw, inst)];
/* Restrictions from 3.3.10: Register Region Restrictions. */
brw_set_src1(struct brw_compile *p, brw_inst *inst, struct brw_reg reg)
{
const struct brw_context *brw = p->brw;
- assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
gen7_convert_mrf_to_grf(p, &reg);
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
validate_reg(brw, inst, reg);
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
-ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
+/**
+ * Emit a LINE instruction through brw_alu2().
+ *
+ * Before emission, src0's region fields are overridden to vertical stride 0,
+ * width 1 and horizontal stride 0; dest and src1 are passed through as
+ * given.  src0 is received by value, so the caller's copy is not modified.
+ *
+ * \return whatever brw_alu2() returns for the emitted instruction.
+ */
+brw_inst *
+brw_LINE(struct brw_compile *p, struct brw_reg dest,
+         struct brw_reg src0, struct brw_reg src1)
+{
+   /* Force the region of the first source, regardless of what was passed. */
+   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
+   src0.width = BRW_WIDTH_1;
+   src0.vstride = BRW_VERTICAL_STRIDE_0;
+
+   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
+}
+
brw_inst *
brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
{
const struct brw_context *brw = p->brw;
- bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+ const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+ /* The F32TO16 instruction doesn't support 32-bit destination types in
+ * Align1 mode, and neither does the Gen8 implementation in terms of a
+ * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
+ * an undocumented feature.
+ *
+ * So for a UD destination in either of those cases the zero fill is done
+ * by hand: the default access mode is switched to Align1, dst is
+ * rewritten through spread(retype(dst, W), 2) (presumably a stride-2
+ * word region covering the low word of each dword -- confirm against
+ * spread()), the conversion writes that region, and a second MOV writes
+ * 0 to the same region at suboffset 1.
+ *
+ * NOTE(review): by the time the zero MOV is emitted dst has been retyped
+ * to W, but the immediate is still brw_imm_ud(0u) -- confirm the mixed
+ * types are intended.
+ */
+ const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
+ (!align16 || brw->gen >= 8));
+ brw_inst *inst;
if (align16) {
assert(dst.type == BRW_REGISTER_TYPE_UD);
} else {
- assert(dst.type == BRW_REGISTER_TYPE_W ||
+ assert(dst.type == BRW_REGISTER_TYPE_UD ||
+ dst.type == BRW_REGISTER_TYPE_W ||
dst.type == BRW_REGISTER_TYPE_UW ||
dst.type == BRW_REGISTER_TYPE_HF);
}
+ brw_push_insn_state(p); /* matched by brw_pop_insn_state() before returning */
+
+ if (needs_zero_fill) {
+ brw_set_default_access_mode(p, BRW_ALIGN_1); /* overrides Align16 if it was active */
+ dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
+ }
+
if (brw->gen >= 8) {
- if (align16) {
- /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's
- * emit_pack_half_2x16 method.)
- */
- brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
- }
- return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+ inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
} else {
assert(brw->gen == 7);
- return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+ inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
}
+
+ if (needs_zero_fill) {
+ brw_inst_set_no_dd_clear(brw, inst, true); /* applied to the conversion instruction */
+ inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
+ brw_inst_set_no_dd_check(brw, inst, true); /* applied to the zero-fill MOV */
+ }
+
+ brw_pop_insn_state(p);
+ return inst; /* the zero-fill MOV if one was emitted, otherwise the conversion */
}
brw_inst *
if (align16) {
assert(src.type == BRW_REGISTER_TYPE_UD);
} else {
+ /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
+ *
+ * Because this instruction does not have a 16-bit floating-point
+ * type, the source data type must be Word (W). The destination type
+ * must be F (Float).
+ */
+ if (src.type == BRW_REGISTER_TYPE_UD)
+ src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
+
assert(src.type == BRW_REGISTER_TYPE_W ||
src.type == BRW_REGISTER_TYPE_UW ||
src.type == BRW_REGISTER_TYPE_HF);
} else if (brw->gen == 7) {
brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- brw_set_src1(p, insn, brw_imm_ud(0));
+ brw_set_src1(p, insn, brw_imm_w(0));
brw_inst_set_jip(brw, insn, 0);
brw_inst_set_uip(brw, insn, 0);
} else {
} else if (brw->gen == 7) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
+ brw_set_src1(p, insn, brw_imm_w(0));
brw_inst_set_jip(brw, insn, 0);
brw_inst_set_uip(brw, insn, 0);
} else {
} else if (brw->gen == 7) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
+ brw_set_src1(p, insn, brw_imm_w(0));
} else {
brw_set_src0(p, insn, brw_imm_d(0));
}
} else if (brw->gen == 7) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
+ brw_set_src1(p, insn, brw_imm_w(0));
brw_inst_set_jip(brw, insn, br * (do_insn - insn));
} else {
brw_set_dest(p, insn, brw_imm_w(0));
struct brw_context *brw = p->brw;
brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
- if (brw->gen >= 8) {
- /* The CMP instruction appears to behave erratically for floating point
- * sources unless the destination type is also float. Overriding it to
- * match src0 makes it work in all cases.
- */
- dest.type = src0.type;
- }
-
brw_inst_set_cond_modifier(brw, insn, conditional);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
unsigned function,
unsigned msg_reg_nr,
struct brw_reg src,
- unsigned data_type,
unsigned precision )
{
struct brw_context *brw = p->brw;
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+ unsigned data_type;
+ if (has_scalar_region(src)) {
+ data_type = BRW_MATH_DATA_SCALAR;
+ } else {
+ data_type = BRW_MATH_DATA_VECTOR;
+ }
assert(brw->gen < 6);
if (brw->gen >= 6)
offset /= 16;
- mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+ if (p->brw->gen >= 7) {
+ /* On gen 7 and above, we no longer have message registers and we can
+ * send from any register we want. By using the destination register
+ * for the message, we guarantee that the implied message write won't
+ * accidentally overwrite anything. This has been a problem because
+ * the MRF registers and source for the final FB write are both fixed
+ * and may overlap.
+ */
+ mrf = retype(dest, BRW_REGISTER_TYPE_UD);
+ } else {
+ mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+ }
dest = retype(dest, BRW_REGISTER_TYPE_UW);
if (num_regs == 1) {
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
- brw_MOV(p,
- retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- mrf.nr,
- 2), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(offset));
+ brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
brw_pop_insn_state(p);
}
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
- unsigned msg_reg_nr,
- struct brw_reg src0,
+ struct brw_reg payload,
+ struct brw_reg implied_header,
unsigned msg_control,
unsigned binding_table_index,
unsigned msg_length,
unsigned response_length,
bool eot,
+ bool last_render_target,
bool header_present)
{
struct brw_context *brw = p->brw;
brw_inst *insn;
unsigned msg_type;
- struct brw_reg dest;
+ struct brw_reg dest, src0;
if (dispatch_width == 16)
dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
if (brw->gen >= 6) {
/* headerless version, just submit color payload */
- src0 = brw_message_reg(msg_reg_nr);
+ src0 = payload;
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
} else {
- brw_inst_set_base_mrf(brw, insn, msg_reg_nr);
+ assert(payload.file == BRW_MESSAGE_REGISTER_FILE);
+ brw_inst_set_base_mrf(brw, insn, payload.nr);
+ src0 = implied_header;
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
}
msg_type,
msg_length,
header_present,
- eot, /* last render target write */
+ last_render_target,
response_length,
eot,
0 /* send_commit_msg */);
return_format);
}
+/**
+ * Adjust the message header's sampler state pointer so that it selects the
+ * group of 16 samplers that contains \p sampler_index.
+ *
+ * The "Sampler Index" field can only hold values 0..15, but an offset can
+ * be added to the "Sampler State Pointer" field to pick a different set of
+ * 16 samplers.  The pointer needs to be aligned to a 32-byte offset while
+ * each sampler state is only 16 bytes, so the offset cannot be used on its
+ * own - both fields have to be used together.
+ *
+ * \param p             compile state; instructions are emitted through it
+ * \param header        message header; element 3 (UD) receives the pointer
+ * \param sampler_index immediate or register holding the sampler number
+ */
+void brw_adjust_sampler_state_pointer(struct brw_compile *p,
+                                      struct brw_reg header,
+                                      struct brw_reg sampler_index)
+{
+   struct brw_context *brw = p->brw;
+
+   if (sampler_index.file != BRW_IMMEDIATE_VALUE) {
+      /* Non-const sampler array indexing case */
+      if (brw->gen < 8 && !brw->is_haswell)
+         return;
+
+      struct brw_reg scratch = get_element_ud(header, 3);
+
+      /* (index & 0xf0) << 4 equals 16 * (index / 16) * 16 for indices that
+       * fit in the masked bits, i.e. the same byte offset the immediate
+       * path below computes.
+       */
+      brw_AND(p, scratch, get_element_ud(sampler_index, 0),
+              brw_imm_ud(0x0f0));
+      brw_SHL(p, scratch, scratch, brw_imm_ud(4));
+      brw_ADD(p,
+              get_element_ud(header, 3),
+              get_element_ud(brw_vec8_grf(0, 0), 3),
+              scratch);
+      return;
+   }
+
+   /* Immediate sampler number: the first 16 samplers need no adjustment. */
+   const uint32_t sampler = sampler_index.dw1.ud;
+   if (sampler < 16)
+      return;
+
+   const int sampler_state_size = 16; /* 16 bytes */
+
+   assert(brw->is_haswell || brw->gen >= 8);
+   brw_ADD(p,
+           get_element_ud(header, 3),
+           get_element_ud(brw_vec8_grf(0, 0), 3),
+           brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
+}
+
/* All these variables are pretty confusing - we might be better off
* using bitmasks and macros for this, in the old style. Or perhaps
* just having the caller instantiate the fields in dword3 itself.
assert(brw_inst_jip(brw, insn) != 0);
break;
- case BRW_OPCODE_ENDIF:
- if (block_end_offset == 0)
- brw_inst_set_jip(brw, insn, 1 * br);
+ case BRW_OPCODE_ENDIF: {
+ int32_t jump = (block_end_offset == 0) ?
+ 1 * br : (block_end_offset - offset) / scale;
+ if (brw->gen >= 7)
+ brw_inst_set_jip(brw, insn, jump);
else
- brw_inst_set_jip(brw, insn, (block_end_offset - offset) / scale);
+ brw_inst_set_gen6_jump_count(brw, insn, jump);
break;
+ }
case BRW_OPCODE_HALT:
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
void
brw_untyped_atomic(struct brw_compile *p,
struct brw_reg dest,
- struct brw_reg mrf,
+ struct brw_reg payload,
unsigned atomic_op,
unsigned bind_table_index,
unsigned msg_length,
brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
- brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
+ brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UD));
brw_set_src1(p, insn, brw_imm_d(0));
brw_set_dp_untyped_atomic_message(
p, insn, atomic_op, bind_table_index, msg_length, response_length,