struct brw_context *brw = p->brw;
struct brw_instruction *insn;
- gen6_resolve_implied_move(p, &src0, msg_reg_nr);
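+   /* A msg_reg_nr of -1 means the message payload already lives in GRFs,
+    * so there is no implied move into the MRF to resolve.
+    */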
+ if (msg_reg_nr != -1)
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
opcode == SHADER_OPCODE_SHADER_TIME_ADD ||
(opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
- src[1].file == GRF));
+ src[1].file == GRF) ||
+ (is_tex() && src[0].file == GRF));
}
bool
imm.u == r.imm.u);
}
+fs_reg
+fs_reg::retype(uint32_t type)
+{
+ fs_reg result = *this;
+ result.type = type;
+ return result;
+}
+
bool
fs_reg::is_zero() const
{
this->force_sechalf);
}
+int
+fs_inst::regs_read(fs_visitor *v, int arg)
+{
+ if (is_tex() && arg == 0 && src[0].file == GRF) {
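+      /* mlen counts hardware registers, but in 16-wide dispatch each slot
+       * of the payload VGRF spans two hardware registers, so convert back,
+       * rounding up for the single-register header.
+       */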
+ if (v->dispatch_width == 16)
+ return (mlen + 1) / 2;
+ else
+ return mlen;
+ }
+ return 1;
+}
+
/**
* Returns how many MRFs an FS opcode will write over.
*
if (inst->mlen == 0)
return 0;
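+   /* Messages whose payload lives in GRFs (base_mrf == -1) write no MRFs. */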
+ if (inst->base_mrf == -1)
+ return 0;
+
switch (inst->opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
break;
}
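+      /* A send-from-GRF that sources the register we want to turn into an
+       * MRF write means the value has to stay in the GRF.
+       */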
+ if (scan_inst->mlen > 0 && scan_inst->base_mrf == -1 &&
+ scan_inst->src[0].file == GRF &&
+ scan_inst->src[0].reg == inst->dst.reg) {
+ interfered = true;
+ break;
+ }
+
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
* value, there is a 33rd bit generated for the sign in the
}
}
- if (scan_inst->mlen > 0) {
+ if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1) {
/* Found a SEND instruction, which means that there are
* live values in MRFs from base_mrf to base_mrf +
* scan_inst->mlen - 1. Don't go pushing our MRF write up
last_mrf_move[inst->dst.reg] = NULL;
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
/* Found a SEND instruction, which will include two or fewer
* implied MRF writes. We could do better here.
*/
bool is_zero() const;
bool is_one() const;
bool is_valid_3src() const;
+ fs_reg retype(uint32_t type);
/** Register file: GRF, MRF, IMM. */
enum register_file file;
bool overwrites_reg(const fs_reg ®);
bool is_send_from_grf();
bool is_partial_write();
+ int regs_read(fs_visitor *v, int arg);
fs_reg dst;
fs_reg src[3];
void try_replace_with_sel();
void emit_bool_to_cond_code(ir_rvalue *condition);
void emit_if_gen6(ir_if *ir);
- void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+ void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset,
+ int count);
void emit_fragment_program_code();
void setup_fp_regs();
if (entry->src.file == IMM)
return false;
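+   /* Sources that read more than one register (such as a texture payload)
+    * aren't candidates for single-register copy propagation.
+    */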
+ if (inst->regs_read(this, arg) > 1)
+ return false;
+
if (inst->src[arg].file != entry->dst.file ||
inst->src[arg].reg != entry->dst.reg ||
inst->src[arg].reg_offset != entry->dst.reg_offset) {
dst = vec16(dst);
}
+ if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
+ /* The send-from-GRF for 16-wide texturing with a header has an extra
+ * hardware register allocated to it, which we need to skip over (since
+ * our coordinates in the payload are in the even-numbered registers,
+ * and the header comes right before the first one).
+ */
+ assert(src.file == BRW_GENERAL_REGISTER_FILE);
+ src.nr++;
+ }
+
/* Load the message header if present. If there's a texture offset,
* we need to set it up explicitly and load the offset bitfield.
* Otherwise, we can use an implied move from g0 to the first message reg.
*/
if (inst->texture_offset) {
+ struct brw_reg header_reg;
+
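+      /* On gen7+ the header is built in place at the start of the GRF
+       * payload; earlier generations still assemble it in the MRF.
+       */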
+ if (brw->gen >= 7) {
+ header_reg = src;
+ } else {
+ assert(inst->base_mrf != -1);
+ header_reg = retype(brw_message_reg(inst->base_mrf),
+ BRW_REGISTER_TYPE_UD);
+ }
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
/* Explicitly set up the message header by copying g0 to the MRF. */
- brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_MOV(p, header_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* Then set the offset bits in DWord 2. */
- brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- inst->base_mrf, 2), BRW_REGISTER_TYPE_UD),
+ brw_MOV(p, retype(brw_vec1_reg(header_reg.file,
+ header_reg.nr, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(inst->texture_offset));
brw_pop_insn_state(p);
} else if (inst->header_present) {
+ assert(brw->gen < 7);
/* Set up an implied move from g0 to the MRF. */
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
}
if (reg.file != GRF)
continue;
- int regs_read = 1;
- /* We don't know how many components are read in a send-from-grf,
- * so just assume "all of them."
- */
- if (inst->is_send_from_grf())
- regs_read = v->virtual_grf_sizes[reg.reg];
-
- for (int i = 0; i < regs_read; i++) {
+ for (int j = 0; j < inst->regs_read(v, i); j++) {
setup_one_read(block, inst, ip, reg);
reg.reg_offset++;
}
}
void
-fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset)
+fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset,
+ int count)
{
- fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
- unspill_inst->offset = spill_offset;
- unspill_inst->ir = inst->ir;
- unspill_inst->annotation = inst->annotation;
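+   /* Emit one scratch read per register, stepping the destination and the
+    * scratch offset forward together.
+    */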
+ for (int i = 0; i < count; i++) {
+ fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
+ unspill_inst->offset = spill_offset;
+ unspill_inst->ir = inst->ir;
+ unspill_inst->annotation = inst->annotation;
+
+      /* Choose an MRF that won't conflict with an MRF that's live across the
+ * spill. Nothing else will make it up to MRF 14/15.
+ */
+ unspill_inst->base_mrf = 14;
+ unspill_inst->mlen = 1; /* header contains offset */
+ inst->insert_before(unspill_inst);
- /* Choose a MRF that won't conflict with an MRF that's live across the
- * spill. Nothing else will make it up to MRF 14/15.
- */
- unspill_inst->base_mrf = 14;
- unspill_inst->mlen = 1; /* header contains offset */
- inst->insert_before(unspill_inst);
+ dst.reg_offset++;
+ spill_offset += REG_SIZE;
+ }
}
int
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
inst->src[i].reg == spill_reg) {
- inst->src[i].reg = virtual_grf_alloc(1);
- emit_unspill(inst, inst->src[i],
- spill_offset + REG_SIZE * inst->src[i].reg_offset);
+            int regs_read = inst->regs_read(this, i);
+            /* Capture this source's offset into the spilled value before
+             * rebasing it onto the freshly allocated temporary.
+             */
+            int subset_spill_offset = (spill_offset +
+                                       REG_SIZE * inst->src[i].reg_offset);
+
+            inst->src[i].reg = virtual_grf_alloc(regs_read);
+            inst->src[i].reg_offset = 0;
+
+            emit_unspill(inst, inst->src[i], subset_spill_offset, regs_read);
}
}
* since we write back out all of the regs_written().
*/
if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
- fs_reg unspill_reg = inst->dst;
- for (int chan = 0; chan < inst->regs_written; chan++) {
- emit_unspill(inst, unspill_reg,
- subset_spill_offset + REG_SIZE * chan);
- unspill_reg.reg_offset++;
- }
+ emit_unspill(inst, inst->dst, subset_spill_offset,
+ inst->regs_written);
}
fs_reg spill_src = inst->dst;
fs_reg shadow_c, fs_reg lod, fs_reg lod2,
fs_reg sample_index)
{
- int mlen = 0;
- int base_mrf = 2;
int reg_width = dispatch_width / 8;
bool header_present = false;
int offsets[3];
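+   /* Build the message payload in a virtual GRF rather than in MRFs;
+    * "next" tracks the slot where the next parameter will be written.
+    */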
+ fs_reg payload = fs_reg(this, glsl_type::float_type);
+ fs_reg next = payload;
+
if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {
- /* * The offsets set up by the ir_texture visitor are in the
- * m1 header, so we can't go headerless.
+ /* For general texture offsets (no txf workaround), we need a header to
+ * put them in. Note that for 16-wide we're making space for two actual
+ * hardware registers here, so the emit will have to fix up for this.
*
       * * ir_tg4 needs to place its channel select in the header,
* for interaction with ARB_texture_swizzle
*/
header_present = true;
- mlen++;
- base_mrf--;
+ next.reg_offset++;
}
if (ir->shadow_comparitor) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
- mlen += reg_width;
+ emit(MOV(next, shadow_c));
+ next.reg_offset++;
}
/* Set up the LOD info */
case ir_tg4:
break;
case ir_txb:
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
- mlen += reg_width;
+ emit(MOV(next, lod));
+ next.reg_offset++;
break;
case ir_txl:
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
- mlen += reg_width;
+ emit(MOV(next, lod));
+ next.reg_offset++;
break;
case ir_txd: {
if (dispatch_width == 16)
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
+ emit(MOV(next, coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
/* For cube map array, the coordinate is (u,v,r,ai) but there are
* only derivatives for (u, v, r).
*/
if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
+ emit(MOV(next, lod));
lod.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
+ emit(MOV(next, lod2));
lod2.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
}
break;
}
case ir_txs:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), lod));
+ next.reg_offset++;
break;
case ir_query_levels:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0)));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ next.reg_offset++;
break;
case ir_txf:
/* It appears that the ld instruction used for txf does its
}
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
- emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate, offsets[0]));
+ emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[0]));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_D), lod));
+ next.reg_offset++;
for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
- emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate, offsets[i]));
+ emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[i]));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
break;
case ir_txf_ms:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), sample_index));
+ next.reg_offset++;
/* constant zero MCS; we arrange to never actually have a compressed
* multisample surface here for now. TODO: issue ld_mcs to get this first,
* if we ever support texturing from compressed multisample surfaces
*/
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ next.reg_offset++;
/* there is no offsetting for this message; just copy in the integer
* texture coordinates
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate));
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
break;
}
/* Set up the coordinate (except for cases where it was done above) */
if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms && ir->op != ir_query_levels) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
+ emit(MOV(next, coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
}
/* Generate the SEND */
fs_inst *inst = NULL;
switch (ir->op) {
- case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
- case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
- case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
- case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
- case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
- case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
- case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
- case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst); break;
- case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
- case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break;
- }
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
+ case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break;
+ case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break;
+ case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break;
+ case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break;
+ case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break;
+ case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst, payload); break;
+ case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
+ case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
+ case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
+ case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst, payload); break;
+ }
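+   /* A base_mrf of -1 marks the payload as coming from the GRF in src[0]. */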
+ inst->base_mrf = -1;
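+   /* The message length is in hardware registers: each payload slot is
+    * reg_width registers, except that in 16-wide the header occupies only
+    * one register of its two-register slot (generate_tex skips the unused
+    * half).
+    */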
+ if (reg_width == 2)
+ inst->mlen = next.reg_offset * reg_width - header_present;
+ else
+ inst->mlen = next.reg_offset * reg_width;
+
inst->header_present = header_present;
inst->regs_written = 4;
- if (mlen > 11) {
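+   /* The payload was allocated as a single slot up front; record its real
+    * size now that we know how many slots were written.
+    */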
+ virtual_grf_sizes[payload.reg] = next.reg_offset;
+ if (inst->mlen > 11) {
fail("Message length >11 disallowed by hardware\n");
}
lod, lod2);
}
- /* The header is set up by generate_tex() when necessary. */
- inst->src[0] = reg_undef;
-
if (ir->offset != NULL && ir->op != ir_txf)
inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width; r++)
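+            /* A multi-register read (such as a texture payload) depends on
+             * every hardware register it covers.
+             */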
+ for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
add_dep(last_grf_write[inst->src[i].reg + r], n);
} else {
add_dep(last_grf_write[inst->src[i].reg], n);
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(last_mrf_write[inst->base_mrf + i], n);
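+   /* Send-from-GRF payloads are tracked through the GRF source
+    * dependencies above; only MRF payloads need ordering here.
+    */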
+ if (inst->base_mrf != -1) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+ }
}
if (inst->predicate) {
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width; r++)
+ for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
add_dep(n, last_grf_write[inst->src[i].reg + r]);
} else {
add_dep(n, last_grf_write[inst->src[i].reg]);
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ if (inst->base_mrf != -1) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
}
if (inst->predicate) {
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}