break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_ATOMIC:
/* Test code:
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
* mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all };
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
/* Test code:
* mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
* mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
class instruction_scheduler {
public:
- instruction_scheduler(backend_visitor *v, int grf_count,
+ instruction_scheduler(backend_shader *s, int grf_count,
instruction_scheduler_mode mode)
{
- this->bv = v;
+ this->bs = s;
this->mem_ctx = ralloc_context(NULL);
this->grf_count = grf_count;
this->instructions.make_empty();
int grf_count;
int time;
exec_list instructions;
- backend_visitor *bv;
+ backend_shader *bs;
instruction_scheduler_mode mode;
if (inst->dst.file == GRF) {
if (remaining_grf_uses[inst->dst.reg] == 1)
- benefit += v->virtual_grf_sizes[inst->dst.reg];
+ benefit += v->alloc.sizes[inst->dst.reg];
if (!grf_active[inst->dst.reg])
- benefit -= v->virtual_grf_sizes[inst->dst.reg];
+ benefit -= v->alloc.sizes[inst->dst.reg];
}
for (int i = 0; i < inst->sources; i++) {
continue;
if (remaining_grf_uses[inst->src[i].reg] == 1)
- benefit += v->virtual_grf_sizes[inst->src[i].reg];
+ benefit += v->alloc.sizes[inst->src[i].reg];
if (!grf_active[inst->src[i].reg])
- benefit -= v->virtual_grf_sizes[inst->src[i].reg];
+ benefit -= v->alloc.sizes[inst->src[i].reg];
}
return benefit;
schedule_node::schedule_node(backend_instruction *inst,
instruction_scheduler *sched)
{
- struct brw_context *brw = sched->bv->brw;
+ const struct brw_device_info *devinfo = sched->bs->devinfo;
this->inst = inst;
this->child_array_size = 0;
*/
if (!sched->post_reg_alloc)
this->latency = 1;
- else if (brw->gen >= 6)
- set_latency_gen7(brw->is_haswell);
+ else if (devinfo->gen >= 6)
+ set_latency_gen7(devinfo->is_haswell);
else
set_latency_gen4();
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_read(v, i); r++)
+ for (int r = 0; r < inst->regs_read(i); r++)
add_dep(last_grf_write[inst->src[i].reg + r], n);
} else {
- for (int r = 0; r < inst->regs_read(v, i); r++) {
+ for (int r = 0; r < inst->regs_read(i); r++) {
add_dep(last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], n);
}
}
} else if (inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
- } else if (inst->dst.file != BAD_FILE) {
+ } else if (inst->dst.file != BAD_FILE &&
+ !inst->dst.is_null()) {
add_barrier_deps(n);
}
last_conditional_mod[inst->flag_subreg] = n;
}
- if (inst->writes_accumulator_implicitly(v->brw) &&
+ if (inst->writes_accumulator_implicitly(v->devinfo) &&
!inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_read(v, i); r++)
+ for (int r = 0; r < inst->regs_read(i); r++)
add_dep(n, last_grf_write[inst->src[i].reg + r]);
} else {
- for (int r = 0; r < inst->regs_read(v, i); r++) {
+ for (int r = 0; r < inst->regs_read(i); r++) {
add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r]);
}
}
}
} else if (inst->dst.is_accumulator()) {
last_accumulator_write = n;
- } else if (inst->dst.file != BAD_FILE) {
+ } else if (inst->dst.file != BAD_FILE &&
+ !inst->dst.is_null()) {
add_barrier_deps(n);
}
last_conditional_mod[inst->flag_subreg] = n;
}
- if (inst->writes_accumulator_implicitly(v->brw)) {
+ if (inst->writes_accumulator_implicitly(v->devinfo)) {
last_accumulator_write = n;
}
}
/* read-after-write deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- add_dep(last_grf_write[inst->src[i].reg], n);
+ for (unsigned j = 0; j < inst->regs_read(i); ++j)
+ add_dep(last_grf_write[inst->src[i].reg + j], n);
} else if (inst->src[i].file == HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(last_mrf_write[inst->base_mrf + i], n);
+ if (!inst->is_send_from_grf()) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+ }
}
if (inst->reads_flag()) {
/* write-after-write deps. */
if (inst->dst.file == GRF) {
- add_dep(last_grf_write[inst->dst.reg], n);
- last_grf_write[inst->dst.reg] = n;
+ for (unsigned j = 0; j < inst->regs_written; ++j) {
+ add_dep(last_grf_write[inst->dst.reg + j], n);
+ last_grf_write[inst->dst.reg + j] = n;
+ }
} else if (inst->dst.file == MRF) {
add_dep(last_mrf_write[inst->dst.reg], n);
last_mrf_write[inst->dst.reg] = n;
} else if (inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
- } else if (inst->dst.file != BAD_FILE) {
+ } else if (inst->dst.file != BAD_FILE &&
+ !inst->dst.is_null()) {
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && !inst->is_send_from_grf()) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
last_conditional_mod = n;
}
- if (inst->writes_accumulator_implicitly(v->brw) &&
+ if (inst->writes_accumulator_implicitly(v->devinfo) &&
!inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
/* write-after-read deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- add_dep(n, last_grf_write[inst->src[i].reg]);
+ for (unsigned j = 0; j < inst->regs_read(i); ++j)
+ add_dep(n, last_grf_write[inst->src[i].reg + j]);
} else if (inst->src[i].file == HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ if (!inst->is_send_from_grf()) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
}
if (inst->reads_flag()) {
* can mark this as WAR dependency.
*/
if (inst->dst.file == GRF) {
- last_grf_write[inst->dst.reg] = n;
+ for (unsigned j = 0; j < inst->regs_written; ++j)
+ last_grf_write[inst->dst.reg + j] = n;
} else if (inst->dst.file == MRF) {
last_mrf_write[inst->dst.reg] = n;
} else if (inst->dst.file == HW_REG &&
last_fixed_grf_write = n;
} else if (inst->dst.is_accumulator()) {
last_accumulator_write = n;
- } else if (inst->dst.file != BAD_FILE) {
+ } else if (inst->dst.file != BAD_FILE &&
+ !inst->dst.is_null()) {
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && !inst->is_send_from_grf()) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}
last_conditional_mod = n;
}
- if (inst->writes_accumulator_implicitly(v->brw)) {
+ if (inst->writes_accumulator_implicitly(v->devinfo)) {
last_accumulator_write = n;
}
}
schedule_node *
fs_instruction_scheduler::choose_instruction_to_schedule()
{
- struct brw_context *brw = v->brw;
schedule_node *chosen = NULL;
if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) {
* then the MRFs for the next SEND, then the next SEND, then the
* MRFs, etc., without ever consuming the results of a send.
*/
- if (brw->gen < 7) {
+ if (v->devinfo->gen < 7) {
fs_inst *chosen_inst = (fs_inst *)chosen->inst;
/* We use regs_written > 1 as our test for the kind of send
* single-result send is probably actually reducing register
* pressure.
*/
- if (inst->regs_written <= inst->dst.width / 8 &&
- chosen_inst->regs_written > chosen_inst->dst.width / 8) {
+ if (inst->regs_written <= inst->exec_size / 8 &&
+ chosen_inst->regs_written > chosen_inst->exec_size / 8) {
chosen = n;
continue;
} else if (inst->regs_written > chosen_inst->regs_written) {
void
instruction_scheduler::schedule_instructions(bblock_t *block)
{
- struct brw_context *brw = bv->brw;
+ const struct brw_device_info *devinfo = bs->devinfo;
backend_instruction *inst = block->end();
time = 0;
if (debug) {
fprintf(stderr, "clock %4d, scheduled: ", time);
- bv->dump_instruction(chosen->inst);
+ bs->dump_instruction(chosen->inst);
}
/* Now that we've scheduled a new instruction, some of its
if (debug) {
fprintf(stderr, "\tchild %d, %d parents: ", i, child->parent_count);
- bv->dump_instruction(child->inst);
+ bs->dump_instruction(child->inst);
}
child->cand_generation = cand_generation;
* the next math instruction isn't going to make progress until the first
* is done.
*/
- if (brw->gen < 6 && chosen->inst->is_math()) {
+ if (devinfo->gen < 6 && chosen->inst->is_math()) {
foreach_in_list(schedule_node, n, &instructions) {
if (n->inst->is_math())
n->unblocked_time = MAX2(n->unblocked_time,
if (debug) {
fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n",
post_reg_alloc);
- bv->dump_instructions();
+ bs->dump_instructions();
}
/* Populate the remaining GRF uses array to improve the pre-regalloc
if (debug) {
fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n",
post_reg_alloc);
- bv->dump_instructions();
+ bs->dump_instructions();
}
}
if (mode == SCHEDULE_POST)
grf_count = grf_used;
else
- grf_count = virtual_grf_count;
+ grf_count = alloc.count;
fs_instruction_scheduler sched(this, grf_count, mode);
sched.run(cfg);
- if (unlikely(INTEL_DEBUG & DEBUG_WM) && mode == SCHEDULE_POST) {
- fprintf(stderr, "fs%d estimated execution time: %d cycles\n",
- dispatch_width, sched.time);
+ if (unlikely(debug_enabled) && mode == SCHEDULE_POST) {
+ fprintf(stderr, "%s%d estimated execution time: %d cycles\n",
+ stage_abbrev, dispatch_width, sched.time);
}
invalidate_live_intervals();
vec4_instruction_scheduler sched(this, prog_data->total_grf);
sched.run(cfg);
- if (unlikely(debug_flag)) {
- fprintf(stderr, "vec4 estimated execution time: %d cycles\n", sched.time);
+ if (unlikely(debug_enabled)) {
+ fprintf(stderr, "%s estimated execution time: %d cycles\n",
+ stage_abbrev, sched.time);
}
invalidate_live_intervals();