break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_ATOMIC:
/* Test code:
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
* mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all };
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
/* Test code:
* mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
* mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
class instruction_scheduler {
public:
- instruction_scheduler(backend_visitor *v, int grf_count,
+ instruction_scheduler(backend_shader *s, int grf_count,
instruction_scheduler_mode mode)
{
- this->bv = v;
+ this->bs = s;
this->mem_ctx = ralloc_context(NULL);
this->grf_count = grf_count;
this->instructions.make_empty();
this->remaining_grf_uses = NULL;
this->grf_active = NULL;
}
- v->calculate_cfg();
}
~instruction_scheduler()
int grf_count;
int time;
exec_list instructions;
- backend_visitor *bv;
+ backend_shader *bs;
instruction_scheduler_mode mode;
if (inst->dst.file == GRF) {
if (remaining_grf_uses[inst->dst.reg] == 1)
- benefit += v->virtual_grf_sizes[inst->dst.reg];
+ benefit += v->alloc.sizes[inst->dst.reg];
if (!grf_active[inst->dst.reg])
- benefit -= v->virtual_grf_sizes[inst->dst.reg];
+ benefit -= v->alloc.sizes[inst->dst.reg];
}
for (int i = 0; i < inst->sources; i++) {
continue;
if (remaining_grf_uses[inst->src[i].reg] == 1)
- benefit += v->virtual_grf_sizes[inst->src[i].reg];
+ benefit += v->alloc.sizes[inst->src[i].reg];
if (!grf_active[inst->src[i].reg])
- benefit -= v->virtual_grf_sizes[inst->src[i].reg];
+ benefit -= v->alloc.sizes[inst->src[i].reg];
}
return benefit;
schedule_node::schedule_node(backend_instruction *inst,
instruction_scheduler *sched)
{
- struct brw_context *brw = sched->bv->brw;
+ const struct brw_device_info *devinfo = sched->bs->devinfo;
this->inst = inst;
this->child_array_size = 0;
*/
if (!sched->post_reg_alloc)
this->latency = 1;
- else if (brw->gen >= 6)
- set_latency_gen7(brw->is_haswell);
+ else if (devinfo->gen >= 6)
+ set_latency_gen7(devinfo->is_haswell);
else
set_latency_gen4();
}
/* Removing the last instruction from a basic block removes the block as
* well, so put a NOP at the end to keep it alive.
*/
- if (!block->end->is_control_flow()) {
+ if (!block->end()->is_control_flow()) {
backend_instruction *nop = new(mem_ctx) backend_instruction();
nop->opcode = BRW_OPCODE_NOP;
- block->end->insert_after(block, nop);
+ block->end()->insert_after(block, nop);
}
foreach_inst_in_block_safe(backend_instruction, inst, block) {
*/
void
instruction_scheduler::add_dep(schedule_node *before, schedule_node *after,
- int latency)
+ int latency)
{
if (!before || !after)
return;
for (int i = 0; i < before->child_count; i++) {
if (before->children[i] == after) {
- before->child_latency[i] = MAX2(before->child_latency[i], latency);
- return;
+ before->child_latency[i] = MAX2(before->child_latency[i], latency);
+ return;
}
}
if (before->child_array_size <= before->child_count) {
if (before->child_array_size < 16)
- before->child_array_size = 16;
+ before->child_array_size = 16;
else
- before->child_array_size *= 2;
+ before->child_array_size *= 2;
before->children = reralloc(mem_ctx, before->children,
- schedule_node *,
- before->child_array_size);
+ schedule_node *,
+ before->child_array_size);
before->child_latency = reralloc(mem_ctx, before->child_latency,
- int, before->child_array_size);
+ int, before->child_array_size);
}
before->children[before->child_count] = after;
if (prev) {
while (!prev->is_head_sentinel()) {
- add_dep(prev, n, 0);
- prev = (schedule_node *)prev->prev;
+ add_dep(prev, n, 0);
+ prev = (schedule_node *)prev->prev;
}
}
if (next) {
while (!next->is_tail_sentinel()) {
- add_dep(n, next, 0);
- next = (schedule_node *)next->next;
+ add_dep(n, next, 0);
+ next = (schedule_node *)next->next;
}
}
}
/* Returns true when the instruction executes across a full 16-channel
 * (compressed) execution size.  Callers in the MRF write-tracking code use
 * this to also record the second register of a COMPR4 / adjacent MRF pair
 * as written.  The diff below replaces the old dispatch_width/force_*
 * heuristic with a direct per-instruction exec_size check.
 */
bool
fs_instruction_scheduler::is_compressed(fs_inst *inst)
{
-   return (v->dispatch_width == 16 &&
-           !inst->force_uncompressed &&
-           !inst->force_sechalf);
+   return inst->exec_size == 16;
}
void
/* read-after-write deps. */
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
+ for (int r = 0; r < inst->regs_read(i); r++)
add_dep(last_grf_write[inst->src[i].reg + r], n);
} else {
- for (int r = 0; r < inst->regs_read(v, i); r++) {
+ for (int r = 0; r < inst->regs_read(i); r++) {
add_dep(last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], n);
}
}
- } else if (inst->src[i].file == HW_REG &&
- (inst->src[i].fixed_hw_reg.file ==
- BRW_GENERAL_REGISTER_FILE)) {
- if (post_reg_alloc) {
+ } else if (inst->src[i].file == HW_REG &&
+ (inst->src[i].fixed_hw_reg.file ==
+ BRW_GENERAL_REGISTER_FILE)) {
+ if (post_reg_alloc) {
int size = reg_width;
if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
size = 1;
}
} else if (inst->src[i].is_accumulator()) {
add_dep(last_accumulator_write, n);
- } else if (inst->src[i].file != BAD_FILE &&
- inst->src[i].file != IMM &&
- inst->src[i].file != UNIFORM &&
+ } else if (inst->src[i].file != BAD_FILE &&
+ inst->src[i].file != IMM &&
+ inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
inst->src[i].fixed_hw_reg.file != IMM)) {
- assert(inst->src[i].file != MRF);
- add_barrier_deps(n);
- }
+ assert(inst->src[i].file != MRF);
+ add_barrier_deps(n);
+ }
}
if (inst->base_mrf != -1) {
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(last_mrf_write[inst->base_mrf + i], n);
- }
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+ }
}
if (inst->reads_flag()) {
- add_dep(last_conditional_mod[inst->flag_subreg], n);
+ add_dep(last_conditional_mod[inst->flag_subreg], n);
}
if (inst->reads_accumulator_implicitly()) {
/* write-after-write deps. */
if (inst->dst.file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_written * reg_width; r++) {
+ for (int r = 0; r < inst->regs_written; r++) {
add_dep(last_grf_write[inst->dst.reg + r], n);
last_grf_write[inst->dst.reg + r] = n;
}
}
}
} else if (inst->dst.file == MRF) {
- int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
-
- add_dep(last_mrf_write[reg], n);
- last_mrf_write[reg] = n;
- if (is_compressed(inst)) {
- if (inst->dst.reg & BRW_MRF_COMPR4)
- reg += 4;
- else
- reg++;
- add_dep(last_mrf_write[reg], n);
- last_mrf_write[reg] = n;
- }
+ int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
+
+ add_dep(last_mrf_write[reg], n);
+ last_mrf_write[reg] = n;
+ if (is_compressed(inst)) {
+ if (inst->dst.reg & BRW_MRF_COMPR4)
+ reg += 4;
+ else
+ reg++;
+ add_dep(last_mrf_write[reg], n);
+ last_mrf_write[reg] = n;
+ }
} else if (inst->dst.file == HW_REG &&
- inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
if (post_reg_alloc) {
for (int r = 0; r < reg_width; r++)
last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
} else if (inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
- } else if (inst->dst.file != BAD_FILE) {
- add_barrier_deps(n);
+ } else if (inst->dst.file != BAD_FILE &&
+ !inst->dst.is_null()) {
+ add_barrier_deps(n);
}
if (inst->mlen > 0 && inst->base_mrf != -1) {
- for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
- add_dep(last_mrf_write[inst->base_mrf + i], n);
- last_mrf_write[inst->base_mrf + i] = n;
- }
+ for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+ last_mrf_write[inst->base_mrf + i] = n;
+ }
}
if (inst->writes_flag()) {
- add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
- last_conditional_mod[inst->flag_subreg] = n;
+ add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
+ last_conditional_mod[inst->flag_subreg] = n;
}
- if (inst->writes_accumulator_implicitly(v->brw) &&
+ if (inst->writes_accumulator_implicitly(v->devinfo) &&
!inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
exec_node *node;
exec_node *prev;
for (node = instructions.get_tail(), prev = node->prev;
- !node->is_head_sentinel();
- node = prev, prev = node->prev) {
+ !node->is_head_sentinel();
+ node = prev, prev = node->prev) {
schedule_node *n = (schedule_node *)node;
fs_inst *inst = (fs_inst *)n->inst;
/* write-after-read deps. */
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
+ for (int r = 0; r < inst->regs_read(i); r++)
add_dep(n, last_grf_write[inst->src[i].reg + r]);
} else {
- for (int r = 0; r < inst->regs_read(v, i); r++) {
+ for (int r = 0; r < inst->regs_read(i); r++) {
add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r]);
}
}
- } else if (inst->src[i].file == HW_REG &&
- (inst->src[i].fixed_hw_reg.file ==
- BRW_GENERAL_REGISTER_FILE)) {
- if (post_reg_alloc) {
+ } else if (inst->src[i].file == HW_REG &&
+ (inst->src[i].fixed_hw_reg.file ==
+ BRW_GENERAL_REGISTER_FILE)) {
+ if (post_reg_alloc) {
int size = reg_width;
if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
size = 1;
} else if (inst->src[i].is_accumulator()) {
add_dep(n, last_accumulator_write);
} else if (inst->src[i].file != BAD_FILE &&
- inst->src[i].file != IMM &&
- inst->src[i].file != UNIFORM &&
+ inst->src[i].file != IMM &&
+ inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
inst->src[i].fixed_hw_reg.file != IMM)) {
- assert(inst->src[i].file != MRF);
- add_barrier_deps(n);
- }
+ assert(inst->src[i].file != MRF);
+ add_barrier_deps(n);
+ }
}
if (inst->base_mrf != -1) {
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
- }
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
}
if (inst->reads_flag()) {
- add_dep(n, last_conditional_mod[inst->flag_subreg]);
+ add_dep(n, last_conditional_mod[inst->flag_subreg]);
}
if (inst->reads_accumulator_implicitly()) {
*/
if (inst->dst.file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_written * reg_width; r++)
+ for (int r = 0; r < inst->regs_written; r++)
last_grf_write[inst->dst.reg + r] = n;
} else {
for (int r = 0; r < inst->regs_written; r++) {
}
}
} else if (inst->dst.file == MRF) {
- int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
+ int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
- last_mrf_write[reg] = n;
+ last_mrf_write[reg] = n;
- if (is_compressed(inst)) {
- if (inst->dst.reg & BRW_MRF_COMPR4)
- reg += 4;
- else
- reg++;
+ if (is_compressed(inst)) {
+ if (inst->dst.reg & BRW_MRF_COMPR4)
+ reg += 4;
+ else
+ reg++;
- last_mrf_write[reg] = n;
- }
+ last_mrf_write[reg] = n;
+ }
} else if (inst->dst.file == HW_REG &&
- inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
if (post_reg_alloc) {
for (int r = 0; r < reg_width; r++)
last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
}
} else if (inst->dst.is_accumulator()) {
last_accumulator_write = n;
- } else if (inst->dst.file != BAD_FILE) {
- add_barrier_deps(n);
+ } else if (inst->dst.file != BAD_FILE &&
+ !inst->dst.is_null()) {
+ add_barrier_deps(n);
}
if (inst->mlen > 0 && inst->base_mrf != -1) {
- for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
- last_mrf_write[inst->base_mrf + i] = n;
- }
+ for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+ last_mrf_write[inst->base_mrf + i] = n;
+ }
}
if (inst->writes_flag()) {
- last_conditional_mod[inst->flag_subreg] = n;
+ last_conditional_mod[inst->flag_subreg] = n;
}
- if (inst->writes_accumulator_implicitly(v->brw)) {
+ if (inst->writes_accumulator_implicitly(v->devinfo)) {
last_accumulator_write = n;
}
}
/* read-after-write deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- add_dep(last_grf_write[inst->src[i].reg], n);
+ for (unsigned j = 0; j < inst->regs_read(i); ++j)
+ add_dep(last_grf_write[inst->src[i].reg + j], n);
} else if (inst->src[i].file == HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(last_mrf_write[inst->base_mrf + i], n);
+ if (!inst->is_send_from_grf()) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+ }
}
if (inst->reads_flag()) {
/* write-after-write deps. */
if (inst->dst.file == GRF) {
- add_dep(last_grf_write[inst->dst.reg], n);
- last_grf_write[inst->dst.reg] = n;
+ for (unsigned j = 0; j < inst->regs_written; ++j) {
+ add_dep(last_grf_write[inst->dst.reg + j], n);
+ last_grf_write[inst->dst.reg + j] = n;
+ }
} else if (inst->dst.file == MRF) {
add_dep(last_mrf_write[inst->dst.reg], n);
last_mrf_write[inst->dst.reg] = n;
} else if (inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
- } else if (inst->dst.file != BAD_FILE) {
+ } else if (inst->dst.file != BAD_FILE &&
+ !inst->dst.is_null()) {
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && !inst->is_send_from_grf()) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
last_conditional_mod = n;
}
- if (inst->writes_accumulator_implicitly(v->brw) &&
+ if (inst->writes_accumulator_implicitly(v->devinfo) &&
!inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
/* write-after-read deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- add_dep(n, last_grf_write[inst->src[i].reg]);
+ for (unsigned j = 0; j < inst->regs_read(i); ++j)
+ add_dep(n, last_grf_write[inst->src[i].reg + j]);
} else if (inst->src[i].file == HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ if (!inst->is_send_from_grf()) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
}
if (inst->reads_flag()) {
* can mark this as WAR dependency.
*/
if (inst->dst.file == GRF) {
- last_grf_write[inst->dst.reg] = n;
+ for (unsigned j = 0; j < inst->regs_written; ++j)
+ last_grf_write[inst->dst.reg + j] = n;
} else if (inst->dst.file == MRF) {
last_mrf_write[inst->dst.reg] = n;
} else if (inst->dst.file == HW_REG &&
last_fixed_grf_write = n;
} else if (inst->dst.is_accumulator()) {
last_accumulator_write = n;
- } else if (inst->dst.file != BAD_FILE) {
+ } else if (inst->dst.file != BAD_FILE &&
+ !inst->dst.is_null()) {
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && !inst->is_send_from_grf()) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}
last_conditional_mod = n;
}
- if (inst->writes_accumulator_implicitly(v->brw)) {
+ if (inst->writes_accumulator_implicitly(v->devinfo)) {
last_accumulator_write = n;
}
}
schedule_node *
fs_instruction_scheduler::choose_instruction_to_schedule()
{
- struct brw_context *brw = v->brw;
schedule_node *chosen = NULL;
if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) {
* then the MRFs for the next SEND, then the next SEND, then the
* MRFs, etc., without ever consuming the results of a send.
*/
- if (brw->gen < 7) {
+ if (v->devinfo->gen < 7) {
fs_inst *chosen_inst = (fs_inst *)chosen->inst;
/* We use regs_written > 1 as our test for the kind of send
* single-result send is probably actually reducing register
* pressure.
*/
- if (inst->regs_written <= 1 && chosen_inst->regs_written > 1) {
+ if (inst->regs_written <= inst->exec_size / 8 &&
+ chosen_inst->regs_written > chosen_inst->exec_size / 8) {
chosen = n;
continue;
} else if (inst->regs_written > chosen_inst->regs_written) {
void
instruction_scheduler::schedule_instructions(bblock_t *block)
{
- struct brw_context *brw = bv->brw;
- backend_instruction *inst = block->end;
+ const struct brw_device_info *devinfo = bs->devinfo;
+ backend_instruction *inst = block->end();
time = 0;
/* Remove non-DAG heads from the list. */
foreach_in_list_safe(schedule_node, n, &instructions) {
if (n->parent_count != 0)
- n->remove();
+ n->remove();
}
unsigned cand_generation = 1;
if (debug) {
fprintf(stderr, "clock %4d, scheduled: ", time);
- bv->dump_instruction(chosen->inst);
+ bs->dump_instruction(chosen->inst);
}
/* Now that we've scheduled a new instruction, some of its
* DAG edge as we do so.
*/
for (int i = chosen->child_count - 1; i >= 0; i--) {
- schedule_node *child = chosen->children[i];
+ schedule_node *child = chosen->children[i];
- child->unblocked_time = MAX2(child->unblocked_time,
- time + chosen->child_latency[i]);
+ child->unblocked_time = MAX2(child->unblocked_time,
+ time + chosen->child_latency[i]);
if (debug) {
fprintf(stderr, "\tchild %d, %d parents: ", i, child->parent_count);
- bv->dump_instruction(child->inst);
+ bs->dump_instruction(child->inst);
}
child->cand_generation = cand_generation;
- child->parent_count--;
- if (child->parent_count == 0) {
+ child->parent_count--;
+ if (child->parent_count == 0) {
if (debug) {
fprintf(stderr, "\t\tnow available\n");
}
- instructions.push_head(child);
- }
+ instructions.push_head(child);
+ }
}
cand_generation++;
* the next math instruction isn't going to make progress until the first
* is done.
*/
- if (brw->gen < 6 && chosen->inst->is_math()) {
+ if (devinfo->gen < 6 && chosen->inst->is_math()) {
foreach_in_list(schedule_node, n, &instructions) {
- if (n->inst->is_math())
- n->unblocked_time = MAX2(n->unblocked_time,
- time + chosen->latency);
- }
+ if (n->inst->is_math())
+ n->unblocked_time = MAX2(n->unblocked_time,
+ time + chosen->latency);
+ }
}
}
- if (block->end->opcode == BRW_OPCODE_NOP)
- block->end->remove(block);
+ if (block->end()->opcode == BRW_OPCODE_NOP)
+ block->end()->remove(block);
assert(instructions_to_schedule == 0);
}
if (debug) {
fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n",
post_reg_alloc);
- bv->dump_instructions();
+ bs->dump_instructions();
}
/* Populate the remaining GRF uses array to improve the pre-regalloc
if (debug) {
fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n",
post_reg_alloc);
- bv->dump_instructions();
+ bs->dump_instructions();
}
}
if (mode == SCHEDULE_POST)
grf_count = grf_used;
else
- grf_count = virtual_grf_count;
+ grf_count = alloc.count;
fs_instruction_scheduler sched(this, grf_count, mode);
sched.run(cfg);
- if (unlikely(INTEL_DEBUG & DEBUG_WM) && mode == SCHEDULE_POST) {
- fprintf(stderr, "fs%d estimated execution time: %d cycles\n",
- dispatch_width, sched.time);
+ if (unlikely(debug_enabled) && mode == SCHEDULE_POST) {
+ fprintf(stderr, "%s%d estimated execution time: %d cycles\n",
+ stage_abbrev, dispatch_width, sched.time);
}
invalidate_live_intervals();
vec4_instruction_scheduler sched(this, prog_data->total_grf);
sched.run(cfg);
- if (unlikely(debug_flag)) {
- fprintf(stderr, "vec4 estimated execution time: %d cycles\n", sched.time);
+ if (unlikely(debug_enabled)) {
+ fprintf(stderr, "%s estimated execution time: %d cycles\n",
+ stage_abbrev, sched.time);
}
invalidate_live_intervals();