* successors is an exit node.
*/
schedule_node *exit;
-
- bool is_barrier;
};
/**
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
case SHADER_OPCODE_TYPED_ATOMIC:
/* Test code:
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
latency = is_haswell ? 300 : 600;
break;
+ case SHADER_OPCODE_SEND:
+ switch (inst->sfid) {
+ default:
+ unreachable("Unknown SFID");
+ }
+ break;
+
default:
/* 2 cycles:
* mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q };
class instruction_scheduler {
public:
instruction_scheduler(backend_shader *s, int grf_count,
- int hw_reg_count, int block_count,
+ unsigned hw_reg_count, int block_count,
instruction_scheduler_mode mode)
{
this->bs = s;
bool post_reg_alloc;
int instructions_to_schedule;
int grf_count;
- int hw_reg_count;
+ unsigned hw_reg_count;
int reg_pressure;
int block_idx;
exec_list instructions;
int payload_last_use_ip[hw_reg_count];
v->calculate_payload_ranges(hw_reg_count, payload_last_use_ip);
- for (int i = 0; i < hw_reg_count; i++) {
+ for (unsigned i = 0; i < hw_reg_count; i++) {
if (payload_last_use_ip[i] == -1)
continue;
}
/* Empty implementation for the vec4 scheduler: the body contains no
 * statements, so the instruction argument is never read.  The patched
 * signature leaves the parameter unnamed (presumably to avoid an
 * unused-parameter warning -- confirm against the build flags).
 */
void
-vec4_instruction_scheduler::count_reads_remaining(backend_instruction *be)
+vec4_instruction_scheduler::count_reads_remaining(backend_instruction *)
{
}
/* Empty implementation for the vec4 scheduler: nothing is computed and
 * the cfg argument is ignored.  The only change in this hunk is that the
 * parameter loses its name.
 */
void
-vec4_instruction_scheduler::setup_liveness(cfg_t *cfg)
+vec4_instruction_scheduler::setup_liveness(cfg_t *)
{
}
/* Empty implementation for the vec4 scheduler: the function body does
 * nothing, so no state is updated for the given instruction.  The hunk
 * only removes the unused parameter name.
 */
void
-vec4_instruction_scheduler::update_register_pressure(backend_instruction *be)
+vec4_instruction_scheduler::update_register_pressure(backend_instruction *)
{
}
/* Always reports a benefit of 0 for the vec4 scheduler, regardless of the
 * instruction passed in.  The hunk only removes the unused parameter name.
 */
int
-vec4_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
+vec4_instruction_scheduler::get_register_pressure_benefit(backend_instruction *)
{
return 0;
}
this->cand_generation = 0;
this->delay = 0;
this->exit = NULL;
- this->is_barrier = false;
/* We can't measure Gen6 timings directly but expect them to be much
* closer to Gen7 than Gen4.
add_dep(before, after, before->latency);
}
/**
 * Returns true when the instruction is FS_OPCODE_PLACEHOLDER_HALT, is a
 * control-flow instruction, or has side effects.
 *
 * This version takes a backend_instruction, replacing the separate
 * fs_inst and vec4_instruction variants that this patch removes.  The
 * barrier-dependency walk calls it on each neighbouring instruction and
 * stops adding dependencies in that direction once it returns true.
 */
+static bool
+is_scheduling_barrier(const backend_instruction *inst)
+{
+ return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
+ inst->is_control_flow() ||
+ inst->has_side_effects();
+}
+
/**
* Sometimes we really want this node to execute after everything that
* was before it and before everything that followed it. This adds
schedule_node *prev = (schedule_node *)n->prev;
schedule_node *next = (schedule_node *)n->next;
- n->is_barrier = true;
-
if (prev) {
while (!prev->is_head_sentinel()) {
add_dep(prev, n, 0);
- if (prev->is_barrier)
+ if (is_scheduling_barrier(prev->inst))
break;
prev = (schedule_node *)prev->prev;
}
if (next) {
while (!next->is_tail_sentinel()) {
add_dep(n, next, 0);
- if (next->is_barrier)
+ if (is_scheduling_barrier(next->inst))
break;
next = (schedule_node *)next->next;
}
return inst->exec_size == 16;
}
-static bool
-is_scheduling_barrier(const fs_inst *inst)
-{
- return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
- inst->is_control_flow() ||
- inst->has_side_effects();
-}
-
void
fs_instruction_scheduler::calculate_deps()
{
* After register allocation, reg_offsets are gone and we track individual
* GRF registers.
*/
- schedule_node *last_grf_write[grf_count * 16];
+ schedule_node **last_grf_write;
schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
- schedule_node *last_conditional_mod[4] = {};
+ schedule_node *last_conditional_mod[8] = {};
schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
*/
schedule_node *last_fixed_grf_write = NULL;
- memset(last_grf_write, 0, sizeof(last_grf_write));
+ last_grf_write = (schedule_node **)calloc(sizeof(schedule_node *), grf_count * 16);
memset(last_mrf_write, 0, sizeof(last_mrf_write));
/* top-to-bottom dependencies: RAW and WAW. */
}
/* bottom-to-top dependencies: WAR */
- memset(last_grf_write, 0, sizeof(last_grf_write));
+ memset(last_grf_write, 0, sizeof(schedule_node *) * grf_count * 16);
memset(last_mrf_write, 0, sizeof(last_mrf_write));
memset(last_conditional_mod, 0, sizeof(last_conditional_mod));
last_accumulator_write = NULL;
last_accumulator_write = n;
}
}
-}
-static bool
-is_scheduling_barrier(const vec4_instruction *inst)
-{
- return inst->is_control_flow() ||
- inst->has_side_effects();
+ free(last_grf_write);
}
void
}
}
+ if (inst->reads_g0_implicitly())
+ add_dep(last_fixed_grf_write, n);
+
if (!inst->is_send_from_grf()) {
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
/* Issue time of an FS instruction: a base of 4 when is_compressed()
 * reports the instruction as compressed and 2 otherwise.  The patched
 * version adds the value returned by v->bank_conflict_cycles() for the
 * instruction on top of that base.
 */
int
fs_instruction_scheduler::issue_time(backend_instruction *inst)
{
/* Extra amount added to the base issue time in both branches below. */
+ const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst);
if (is_compressed((fs_inst *)inst))
- return 4;
+ return 4 + overhead;
else
- return 2;
+ return 2 + overhead;
}
int
-vec4_instruction_scheduler::issue_time(backend_instruction *inst)
+vec4_instruction_scheduler::issue_time(backend_instruction *)
{
/* We always execute as two vec4s in parallel. */
return 2;