int *slots;
gpir_node_type type;
bool spillless;
+ bool schedule_first;
bool may_consume_two_slots;
} gpir_op_info;
*
* (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
* alu_num_slot_needed_by_max +
- * alu_num_slot_needed_by_next_max
+ * max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
* (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
* alu_num_slot_neede_by_non_cplx_store
+ *
+ * alu_max_allowed_next_max is normally 5 (since at most 5 max nodes can
+ * be deferred to the next instruction), but when this instruction
+ * contains a complex1 node it is reduced to 4, reserving a slot for the
+ * complex2 node that has to go in the next instruction.
*/
int alu_num_slot_needed_by_store;
int alu_num_slot_needed_by_non_cplx_store;
int alu_num_slot_needed_by_max;
- int alu_num_slot_needed_by_next_max;
+ int alu_num_unscheduled_next_max;
+ int alu_max_allowed_next_max;
/* Used to communicate to the scheduler how many slots need to be cleared
* up in order to satisfy the invariants.
instr->index = block->sched.instr_index++;
instr->alu_num_slot_free = 6;
instr->alu_non_cplx_slot_free = 5;
+ instr->alu_max_allowed_next_max = 5;
list_add(&instr->list, &block->instr_list);
return instr;
int non_cplx_store_reduce_slot = 0;
int max_reduce_slot = node->sched.max_node ? 1 : 0;
int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0;
+ int alu_new_max_allowed_next_max =
+ node->op == gpir_op_complex1 ? 4 : instr->alu_max_allowed_next_max;
/* check if this node is child of one store node.
* complex1 won't be any of this instr's store node's child,
int slot_difference =
instr->alu_num_slot_needed_by_store - store_reduce_slot +
instr->alu_num_slot_needed_by_max - max_reduce_slot +
- MAX2(instr->alu_num_slot_needed_by_next_max - next_max_reduce_slot, 0) -
+ MAX2(instr->alu_num_unscheduled_next_max - next_max_reduce_slot -
+ alu_new_max_allowed_next_max, 0) -
(instr->alu_num_slot_free - consume_slot);
if (slot_difference > 0) {
gpir_debug("failed %d because of alu slot\n", node->index);
instr->alu_num_slot_needed_by_store -= store_reduce_slot;
instr->alu_num_slot_needed_by_non_cplx_store -= non_cplx_store_reduce_slot;
instr->alu_num_slot_needed_by_max -= max_reduce_slot;
- instr->alu_num_slot_needed_by_next_max -= next_max_reduce_slot;
+ instr->alu_num_unscheduled_next_max -= next_max_reduce_slot;
+ instr->alu_max_allowed_next_max = alu_new_max_allowed_next_max;
return true;
}
if (node->sched.max_node)
instr->alu_num_slot_needed_by_max++;
if (node->sched.next_max_node)
- instr->alu_num_slot_needed_by_next_max++;
+ instr->alu_num_unscheduled_next_max++;
+ if (node->op == gpir_op_complex1)
+ instr->alu_max_allowed_next_max = 5;
}
static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)
*/
int slot_difference = instr->alu_num_slot_needed_by_store + 1
+ instr->alu_num_slot_needed_by_max +
- MAX2(instr->alu_num_slot_needed_by_next_max, 0) -
+ MAX2(instr->alu_num_unscheduled_next_max - instr->alu_max_allowed_next_max, 0) -
instr->alu_num_slot_free;
if (slot_difference > 0) {
instr->slot_difference = slot_difference;
.name = "complex2",
.slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
.spillless = true,
+ .schedule_first = true,
},
[gpir_op_add] = {
.name = "add",
.name = "rcp_impl",
.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
.spillless = true,
+ .schedule_first = true,
},
[gpir_op_rsqrt_impl] = {
.name = "rsqrt_impl",
.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
.spillless = true,
+ .schedule_first = true,
},
[gpir_op_load_uniform] = {
.name = "ld_uni",
struct list_head *insert_pos = &ctx->ready_list;
list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
- if (insert_node->sched.dist > node->sched.dist) {
+ if (insert_node->sched.dist > node->sched.dist ||
+ gpir_op_infos[insert_node->op].schedule_first) {
insert_pos = &node->list;
break;
}
}
if (node->sched.next_max_node) {
node->sched.next_max_node = false;
- ctx->instr->alu_num_slot_needed_by_next_max--;
+ ctx->instr->alu_num_unscheduled_next_max--;
}
}
}
static void sched_find_max_nodes(sched_ctx *ctx)
{
- ctx->instr->alu_num_slot_needed_by_next_max = -5;
+ ctx->instr->alu_num_unscheduled_next_max = 0;
ctx->instr->alu_num_slot_needed_by_max = 0;
list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
if (node->sched.max_node)
ctx->instr->alu_num_slot_needed_by_max++;
if (node->sched.next_max_node)
- ctx->instr->alu_num_slot_needed_by_next_max++;
+ ctx->instr->alu_num_unscheduled_next_max++;
}
}
static void verify_max_nodes(sched_ctx *ctx)
{
int alu_num_slot_needed_by_max = 0;
- int alu_num_slot_needed_by_next_max = -5;
+ int alu_num_unscheduled_next_max = 0;
int alu_num_slot_needed_by_store = 0;
int alu_num_slot_needed_by_non_cplx_store = 0;
+ int alu_max_allowed_next_max = 5;
list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
if (!gpir_is_input_node(node))
if (node->sched.max_node)
alu_num_slot_needed_by_max++;
if (node->sched.next_max_node)
- alu_num_slot_needed_by_next_max++;
+ alu_num_unscheduled_next_max++;
if (used_by_store(node, ctx->instr)) {
alu_num_slot_needed_by_store++;
if (node->sched.next_max_node && !node->sched.complex_allowed)
}
}
+ if (ctx->instr->slots[GPIR_INSTR_SLOT_MUL0] &&
+ ctx->instr->slots[GPIR_INSTR_SLOT_MUL0]->op == gpir_op_complex1)
+ alu_max_allowed_next_max = 4;
+
assert(ctx->instr->alu_num_slot_needed_by_max == alu_num_slot_needed_by_max);
- assert(ctx->instr->alu_num_slot_needed_by_next_max == alu_num_slot_needed_by_next_max);
+ assert(ctx->instr->alu_num_unscheduled_next_max == alu_num_unscheduled_next_max);
+ assert(ctx->instr->alu_max_allowed_next_max == alu_max_allowed_next_max);
assert(ctx->instr->alu_num_slot_needed_by_store == alu_num_slot_needed_by_store);
assert(ctx->instr->alu_num_slot_needed_by_non_cplx_store ==
alu_num_slot_needed_by_non_cplx_store);
- assert(ctx->instr->alu_num_slot_free >= alu_num_slot_needed_by_store + alu_num_slot_needed_by_max + MAX2(alu_num_slot_needed_by_next_max, 0));
+ assert(ctx->instr->alu_num_slot_free >= alu_num_slot_needed_by_store + alu_num_slot_needed_by_max + MAX2(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0));
assert(ctx->instr->alu_non_cplx_slot_free >= alu_num_slot_needed_by_max + alu_num_slot_needed_by_non_cplx_store);
}
score = schedule_try_node(ctx, node, true);
}
+ /* schedule_first nodes must be scheduled if at all possible (score !=
+  * INT_MIN), taking priority over any higher-scoring candidate. */
+ if (gpir_op_infos[node->op].schedule_first && score != INT_MIN) {
+ best_node = node;
+ best_score = score;
+ break;
+ }
+
if (score > best_score) {
best_score = score;
best_node = node;
* need to insert the move.
*/
- if (ctx->instr->alu_num_slot_needed_by_next_max > 0) {
+ if (ctx->instr->alu_num_unscheduled_next_max >
+ ctx->instr->alu_max_allowed_next_max) {
list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
if (!can_place_move(ctx, node))
continue;