unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
unsigned &ifs, unsigned add) {
unsigned stack_elements = add;
- bool has_non_wqm_push_with_loops_on_stack = false;
bool has_non_wqm_push = (add != 0);
region_node *r = n->is_region() ?
static_cast<region_node*>(n) : n->get_parent_region();
while (r) {
if (r->is_loop()) {
++loops;
- if (has_non_wqm_push)
- has_non_wqm_push_with_loops_on_stack = true;
} else {
++ifs;
has_non_wqm_push = true;
switch (ctx.hw_class) {
case HW_CLASS_R600:
case HW_CLASS_R700:
+ // If any non-WQM push is invoked, 2 elements should be reserved.
if (has_non_wqm_push)
stack_elements += 2;
break;
case HW_CLASS_CAYMAN:
+ // If any stack operation is invoked, 2 elements should be reserved.
if (stack_elements)
stack_elements += 2;
break;
case HW_CLASS_EVERGREEN:
- if (has_non_wqm_push_with_loops_on_stack)
+ // According to the docs we need to reserve 1 element for each of the
+ // following cases:
+ // 1) non-WQM push is used with WQM/LOOP frames on stack
+ // 2) ALU_ELSE_AFTER is used at the point of max stack usage
+ // NOTE:
+ // It was found that the conditions above are not sufficient: there are
+ // other cases where we also need to reserve stack space. That is why
+ // we always reserve 1 stack element if we have a non-WQM push on stack.
+ // Condition 2 is ignored for now because we don't use this instruction.
+ if (has_non_wqm_push)
++stack_elements;
break;
}