From 4746796b825d9eb607e6d0a5132339c313010146 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 18 May 2020 16:36:46 +1000 Subject: [PATCH] r600/sfn: add callstack non-evergreen support Reviewed-by: Gert Wollny Part-of: --- .../drivers/r600/sfn/sfn_callstack.cpp | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_callstack.cpp b/src/gallium/drivers/r600/sfn/sfn_callstack.cpp index da423a657fa..681b89d8679 100644 --- a/src/gallium/drivers/r600/sfn/sfn_callstack.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_callstack.cpp @@ -90,12 +90,40 @@ int CallStack::update_max_depth(unsigned type) elements = (stack.loop + stack.push_wqm ) * entry_size; elements += stack.push; - /* These next three lines are EVERGREEN specific and should - * be moved to a virtual function when other chipsets are to - * be supported */ - assert(m_bc.chip_class == EVERGREEN); - if (type == FC_PUSH_VPM || stack.push > 0) { - elements += 1; + switch (m_bc.chip_class) { + case R600: + case R700: + /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on + * the stack must be reserved to hold the current active/continue + * masks */ + if (type == FC_PUSH_VPM || stack.push > 0) { + elements += 2; + } + break; + case CAYMAN: + /* r9xx: any stack operation on empty stack consumes 2 additional + * elements */ + elements += 2; + break; + case EVERGREEN: + /* r8xx+: 2 extra elements are not always required, but one extra + * element must be added for each of the following cases: + * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest + * stack usage. + * (Currently we don't use ALU_ELSE_AFTER.) + * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM + * PUSH instruction executed. + * + * NOTE: it seems we also need to reserve additional element in some + * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, + * then STACK_SIZE should be 2 instead of 1 */ + if (type == FC_PUSH_VPM || stack.push > 0) { + elements += 1; + } + break; + default: + assert(0); + break; } entry_size = 4; -- 2.30.2