From 7ea3baa64da061f86a50c41081a26e0c2859e99c Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 29 Nov 2013 00:16:43 -0800 Subject: [PATCH] glsl/loops: Stop creating normatively bound loops in loop_controls. Previously, when loop_controls analyzed a loop and found that it had a fixed bound (known at compile time), it would remove all of the loop terminators and instead set the loop's normative_bound field to force the loop to execute the correct number of times. This made loop unrolling easy, but it had a serious disadvantage. Since most GPUs don't have a native mechanism for executing a loop a fixed number of times, in order to implement the normative bound, the back-ends would have to synthesize a new loop induction variable. As a result, many loops wound up having two induction variables instead of one. This caused extra register pressure and unnecessary instructions. This patch modifies loop_controls so that it doesn't set the loop's normative_bound anymore. Instead it leaves one of the terminators in the loop (the limiting terminator), so the back-end doesn't have to go to any extra work to ensure the loop terminates at the right time. This complicates loop unrolling slightly: when deciding whether a loop can be unrolled, we have to account for the presence of the limiting terminator. And when we do unroll the loop, we have to remove the limiting terminator first. For an example of how this results in more efficient back-end code, consider the loop: for (int i = 0; i < 100; i++) { total += i; } Prior to this patch, on i965, this loop would compile down to this (vec4) native code: mov(8) g4<1>.xD 0D mov(8) g8<1>.xD 0D loop: cmp.ge.f0(8) null g8<4;4,1>.xD 100D (+f0) if(8) break(8) endif(8) add(8) g5<1>.xD g5<4;4,1>.xD g4<4;4,1>.xD add(8) g8<1>.xD g8<4;4,1>.xD 1D add(8) g4<1>.xD g4<4;4,1>.xD 1D while(8) loop (notice that both g8 and g4 are loop induction variables; one is used to terminate the loop, and the other is used to accumulate the total). 
After this patch, the same loop compiles to: mov(8) g4<1>.xD 0D loop: cmp.ge.f0(8) null g4<4;4,1>.xD 100D (+f0) if(8) break(8) endif(8) add(8) g5<1>.xD g5<4;4,1>.xD g4<4;4,1>.xD add(8) g4<1>.xD g4<4;4,1>.xD 1D while(8) loop Reviewed-by: Ian Romanick --- src/glsl/loop_analysis.h | 9 +++------ src/glsl/loop_controls.cpp | 25 +++++++++++++++---------- src/glsl/loop_unroll.cpp | 27 +++++++++++++++++++++++---- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/src/glsl/loop_analysis.h b/src/glsl/loop_analysis.h index ae980567c06..6799f876cd7 100644 --- a/src/glsl/loop_analysis.h +++ b/src/glsl/loop_analysis.h @@ -39,15 +39,12 @@ analyze_loop_variables(exec_list *instructions); /** * Fill in loop control fields * - * Based on analysis of loop variables, this function tries to remove sequences - * in the loop of the form + * Based on analysis of loop variables, this function tries to remove + * redundant sequences in the loop of the form * * (if (expression bool ...) (break)) * - * and fill in the \c normative_bound field of the \c ir_loop. - * - * In this process, some conditional break-statements may be eliminated - * altogether. For example, if it is provable that one loop exit condition will + * For example, if it is provable that one loop exit condition will * always be satisfied before another, the unnecessary exit condition will be * removed. */ diff --git a/src/glsl/loop_controls.cpp b/src/glsl/loop_controls.cpp index 9e32ff5d5fa..385c2031c42 100644 --- a/src/glsl/loop_controls.cpp +++ b/src/glsl/loop_controls.cpp @@ -195,16 +195,19 @@ loop_control_visitor::visit_leave(ir_loop *ir) } /* If the limiting terminator has a lower iteration count than the - * normative loop bound (if any), then make this a normatively bounded - * loop with the new iteration count. + * normative loop bound (if any), then the loop doesn't need a normative + * bound anymore. 
*/ - if (ir->normative_bound < 0 || iterations < ir->normative_bound) - ir->normative_bound = iterations; + if (ir->normative_bound >= 0 && iterations < ir->normative_bound) + ir->normative_bound = -1; } /* Remove the conditional break statements associated with all terminators - * that are associated with a fixed iteration count; the normative bound - * will take care of terminating the loop. + * that are associated with a fixed iteration count, except for the one + * associated with the limiting terminator--that one needs to stay, since + * it terminates the loop. Exception: if the loop still has a normative + * bound, then that terminates the loop, so we don't even need the limiting + * terminator. */ foreach_list(node, &ls->terminators) { loop_terminator *t = (loop_terminator *) node; @@ -212,12 +215,14 @@ loop_control_visitor::visit_leave(ir_loop *ir) if (t->iterations < 0) continue; - t->ir->remove(); + if (ir->normative_bound >= 0 || t != ls->limiting_terminator) { + t->ir->remove(); - assert(ls->num_loop_jumps > 0); - ls->num_loop_jumps--; + assert(ls->num_loop_jumps > 0); + ls->num_loop_jumps--; - this->progress = true; + this->progress = true; + } } return visit_continue; diff --git a/src/glsl/loop_unroll.cpp b/src/glsl/loop_unroll.cpp index 9472bc5a510..4645dcbab19 100644 --- a/src/glsl/loop_unroll.cpp +++ b/src/glsl/loop_unroll.cpp @@ -228,6 +228,9 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) loop_variable_state *const ls = this->state->get(ir); int iterations; + /* Note: normatively-bounded loops aren't created anymore. */ + assert(ir->normative_bound < 0); + /* If we've entered a loop that hasn't been analyzed, something really, * really bad has happened. */ @@ -239,10 +242,10 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) /* Don't try to unroll loops where the number of iterations is not known * at compile-time. 
*/ - if (ir->normative_bound < 0) + if (ls->limiting_terminator == NULL) return visit_continue; - iterations = ir->normative_bound; + iterations = ls->limiting_terminator->iterations; /* Don't try to unroll loops that have zillions of iterations either. */ @@ -256,10 +259,17 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) if (count.fail || count.nodes * iterations > (int)max_iterations * 5) return visit_continue; - if (ls->num_loop_jumps > 1) + /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. + * We'll be removing the limiting terminator before we unroll. + */ + assert(ls->num_loop_jumps > 0); + unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; + + if (predicted_num_loop_jumps > 1) return visit_continue; - if (ls->num_loop_jumps == 0) { + if (predicted_num_loop_jumps == 0) { + ls->limiting_terminator->ir->remove(); simple_unroll(ir, iterations); return visit_continue; } @@ -274,6 +284,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) */ last_ir->remove(); + ls->limiting_terminator->ir->remove(); simple_unroll(ir, 1); return visit_continue; } @@ -282,6 +293,12 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) /* recognize loops in the form produced by ir_lower_jumps */ ir_instruction *cur_ir = (ir_instruction *) node; + /* Skip the limiting terminator, since it will go away when we + * unroll. 
+ */ + if (cur_ir == ls->limiting_terminator->ir) + continue; + ir_if *ir_if = cur_ir->as_if(); if (ir_if != NULL) { /* Determine which if-statement branch, if any, ends with a @@ -296,6 +313,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) (ir_instruction *) ir_if->then_instructions.get_tail(); if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); splice_post_if_instructions(ir_if, &ir_if->else_instructions); ir_if_last->remove(); complex_unroll(ir, iterations, false); @@ -305,6 +323,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) (ir_instruction *) ir_if->else_instructions.get_tail(); if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); splice_post_if_instructions(ir_if, &ir_if->then_instructions); ir_if_last->remove(); complex_unroll(ir, iterations, true); -- 2.30.2