From 2c17f97fe6a40e4a963fb4eec0ea0555f562b1be Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 27 Nov 2013 17:57:19 -0800 Subject: [PATCH] glsl/loops: consolidate bounded loop handling into a lowering pass. Previously, all of the back-ends (ir_to_mesa, st_glsl_to_tgsi, and the i965 fs and vec4 visitors) had nearly identical logic for handling bounded loops. This replaces the duplicate logic with an equivalent lowering pass that is used by all the back-ends. Note: on i965, there is a slight increase in instruction count. For example, a loop like this: for (int i = 0; i < 100; i++) { total += i; } would previously compile down to this (vec4) native code: mov(8) g4<1>.xD 0D mov(8) g8<1>.xD 0D loop: cmp.ge.f0(8) null g8<4;4,1>.xD 100D (+f0) break(8) add(8) g5<1>.xD g5<4;4,1>.xD g4<4;4,1>.xD add(8) g8<1>.xD g8<4;4,1>.xD 1D add(8) g4<1>.xD g4<4;4,1>.xD 1D while(8) loop After this patch, the "(+f0) break(8)" turns into: (+f0) if(8) break(8) endif(8) because the back-end isn't smart enough to recognize that "if (condition) break;" can be done using a conditional break instruction. However, it should be relatively easy for a future peephole optimization to properly optimize this. Reviewed-by: Jordan Justen Reviewed-by: Ian Romanick --- src/glsl/Makefile.sources | 1 + src/glsl/ir_optimization.h | 1 + src/glsl/lower_bounded_loops.cpp | 117 ++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 33 +---- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 + .../drivers/dri/i965/brw_vec4_visitor.cpp | 34 +---- src/mesa/program/ir_to_mesa.cpp | 42 +------ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 46 +------ 8 files changed, 133 insertions(+), 143 deletions(-) create mode 100644 src/glsl/lower_bounded_loops.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 2e81deddabb..724c7b403ca 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -59,6 +59,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/loop_analysis.cpp \ $(GLSL_SRCDIR)/loop_controls.cpp \ $(GLSL_SRCDIR)/loop_unroll.cpp \ + $(GLSL_SRCDIR)/lower_bounded_loops.cpp \ $(GLSL_SRCDIR)/lower_clip_distance.cpp \ $(GLSL_SRCDIR)/lower_discard.cpp \ $(GLSL_SRCDIR)/lower_discard_flow.cpp \ diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 3ca9f574453..87c2c820b3c 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -101,6 +101,7 @@ bool do_swizzle_swizzle(exec_list *instructions); bool do_tree_grafting(exec_list *instructions); bool do_vec_index_to_cond_assign(exec_list *instructions); bool do_vec_index_to_swizzle(exec_list *instructions); +bool lower_bounded_loops(exec_list *instructions); bool lower_discard(exec_list *instructions); void lower_discard_flow(exec_list *instructions); bool lower_instructions(exec_list *instructions, unsigned what_to_lower); diff --git a/src/glsl/lower_bounded_loops.cpp b/src/glsl/lower_bounded_loops.cpp new file mode 100644 index 00000000000..10f272f36f8 --- /dev/null +++ b/src/glsl/lower_bounded_loops.cpp @@ -0,0 +1,117 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** \file + * + * This pass converts bounded loops (those whose ir_loop contains non-null + * values for \c from, \c to, \c increment, and \c counter) into unbounded + * loops. + * + * For instance: + * + * (loop (declare () uint i) (constant uint 0) (constant uint 4) + * (constant uint 1) + * ...loop body...) + * + * Is transformed into: + * + * (declare () uint i) + * (assign (x) (var_ref i) (constant uint 0)) + * (loop + * (if (expression bool >= (var_ref i) (constant uint 4)) + * (break) + * ()) + * ...loop body... + * (assign (x) (var_ref i) + * (expression uint + (var_ref i) (constant uint 1)))) + */ + +#include "ir_hierarchical_visitor.h" +#include "ir.h" +#include "ir_builder.h" + +using namespace ir_builder; + +namespace { + +class lower_bounded_loops_visitor : public ir_hierarchical_visitor { +public: + lower_bounded_loops_visitor() + : progress(false) + { + } + + virtual ir_visitor_status visit_leave(ir_loop *ir); + + bool progress; +}; + +} /* anonymous namespace */ + + +ir_visitor_status +lower_bounded_loops_visitor::visit_leave(ir_loop *ir) +{ + if (ir->counter == NULL) + return visit_continue; + + exec_list new_instructions; + ir_factory f(&new_instructions, ralloc_parent(ir)); + + /* Before the loop, declare the counter and initialize it to "from". */ + f.emit(ir->counter); + f.emit(assign(ir->counter, ir->from)); + ir->insert_before(&new_instructions); + + /* At the top of the loop, compare the counter to "to", and break if the + * comparison succeeds. + */ + ir_loop_jump *brk = new(f.mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); + ir_expression_operation cmp = (ir_expression_operation) ir->cmp; + ir->body_instructions.push_head(if_tree(expr(cmp, ir->counter, ir->to), + brk)); + + /* At the bottom of the loop, increment the counter. */ + ir->body_instructions.push_tail(assign(ir->counter, + add(ir->counter, ir->increment))); + + /* NULL out the counter, from, to, and increment variables. */ + ir->counter = NULL; + ir->from = NULL; + ir->to = NULL; + ir->increment = NULL; + + this->progress = true; + return visit_continue; +} + + +bool +lower_bounded_loops(exec_list *instructions) +{ + lower_bounded_loops_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index a28dc6cdffe..3e32f6e0eba 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -2181,39 +2181,16 @@ fs_visitor::visit(ir_if *ir) void fs_visitor::visit(ir_loop *ir) { - fs_reg counter = reg_undef; + /* Any bounded loops should have been lowered by lower_bounded_loops(). */ + assert(ir->counter == NULL); if (brw->gen < 6 && dispatch_width == 16) { fail("Can't support (non-uniform) control flow on 16-wide\n"); } - if (ir->counter) { - this->base_ir = ir->counter; - ir->counter->accept(this); - counter = *(variable_storage(ir->counter)); - - if (ir->from) { - this->base_ir = ir->from; - ir->from->accept(this); - - emit(MOV(counter, this->result)); - } - } - this->base_ir = NULL; emit(BRW_OPCODE_DO); - if (ir->to) { - this->base_ir = ir->to; - ir->to->accept(this); - - emit(CMP(reg_null_d, counter, this->result, - brw_conditional_for_comparison(ir->cmp))); - - fs_inst *inst = emit(BRW_OPCODE_BREAK); - inst->predicate = BRW_PREDICATE_NORMAL; - } - foreach_list(node, &ir->body_instructions) { ir_instruction *ir = (ir_instruction *)node; @@ -2221,12 +2198,6 @@ fs_visitor::visit(ir_loop *ir) ir->accept(this); } - if (ir->increment) { - this->base_ir = ir->increment; - ir->increment->accept(this); - emit(ADD(counter, counter, this->result)); - } - this->base_ir = NULL; emit(BRW_OPCODE_WHILE); } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 88aa169fc1f..12035c26793 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -213,6 +213,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) || progress; } while (progress); + lower_bounded_loops(shader->ir); + /* Make a pass over the IR to add state references for any built-in * uniforms that are used. This has to be done now (during linking). * Code generation doesn't happen until the first time this shader is diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b599c29d475..cf62e7ed0b0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1007,48 +1007,18 @@ vec4_visitor::visit(ir_variable *ir) void vec4_visitor::visit(ir_loop *ir) { - dst_reg counter; + /* Any bounded loops should have been lowered by lower_bounded_loops(). */ + assert(ir->counter == NULL); /* We don't want debugging output to print the whole body of the * loop as the annotation. */ this->base_ir = NULL; - if (ir->counter != NULL) { - this->base_ir = ir->counter; - ir->counter->accept(this); - counter = *(variable_storage(ir->counter)); - - if (ir->from != NULL) { - this->base_ir = ir->from; - ir->from->accept(this); - - emit(MOV(counter, this->result)); - } - } - emit(BRW_OPCODE_DO); - if (ir->to) { - this->base_ir = ir->to; - ir->to->accept(this); - - emit(CMP(dst_null_d(), src_reg(counter), this->result, - brw_conditional_for_comparison(ir->cmp))); - - vec4_instruction *inst = emit(BRW_OPCODE_BREAK); - inst->predicate = BRW_PREDICATE_NORMAL; - } - visit_instructions(&ir->body_instructions); - - if (ir->increment) { - this->base_ir = ir->increment; - ir->increment->accept(this); - emit(ADD(counter, src_reg(counter), this->result)); - } - emit(BRW_OPCODE_WHILE); } diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index c833a12f2aa..9fd10d1d91c 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -759,49 +759,13 @@ ir_to_mesa_visitor::visit(ir_variable *ir) void ir_to_mesa_visitor::visit(ir_loop *ir) { - ir_dereference_variable *counter = NULL; - - if (ir->counter != NULL) - counter = new(mem_ctx) ir_dereference_variable(ir->counter); - - if (ir->from != NULL) { - assert(ir->counter != NULL); - - ir_assignment *a = - new(mem_ctx) ir_assignment(counter, ir->from, NULL); - - a->accept(this); - } + /* Any bounded loops should have been lowered by lower_bounded_loops(). */ + assert(ir->counter == NULL); emit(NULL, OPCODE_BGNLOOP); - if (ir->to) { - ir_expression *e = - new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type, - counter, ir->to); - ir_if *if_stmt = new(mem_ctx) ir_if(e); - - ir_loop_jump *brk = - new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); - - if_stmt->then_instructions.push_tail(brk); - - if_stmt->accept(this); - } - visit_exec_list(&ir->body_instructions, this); - if (ir->increment) { - ir_expression *e = - new(mem_ctx) ir_expression(ir_binop_add, counter->type, - counter, ir->increment); - - ir_assignment *a = - new(mem_ctx) ir_assignment(counter, e, NULL); - - a->accept(this); - } - emit(NULL, OPCODE_ENDLOOP); } @@ -3091,6 +3055,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = lower_vector_insert(ir, true) || progress; } while (progress); + lower_bounded_loops(ir); + validate_ir_tree(ir); } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ac95968d6f5..f748ed85092 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1137,53 +1137,13 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) void glsl_to_tgsi_visitor::visit(ir_loop *ir) { - ir_dereference_variable *counter = NULL; - - if (ir->counter != NULL) - counter = new(ir) ir_dereference_variable(ir->counter); - - if (ir->from != NULL) { - assert(ir->counter != NULL); - - ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); - - a->accept(this); - delete a; - } + /* Any bounded loops should have been lowered by lower_bounded_loops(). */ + assert(ir->counter == NULL); emit(NULL, TGSI_OPCODE_BGNLOOP); - if (ir->to) { - ir_expression *e = - new(ir) ir_expression(ir->cmp, glsl_type::bool_type, - counter, ir->to); - ir_if *if_stmt = new(ir) ir_if(e); - - ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); - - if_stmt->then_instructions.push_tail(brk); - - if_stmt->accept(this); - - delete if_stmt; - delete e; - delete brk; - } - visit_exec_list(&ir->body_instructions, this); - if (ir->increment) { - ir_expression *e = - new(ir) ir_expression(ir_binop_add, counter->type, - counter, ir->increment); - - ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); - - a->accept(this); - delete a; - delete e; - } - emit(NULL, TGSI_OPCODE_ENDLOOP); } @@ -5345,6 +5305,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) } while (progress); + lower_bounded_loops(ir); + validate_ir_tree(ir); } -- 2.30.2