X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fglsl%2Fopt_vectorize.cpp;h=2f71a83583b1f5f7c4078cd7f6d6895afe9d94e3;hb=c8bc8d723598ec87bbce9a2439075dfe1612a359;hp=9ca811a8603a8677f2160bd52d51a91f729ac598;hpb=4bd6e0d7c69b304be88996a6c2b96ce7d996e627;p=mesa.git diff --git a/src/glsl/opt_vectorize.cpp b/src/glsl/opt_vectorize.cpp index 9ca811a8603..2f71a83583b 100644 --- a/src/glsl/opt_vectorize.cpp +++ b/src/glsl/opt_vectorize.cpp @@ -82,6 +82,11 @@ public: virtual ir_visitor_status visit_enter(ir_assignment *); virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_if *); + virtual ir_visitor_status visit_enter(ir_loop *); + virtual ir_visitor_status visit_enter(ir_texture *); virtual ir_visitor_status visit_leave(ir_assignment *); @@ -104,9 +109,10 @@ public: * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). * - * The function modifies only ir_expressions and ir_swizzles. For expressions - * it sets a new type and swizzles any scalar dereferences into appropriately - * sized vector arguments. For example, if combining + * The function operates on ir_expressions (and its operands) and ir_swizzles. + * For expressions it sets a new type and swizzles any non-expression and non- + * swizzle scalar operands into appropriately sized vector arguments. 
For + * example, if combining * * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1)))) * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1)))) @@ -144,9 +150,10 @@ rewrite_swizzle(ir_instruction *ir, void *data) mask->num_components, 1); for (unsigned i = 0; i < 4; i++) { if (expr->operands[i]) { - ir_dereference *deref = expr->operands[i]->as_dereference(); - if (deref && deref->type->is_scalar()) { - expr->operands[i] = new(ir) ir_swizzle(deref, 0, 0, 0, 0, + ir_rvalue *rval = expr->operands[i]->as_rvalue(); + if (rval && rval->type->is_scalar() && + !rval->as_expression() && !rval->as_swizzle()) { + expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0, mask->num_components); } } @@ -170,22 +177,31 @@ void ir_vectorize_visitor::try_vectorize() { if (this->last_assignment && this->channels > 1) { - ir_swizzle_mask mask = {0, 1, 2, 3, channels, 0}; - - visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); + ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0}; this->last_assignment->write_mask = 0; - for (unsigned i = 0; i < 4; i++) { + for (unsigned i = 0, j = 0; i < 4; i++) { if (this->assignment[i]) { this->last_assignment->write_mask |= 1 << i; if (this->assignment[i] != this->last_assignment) { this->assignment[i]->remove(); } + + switch (j) { + case 0: mask.x = i; break; + case 1: mask.y = i; break; + case 2: mask.z = i; break; + case 3: mask.w = i; break; + } + + j++; } } + visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); + this->progress = true; } clear(); @@ -212,8 +228,7 @@ write_mask_to_swizzle(unsigned write_mask) case WRITEMASK_Z: return SWIZZLE_Z; case WRITEMASK_W: return SWIZZLE_W; } - assert(!"not reached"); - unreachable(); + unreachable("not reached"); } /** @@ -245,6 +260,7 @@ ir_vectorize_visitor::visit_enter(ir_assignment *ir) if (ir->condition || this->channels >= 4 || !single_channel_write_mask(ir->write_mask) || + 
this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL || (lhs && !ir->lhs->equals(lhs)) || (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) { try_vectorize(); @@ -276,6 +292,78 @@ ir_vectorize_visitor::visit_enter(ir_swizzle *ir) return visit_continue; } +/* Upon entering an ir_dereference_array, remove the current assignment from + * further consideration. Since the index of an array dereference must be + * scalar, we are not able to vectorize it. + * + * FINISHME: If all of the scalar indices are identical, we could vectorize. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_dereference_array *) +{ + this->current_assignment = NULL; + return visit_continue_with_parent; +} + +/** + * Upon entering an ir_expression, remove the current assignment from further + * consideration if the expression operates horizontally on vectors. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_expression *ir) +{ + if (ir->is_horizontal()) { + this->current_assignment = NULL; + return visit_continue_with_parent; + } + return visit_continue; +} + +/* Since there is no statement to visit between the "then" and "else" + * instructions try to vectorize before, in between, and after them to avoid + * combining statements from different basic blocks. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_if *ir) +{ + try_vectorize(); + + visit_list_elements(this, &ir->then_instructions); + try_vectorize(); + + visit_list_elements(this, &ir->else_instructions); + try_vectorize(); + + return visit_continue_with_parent; +} + +/* Since there is no statement to visit between the instructions in the body of + * the loop and the instructions after it try to vectorize before and after the + * body to avoid combining statements from different basic blocks. 
+ */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_loop *ir) +{ + try_vectorize(); + + visit_list_elements(this, &ir->body_instructions); + try_vectorize(); + + return visit_continue_with_parent; +} + +/** + * Upon entering an ir_texture, remove the current assignment from + * further consideration. Vectorizing multiple texture lookups into one + * is wrong. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_texture *) +{ + this->current_assignment = NULL; + return visit_continue_with_parent; +} + /** * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if * the swizzle mask(s) found were appropriate. Also save a pointer in