From 96b22fb080894ba1840af2372f28a46cc0f40c76 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kristian=20H=C3=B8gsberg=20Kristensen?= Date: Wed, 4 Nov 2015 14:58:54 -0800 Subject: [PATCH] glsl: Use array deref for access to vector components MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We've assumed that we could lower per-component vector access from vec[i] = scalar to vec = ir_triop_vector_insert(vec, scalar, i) but with SSBOs (and compute shader SLM and tesselation outputs) this is no longer valid. If a vector is "externally visible", multiple threads can write independent components simultaneously. With lowering to ir_triop_vector_insert, each thread read the entire vector, changes one component, then writes out the entire vector. This is racy. Instead of generating a ir_binop_vector_extract when we see v[i], we generate ir_dereference_array. We then add a lowering pass to lower the ir_dereference_array to ir_binop_vector_extract for rvalues and for to vector_insert for lvalues in a separate lowering pass. The resulting IR is the same as before, but we now have a window between ast->ir conversion and the lowering pass where v[i] appears in the IR as an array deref. This lets us run lowering passes that lower the vector access to I/O (eg for SSBO load/store) before we lower the per-component access to full vector writes. Reviewed-by: Jordan Justen Signed-off-by: Kristian Høgsberg Kristensen --- src/glsl/Makefile.sources | 1 + src/glsl/ast_array_index.cpp | 5 +- src/glsl/ast_function.cpp | 24 ++----- src/glsl/ast_to_hir.cpp | 43 ------------- src/glsl/ir_optimization.h | 1 + src/glsl/ir_validate.cpp | 7 ++- src/glsl/linker.cpp | 2 + src/glsl/lower_ubo_reference.cpp | 14 ++++- src/glsl/lower_vector_derefs.cpp | 104 +++++++++++++++++++++++++++++++ src/glsl/opt_dead_code_local.cpp | 5 ++ 10 files changed, 138 insertions(+), 68 deletions(-) create mode 100644 src/glsl/lower_vector_derefs.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 0266f290ccb..78d295b8e91 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -176,6 +176,7 @@ LIBGLSL_FILES = \ lower_vec_index_to_cond_assign.cpp \ lower_vec_index_to_swizzle.cpp \ lower_vector.cpp \ + lower_vector_derefs.cpp \ lower_vector_insert.cpp \ lower_vertex_id.cpp \ lower_output_reads.cpp \ diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp index 74d403fdb65..ca7a9a10c36 100644 --- a/src/glsl/ast_array_index.cpp +++ b/src/glsl/ast_array_index.cpp @@ -319,10 +319,9 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, * expression. */ if (array->type->is_array() - || array->type->is_matrix()) { + || array->type->is_matrix() + || array->type->is_vector()) { return new(mem_ctx) ir_dereference_array(array, idx); - } else if (array->type->is_vector()) { - return new(mem_ctx) ir_expression(ir_binop_vector_extract, array, idx); } else if (array->type->is_error()) { return array; } else { diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index e4e4a3fe148..55844706d35 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -256,18 +256,10 @@ verify_parameter_modes(_mesa_glsl_parse_state *state, actual->variable_referenced()->name); return false; } else if (!actual->is_lvalue()) { - /* Even though ir_binop_vector_extract is not an l-value, let it - * slop through. generate_call will handle it correctly. - */ - ir_expression *const expr = ((ir_rvalue *) actual)->as_expression(); - if (expr == NULL - || expr->operation != ir_binop_vector_extract - || !expr->operands[0]->is_lvalue()) { - _mesa_glsl_error(&loc, state, - "function parameter '%s %s' is not an lvalue", - mode, formal->name); - return false; - } + _mesa_glsl_error(&loc, state, + "function parameter '%s %s' is not an lvalue", + mode, formal->name); + return false; } } @@ -376,12 +368,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type, ir_rvalue *lhs = actual; if (expr != NULL && expr->operation == ir_binop_vector_extract) { - rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, - expr->operands[0]->type, - expr->operands[0]->clone(mem_ctx, NULL), - rhs, - expr->operands[1]->clone(mem_ctx, NULL)); - lhs = expr->operands[0]->clone(mem_ctx, NULL); + lhs == new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL), + expr->operands[1]->clone(mem_ctx, NULL)); } ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs); diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 6f5f3c1b245..9d341e8cf92 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -850,43 +850,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, { void *ctx = state; bool error_emitted = (lhs->type->is_error() || rhs->type->is_error()); - ir_rvalue *extract_channel = NULL; - - /* If the assignment LHS comes back as an ir_binop_vector_extract - * expression, move it to the RHS as an ir_triop_vector_insert. - */ - if (lhs->ir_type == ir_type_expression) { - ir_expression *const lhs_expr = lhs->as_expression(); - - if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) { - ir_rvalue *new_rhs = - validate_assignment(state, lhs_loc, lhs, - rhs, is_initializer); - - if (new_rhs == NULL) { - return lhs; - } else { - /* This converts: - * - LHS: (expression float vector_extract ) - * - RHS: - * into: - * - LHS: - * - RHS: (expression vec2 vector_insert ) - * - * The LHS type is now a vector instead of a scalar. Since GLSL - * allows assignments to be used as rvalues, we need to re-extract - * the channel from assignment_temp when returning the rvalue. - */ - extract_channel = lhs_expr->operands[1]; - rhs = new(ctx) ir_expression(ir_triop_vector_insert, - lhs_expr->operands[0]->type, - lhs_expr->operands[0], - new_rhs, - extract_channel); - lhs = lhs_expr->operands[0]->clone(ctx, NULL); - } - } - } ir_variable *lhs_var = lhs->variable_referenced(); if (lhs_var) @@ -984,12 +947,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, } ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var); - if (extract_channel) { - rvalue = new(ctx) ir_expression(ir_binop_vector_extract, - rvalue, - extract_channel->clone(ctx, NULL)); - } - *out_rvalue = rvalue; } else { if (!error_emitted) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 6d19a6ca476..2fee81c09c2 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -129,6 +129,7 @@ void lower_packed_varyings(void *mem_ctx, unsigned locations_used, ir_variable_mode mode, unsigned gs_input_vertices, gl_shader *shader); bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index); +bool lower_vector_derefs(gl_shader *shader); void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader); bool optimize_redundant_jumps(exec_list *instructions); bool optimize_split_arrays(exec_list *instructions, bool linked); diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 935571ae1d6..e63b5c318e3 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -110,9 +110,10 @@ ir_validate::visit(ir_dereference_variable *ir) ir_visitor_status ir_validate::visit_enter(class ir_dereference_array *ir) { - if (!ir->array->type->is_array() && !ir->array->type->is_matrix()) { - printf("ir_dereference_array @ %p does not specify an array or a " - "matrix\n", + if (!ir->array->type->is_array() && !ir->array->type->is_matrix() && + !ir->array->type->is_vector()) { + printf("ir_dereference_array @ %p does not specify an array, a vector " + "or a matrix\n", (void *) ir); ir->print(); printf("\n"); diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index a8baee07f10..db00f8febc6 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -4451,6 +4451,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks) lower_ubo_reference(prog->_LinkedShaders[i]); + + lower_vector_derefs(prog->_LinkedShaders[i]); } done: diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index 24806ac6ce9..b74aa3d0630 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -390,7 +390,19 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, case ir_type_dereference_array: { ir_dereference_array *deref_array = (ir_dereference_array *) deref; unsigned array_stride; - if (deref_array->array->type->is_matrix() && *row_major) { + if (deref_array->array->type->is_vector()) { + /* We get this when storing or loading a component out of a vector + * with a non-constant index. This happens for v[i] = f where v is + * a vector (or m[i][j] = f where m is a matrix). If we don't + * lower that here, it gets turned into v = vector_insert(v, i, + * f), which loads the entire vector, modifies one component and + * then write the entire thing back. That breaks if another + * thread or SIMD channel is modifying the same vector. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + } else if (deref_array->array->type->is_matrix() && *row_major) { /* When loading a vector out of a row major matrix, the * step between the columns (vectors) is the size of a * float, while the step between the rows (elements of a diff --git a/src/glsl/lower_vector_derefs.cpp b/src/glsl/lower_vector_derefs.cpp new file mode 100644 index 00000000000..4a5d6f0da4c --- /dev/null +++ b/src/glsl/lower_vector_derefs.cpp @@ -0,0 +1,104 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "ir.h" +#include "ir_builder.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" + +using namespace ir_builder; + +namespace { + +class vector_deref_visitor : public ir_rvalue_enter_visitor { +public: + vector_deref_visitor() + : progress(false) + { + } + + virtual ~vector_deref_visitor() + { + } + + virtual void handle_rvalue(ir_rvalue **rv); + virtual ir_visitor_status visit_enter(ir_assignment *ir); + + bool progress; +}; + +} /* anonymous namespace */ + +ir_visitor_status +vector_deref_visitor::visit_enter(ir_assignment *ir) +{ + if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array) + return ir_rvalue_enter_visitor::visit_enter(ir); + + ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs; + if (!deref->array->type->is_vector()) + return ir_rvalue_enter_visitor::visit_enter(ir); + + ir_dereference *const new_lhs = (ir_dereference *) deref->array; + ir->set_lhs(new_lhs); + + ir_constant *old_index_constant = deref->array_index->constant_expression_value(); + void *mem_ctx = ralloc_parent(ir); + if (!old_index_constant) { + ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, + new_lhs->type, + new_lhs->clone(mem_ctx, NULL), + ir->rhs, + deref->array_index); + ir->write_mask = (1 << new_lhs->type->vector_elements) - 1; + } else { + ir->write_mask = 1 << old_index_constant->get_int_component(0); + } + + return ir_rvalue_enter_visitor::visit_enter(ir); +} + +void +vector_deref_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array) + return; + + ir_dereference_array *const deref = (ir_dereference_array *) *rv; + if (!deref->array->type->is_vector()) + return; + + void *mem_ctx = ralloc_parent(deref); + *rv = new(mem_ctx) ir_expression(ir_binop_vector_extract, + deref->array, + deref->array_index); +} + +bool +lower_vector_derefs(gl_shader *shader) +{ + vector_deref_visitor v; + + visit_list_elements(&v, shader->ir); + + return v.progress; +} diff --git a/src/glsl/opt_dead_code_local.cpp b/src/glsl/opt_dead_code_local.cpp index 4770fcff2ea..ee9f22c0373 100644 --- a/src/glsl/opt_dead_code_local.cpp +++ b/src/glsl/opt_dead_code_local.cpp @@ -197,6 +197,11 @@ process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments) if (entry->lhs != var) continue; + /* Skip if the assignment we're trying to eliminate isn't a plain + * variable deref. */ + if (entry->ir->lhs->ir_type != ir_type_dereference_variable) + continue; + int remove = entry->unused & ir->write_mask; if (debug) { printf("%s 0x%01x - 0x%01x = 0x%01x\n", -- 2.30.2