From: Jason Ekstrand
Date: Fri, 2 Dec 2016 19:36:42 +0000 (-0800)
Subject: nir: Add a pass for selectively lowering variables to scratch space
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=18ed82b084c79bf63666f2da22e5d675fb01aa26;p=mesa.git

nir: Add a pass for selectively lowering variables to scratch space

This commit adds new nir_load/store_scratch opcodes which read and write
a virtual scratch space. It's up to the back-end to figure out what to
do with it and where to put the actual scratch data.

v2: Drop const_index comments (by anholt)

Reviewed-by: Eric Anholt
---

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index d201ea5855c..5737a827daa 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -263,6 +263,7 @@ NIR_FILES = \
 	nir/nir_lower_phis_to_scalar.c \
 	nir/nir_lower_regs_to_ssa.c \
 	nir/nir_lower_returns.c \
+	nir/nir_lower_scratch.c \
 	nir/nir_lower_subgroups.c \
 	nir/nir_lower_system_values.c \
 	nir/nir_lower_tex.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 54655f7cd7c..4e5039e28e0 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -143,6 +143,7 @@ files_libnir = files(
   'nir_lower_phis_to_scalar.c',
   'nir_lower_regs_to_ssa.c',
   'nir_lower_returns.c',
+  'nir_lower_scratch.c',
   'nir_lower_subgroups.c',
   'nir_lower_system_values.c',
   'nir_lower_tex.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 0f110dd959f..91cad825653 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2359,6 +2359,9 @@ typedef struct nir_shader {
     */
    unsigned num_inputs, num_uniforms, num_outputs, num_shared;

+   /** Size in bytes of required scratch space */
+   unsigned scratch_size;
+
    /** Constant data associated with this shader.
    *
    * Constant data is loaded through load_constant intrinsics. See also
@@ -3012,6 +3015,11 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
                                  nir_function_impl *entrypoint,
                                  bool outputs, bool inputs);

+bool nir_lower_vars_to_scratch(nir_shader *shader,
+                               nir_variable_mode modes,
+                               int size_threshold,
+                               glsl_type_size_align_func size_align);
+
 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);

 void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index a45a581bd05..1baa60b2fe5 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -735,6 +735,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s)
    ns->num_uniforms = s->num_uniforms;
    ns->num_outputs = s->num_outputs;
    ns->num_shared = s->num_shared;
+   ns->scratch_size = s->scratch_size;

    ns->constant_data_size = s->constant_data_size;
    if (s->constant_data_size > 0) {
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 9b3f480f7af..bf06f8385a5 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -654,6 +654,8 @@ load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
 load("global", 1, [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 # src[] = { address }.
 load("kernel_input", 1, [BASE, RANGE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE, CAN_REORDER])
+# src[] = { offset }.
+load("scratch", 1, [ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])

 # Stores work the same way as loads, except now the first source is the value
 # to store and the second (and possibly third) source specify where to store
@@ -673,7 +675,8 @@ store("ssbo", 3, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
 store("shared", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
 # src[] = { value, address }.
 store("global", 2, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
-
+# src[] = { value, offset }.
+store("scratch", 2, [ALIGN_MUL, ALIGN_OFFSET, WRMASK])

 # IR3-specific version of most SSBO intrinsics. The only different
 # compare to the originals is that they add an extra source to hold
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 5f18f1df445..331ecc08324 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -1178,6 +1178,7 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_shared:
    case nir_intrinsic_load_uniform:
    case nir_intrinsic_load_global:
+   case nir_intrinsic_load_scratch:
       return &instr->src[0];
    case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_ssbo:
@@ -1187,6 +1188,7 @@
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_shared:
    case nir_intrinsic_store_global:
+   case nir_intrinsic_store_scratch:
       return &instr->src[1];
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_per_vertex_output:
diff --git a/src/compiler/nir/nir_lower_scratch.c b/src/compiler/nir/nir_lower_scratch.c
new file mode 100644
index 00000000000..df0d3f43124
--- /dev/null
+++ b/src/compiler/nir/nir_lower_scratch.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass converts loads and stores of variables into loads and
+ * stores of scratch space, based on a few configurable parameters.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+static bool
+deref_has_indirect(nir_deref_instr *deref)
+{
+   while (deref->deref_type != nir_deref_type_var) {
+      if (deref->deref_type == nir_deref_type_array &&
+          nir_src_as_const_value(deref->arr.index) == NULL)
+         return true;
+
+      deref = nir_deref_instr_parent(deref);
+   }
+
+   return false;
+}
+
+static void
+lower_load_store(nir_builder *b,
+                 nir_intrinsic_instr *intrin,
+                 glsl_type_size_align_func size_align)
+{
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   nir_ssa_def *offset =
+      nir_iadd_imm(b, nir_build_deref_offset(b, deref, size_align),
+                      var->data.location);
+
+   unsigned align, UNUSED size;
+   size_align(deref->type, &size, &align);
+
+   /* Booleans live in scratch as 32-bit integers, so loads and stores of
+    * boolean variables need a conversion on either side of the memory
+    * access.
+    */
+   const bool is_bool = glsl_type_is_boolean(deref->type);
+
+   if (intrin->intrinsic == nir_intrinsic_load_deref) {
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_scratch);
+      load->num_components = intrin->num_components;
+      load->src[0] = nir_src_for_ssa(offset);
+      nir_intrinsic_set_align(load, align, 0);
+      nir_ssa_dest_init(&load->instr, &load->dest,
+                        intrin->dest.ssa.num_components,
+                        is_bool ? 32 : intrin->dest.ssa.bit_size, NULL);
+      nir_builder_instr_insert(b, &load->instr);
+
+      nir_ssa_def *value = &load->dest.ssa;
+      if (is_bool)
+         value = nir_i2b(b, value);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_src_for_ssa(value));
+   } else {
+      assert(intrin->intrinsic == nir_intrinsic_store_deref);
+
+      assert(intrin->src[1].is_ssa);
+      nir_ssa_def *value = intrin->src[1].ssa;
+      if (is_bool)
+         value = nir_b2i32(b, value);
+
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_scratch);
+      store->num_components = intrin->num_components;
+      store->src[0] = nir_src_for_ssa(value);
+      store->src[1] = nir_src_for_ssa(offset);
+      nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
+      nir_intrinsic_set_align(store, align, 0);
+      nir_builder_instr_insert(b, &store->instr);
+   }
+
+   nir_instr_remove(&intrin->instr);
+   nir_deref_instr_remove_if_unused(deref);
+}
+
+bool
+nir_lower_vars_to_scratch(nir_shader *shader,
+                          nir_variable_mode modes,
+                          int size_threshold,
+                          glsl_type_size_align_func size_align)
+{
+   /* First, we walk the instructions and flag any variables we want to lower
+    * by removing them from their respective list and setting the mode to 0.
+    */
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_deref &&
+                intrin->intrinsic != nir_intrinsic_store_deref)
+               continue;
+
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            if (!(deref->mode & modes))
+               continue;
+
+            if (!deref_has_indirect(deref))
+               continue;
+
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+
+            /* We set var->mode to 0 to indicate that a variable will be moved
+             * to scratch. Don't assign a scratch location twice.
+             */
+            if (var->data.mode == 0)
+               continue;
+
+            unsigned var_size, var_align;
+            size_align(var->type, &var_size, &var_align);
+            if (var_size <= size_threshold)
+               continue;
+
+            /* Remove it from its list */
+            exec_node_remove(&var->node);
+            /* Invalid mode used to flag "moving to scratch" */
+            var->data.mode = 0;
+
+            var->data.location = ALIGN_POT(shader->scratch_size, var_align);
+            shader->scratch_size = var->data.location + var_size;
+         }
+      }
+   }
+
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_builder build;
+      nir_builder_init(&build, function->impl);
+
+      bool impl_progress = false;
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_deref &&
+                intrin->intrinsic != nir_intrinsic_store_deref)
+               continue;
+
+            nir_variable *var = nir_intrinsic_get_var(intrin, 0);
+            /* Variables flagged for lowering above have mode == 0 */
+            if (!var || var->data.mode)
+               continue;
+
+            lower_load_store(&build, intrin, size_align);
+            impl_progress = true;
+         }
+      }
+
+      if (impl_progress) {
+         progress = true;
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+
+   return progress;
+}
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index bab42232992..42053dc2d8c 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -1376,6 +1376,8 @@ nir_print_shader_annotated(nir_shader *shader, FILE *fp,
    fprintf(fp, "outputs: %u\n", shader->num_outputs);
    fprintf(fp, "uniforms: %u\n", shader->num_uniforms);
    fprintf(fp, "shared: %u\n", shader->num_shared);
+   if (shader->scratch_size)
+      fprintf(fp, "scratch: %u\n", shader->scratch_size);

    nir_foreach_variable(var, &shader->uniforms) {
       print_var_decl(var, &state);
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 324c0a154b3..fe74603115a 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -1122,6 +1122,7 @@ nir_serialize(struct blob *blob, const nir_shader *nir)
    blob_write_uint32(blob, nir->num_uniforms);
    blob_write_uint32(blob, nir->num_outputs);
    blob_write_uint32(blob, nir->num_shared);
+   blob_write_uint32(blob, nir->scratch_size);

    blob_write_uint32(blob, exec_list_length(&nir->functions));
    nir_foreach_function(fxn, nir) {
@@ -1179,6 +1180,7 @@ nir_deserialize(void *mem_ctx,
    ctx.nir->num_uniforms = blob_read_uint32(blob);
    ctx.nir->num_outputs = blob_read_uint32(blob);
    ctx.nir->num_shared = blob_read_uint32(blob);
+   ctx.nir->scratch_size = blob_read_uint32(blob);

    unsigned num_functions = blob_read_uint32(blob);
    for (unsigned i = 0; i < num_functions; i++)
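
As a reading aid, and not part of the patch itself, the following stand-alone C sketch models the semantics the commit message describes: scratch behaves like a flat per-invocation byte array of shader->scratch_size bytes, load_scratch takes a byte offset as its only source, and store_scratch takes a value, a byte offset, and a write mask. All names below (model_scratch_buf, model_load_scratch, model_store_scratch) are illustrative, and only a single 32-bit component is modeled.

#include <stdint.h>
#include <string.h>

/* One scratch buffer per shader invocation; the back-end decides where this
 * memory actually lives. */
typedef struct {
   uint8_t *bytes; /* at least shader->scratch_size bytes */
} model_scratch_buf;

/* load_scratch: src[0] is a byte offset into the invocation's scratch.
 * ALIGN_MUL/ALIGN_OFFSET describe the offset's known alignment, which a
 * back-end may use to pick wider memory operations. */
static uint32_t
model_load_scratch(const model_scratch_buf *buf, uint32_t offset)
{
   uint32_t value;
   memcpy(&value, buf->bytes + offset, sizeof(value));
   return value;
}

/* store_scratch: src[0] is the value, src[1] the byte offset; WRMASK selects
 * which vector components are written (only component 0 is shown here). */
static void
model_store_scratch(model_scratch_buf *buf, uint32_t value, uint32_t offset,
                    unsigned write_mask)
{
   if (write_mask & 0x1)
      memcpy(buf->bytes + offset, &value, sizeof(value));
}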
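
A possible driver-side use of the new pass, again as a sketch rather than part of the patch: the nir_var_function_temp mode, the 16-byte threshold, and glsl_get_natural_size_align_bytes as the size/align callback are illustrative choices and assume those helpers are available in the tree; any glsl_type_size_align_func and threshold will do.

#include "nir.h"

/* Hypothetical back-end hook: spill large, indirectly indexed temporaries to
 * scratch late in the compile pipeline. */
static void
example_lower_large_temps(nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_vars_to_scratch,
              nir_var_function_temp,              /* modes to consider */
              16,                                 /* size threshold, bytes */
              glsl_get_natural_size_align_bytes); /* assumed size/align cb */

   /* After the pass, nir->scratch_size is the number of bytes of
    * per-invocation scratch the back-end must allocate; every lowered access
    * now shows up as a load_scratch/store_scratch intrinsic. */
}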