From 663b2e9a92f152c314f611526b8a16ff4c06249f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 3 Oct 2016 20:32:22 -0700 Subject: [PATCH] nir: Add a "compact array" flag and IO lowering code. Certain built-in arrays, such as gl_ClipDistance[], gl_CullDistance[], gl_TessLevelInner[], and gl_TessLevelOuter[] are specified as scalar arrays. Normal scalar arrays are sparse - each array element usually occupies a whole vec4 slot. However, most hardware assumes these built-in arrays are tightly packed. The new var->data.compact flag indicates that a scalar array should be tightly packed, so a float[4] array would take up a single vec4 slot, and a float[8] array would take up two slots. They are still arrays, not vec4s, however. nir_lower_io will generate intrinsics using ARB_enhanced_layouts style component qualifiers. v2: Add nir_validate code to enforce type restrictions. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/compiler/glsl/glsl_to_nir.cpp | 1 + src/compiler/nir/nir.h | 7 ++++ src/compiler/nir/nir_gather_info.c | 9 ++-- src/compiler/nir/nir_lower_indirect_derefs.c | 8 +++- src/compiler/nir/nir_lower_io.c | 44 ++++++++++++++------ src/compiler/nir/nir_print.c | 3 +- src/compiler/nir/nir_validate.c | 13 ++++++ 7 files changed, 67 insertions(+), 18 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 90fdd694007..628f8de14b3 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -329,6 +329,7 @@ nir_visitor::visit(ir_variable *ir) var->data.explicit_index = ir->data.explicit_index; var->data.explicit_binding = ir->data.explicit_binding; var->data.has_initializer = ir->data.has_initializer; + var->data.compact = false; var->data.location_frac = ir->data.location_frac; switch (ir->data.depth_layout) { diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 3d463840793..68d62d98d86 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -229,6 +229,13 @@ typedef struct nir_variable { */ unsigned location_frac:2; + /** + * If true, this variable represents an array of scalars that should + * be tightly packed. In other words, consecutive array elements + * should be stored one component apart, rather than one slot apart. + */ + bool compact:1; + /** * Whether this is a fragment shader output implicitly initialized with * the previous contents of the specified render target at the diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 82452b439a6..07c99497146 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -94,8 +94,11 @@ mark_whole_variable(nir_shader *shader, nir_variable *var) var->data.mode == nir_var_shader_in) is_vertex_input = true; - set_io_mask(shader, var, 0, - glsl_count_attribute_slots(type, is_vertex_input)); + const unsigned slots = + var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4) + : glsl_count_attribute_slots(type, is_vertex_input); + + set_io_mask(shader, var, 0, slots); } static unsigned @@ -150,7 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref) * here marking the entire variable as used. */ if (!(glsl_type_is_matrix(type) || - (glsl_type_is_array(type) && + (glsl_type_is_array(type) && !var->data.compact && (glsl_type_is_numeric(glsl_without_array(type)) || glsl_type_is_boolean(glsl_without_array(type)))))) { diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c index 356373e2788..5c97dc8e5fe 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs.c +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@ -175,8 +175,12 @@ lower_indirect_block(nir_block *block, nir_builder *b, if (!deref_has_indirect(intrin->variables[0])) continue; - /* Only lower variables whose mode is in the mask */ - if (!(modes & intrin->variables[0]->var->data.mode)) + /* Only lower variables whose mode is in the mask, or compact + * array variables. (We can't handle indirects on tightly packed + * scalar arrays, so we need to lower them regardless.) + */ + if (!(modes & intrin->variables[0]->var->data.mode) && + !intrin->variables[0]->var->data.compact) continue; b->cursor = nir_before_instr(&intrin->instr); diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index a7e7f148f13..66289477adb 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -88,7 +88,8 @@ nir_is_per_vertex_io(nir_variable *var, gl_shader_stage stage) static nir_ssa_def * get_io_offset(nir_builder *b, nir_deref_var *deref, nir_ssa_def **vertex_index, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *), + unsigned *component) { nir_deref *tail = &deref->deref; @@ -106,6 +107,19 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, *vertex_index = vtx; } + if (deref->var->data.compact) { + assert(tail->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_scalar(glsl_without_array(deref->var->type))); + nir_deref_array *deref_array = nir_deref_as_array(tail->child); + /* We always lower indirect dereferences for "compact" array vars. */ + assert(deref_array->deref_array_type == nir_deref_array_type_direct); + + const unsigned total_offset = *component + deref_array->base_offset; + const unsigned slot_offset = total_offset / 4; + *component = total_offset % 4; + return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset); + } + /* Just emit code and let constant-folding go to town */ nir_ssa_def *offset = nir_imm_int(b, 0); @@ -143,7 +157,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, static nir_intrinsic_instr * lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *vertex_index, nir_ssa_def *offset) + nir_ssa_def *vertex_index, nir_ssa_def *offset, + unsigned component) { const nir_shader *nir = state->builder.shader; nir_variable *var = intrin->variables[0]->var; @@ -194,7 +209,7 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_intrinsic_set_base(load, var->data.driver_location); if (mode == nir_var_shader_in || mode == nir_var_shader_out) - nir_intrinsic_set_component(load, var->data.location_frac); + nir_intrinsic_set_component(load, component); if (load->intrinsic == nir_intrinsic_load_uniform) nir_intrinsic_set_range(load, state->type_size(var->type)); @@ -214,7 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, static nir_intrinsic_instr * lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *vertex_index, nir_ssa_def *offset) + nir_ssa_def *vertex_index, nir_ssa_def *offset, + unsigned component) { nir_variable *var = intrin->variables[0]->var; nir_variable_mode mode = var->data.mode; @@ -236,7 +252,7 @@ lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_intrinsic_set_base(store, var->data.driver_location); if (mode == nir_var_shader_out) - nir_intrinsic_set_component(store, var->data.location_frac); + nir_intrinsic_set_component(store, component); nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin)); @@ -289,7 +305,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state, static nir_intrinsic_instr * lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *offset) + nir_ssa_def *offset, unsigned component) { nir_variable *var = intrin->variables[0]->var; @@ -297,7 +313,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, /* Ignore interpolateAt() for flat variables - flat is flat. */ if (var->data.interpolation == INTERP_MODE_FLAT) - return lower_load(intrin, state, NULL, offset); + return lower_load(intrin, state, NULL, offset, component); nir_intrinsic_op bary_op; switch (intrin->intrinsic) { @@ -333,7 +349,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, load->num_components = intrin->num_components; nir_intrinsic_set_base(load, var->data.driver_location); - nir_intrinsic_set_component(load, var->data.location_frac); + nir_intrinsic_set_component(load, component); load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa); load->src[1] = nir_src_for_ssa(offset); @@ -398,20 +414,23 @@ nir_lower_io_block(nir_block *block, nir_ssa_def *offset; nir_ssa_def *vertex_index = NULL; + unsigned component_offset = var->data.location_frac; offset = get_io_offset(b, intrin->variables[0], per_vertex ? &vertex_index : NULL, - state->type_size); + state->type_size, &component_offset); nir_intrinsic_instr *replacement; switch (intrin->intrinsic) { case nir_intrinsic_load_var: - replacement = lower_load(intrin, state, vertex_index, offset); + replacement = lower_load(intrin, state, vertex_index, offset, + component_offset); break; case nir_intrinsic_store_var: - replacement = lower_store(intrin, state, vertex_index, offset); + replacement = lower_store(intrin, state, vertex_index, offset, + component_offset); break; case nir_intrinsic_var_atomic_add: @@ -432,7 +451,8 @@ nir_lower_io_block(nir_block *block, case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: assert(vertex_index == NULL); - replacement = lower_interpolate_at(intrin, state, offset); + replacement = lower_interpolate_at(intrin, state, offset, + component_offset); break; default: diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 242bffba472..ed0243506d5 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -432,7 +432,8 @@ print_var_decl(nir_variable *var, print_state *state) loc = buf; } - fprintf(fp, " (%s, %u)", loc, var->data.driver_location); + fprintf(fp, " (%s, %u)%s", loc, var->data.driver_location, + var->data.compact ? " compact" : ""); } if (var->constant_initializer) { diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index bd73f0437f2..15ba65f2b12 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -942,6 +942,19 @@ validate_var_decl(nir_variable *var, bool is_global, validate_state *state) /* Must have exactly one mode set */ validate_assert(state, util_bitcount(var->data.mode) == 1); + if (var->data.compact) { + /* The "compact" flag is only valid on arrays of scalars. */ + assert(glsl_type_is_array(var->type)); + + const struct glsl_type *type = glsl_get_array_element(var->type); + if (nir_is_per_vertex_io(var, state->shader->stage)) { + assert(glsl_type_is_array(type)); + assert(glsl_type_is_scalar(glsl_get_array_element(type))); + } else { + assert(glsl_type_is_scalar(type)); + } + } + /* * TODO validate some things ir_validate.cpp does (requires more GLSL type * support) -- 2.30.2