From 2953c3d76178d7589947e6ea1dbd902b7b02b3d4 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 14 Aug 2015 15:15:11 -0700 Subject: [PATCH] i965/vs: Map scalar VS input locations properly; avoid tons of MOVs. Previously, we used nir_lower_io with the scalar type_size function, which mapped VERT_ATTRIB_* locations to...some numbers. Then, in fs_visitor::nir_setup_inputs(), we created temporaries indexed by those numbers, and emitted MOVs from the actual ATTR registers to those temporaries. Virtually all of these were copy propagated away, but it's still ugly. This patch reworks our input lowering to produce NIR load_input intrinsics that properly index into the ATTR file, so we can access it directly. No changes in shader-db. v2: Fix unreachable() message (Ken), update commit message (Matt). Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 71 +++++++----------------- src/mesa/drivers/dri/i965/brw_nir.c | 23 +++++++- 2 files changed, 42 insertions(+), 52 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index bc0df6850c4..51189a2d263 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -56,61 +56,25 @@ fs_visitor::emit_nir_code() void fs_visitor::nir_setup_inputs() { + if (stage != MESA_SHADER_FRAGMENT) + return; + nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs); nir_foreach_variable(var, &nir->inputs) { - enum brw_reg_type type = brw_type_for_base_type(var->type); fs_reg input = offset(nir_inputs, bld, var->data.driver_location); fs_reg reg; - switch (stage) { - case MESA_SHADER_VERTEX: { - /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value - * stored in nir_variable::location. - * - * However, NIR's load_input intrinsics use a different index - an - * offset into a single contiguous array containing all inputs. 
- * This index corresponds to the nir_variable::driver_location field. - * - * So, we need to copy from fs_reg(ATTR, var->location) to - * offset(nir_inputs, var->data.driver_location). - */ - const glsl_type *const t = var->type->without_array(); - const unsigned components = t->components(); - const unsigned cols = t->matrix_columns; - const unsigned elts = t->vector_elements; - unsigned array_length = var->type->is_array() ? var->type->length : 1; - for (unsigned i = 0; i < array_length; i++) { - for (unsigned j = 0; j < cols; j++) { - for (unsigned k = 0; k < elts; k++) { - bld.MOV(offset(retype(input, type), bld, - components * i + elts * j + k), - offset(fs_reg(ATTR, var->data.location + i, type), - bld, 4 * j + k)); - } - } - } - break; - } - case MESA_SHADER_GEOMETRY: - case MESA_SHADER_COMPUTE: - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - unreachable("fs_visitor not used for these stages yet."); - break; - case MESA_SHADER_FRAGMENT: - if (var->data.location == VARYING_SLOT_POS) { - reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, - var->data.origin_upper_left); - emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), - input, reg), 0xF); - } else { - emit_general_interpolation(input, var->name, var->type, - (glsl_interp_qualifier) var->data.interpolation, - var->data.location, var->data.centroid, - var->data.sample); - } - break; + if (var->data.location == VARYING_SLOT_POS) { + reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, + var->data.origin_upper_left); + emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), + input, reg), 0xF); + } else { + emit_general_interpolation(input, var->name, var->type, + (glsl_interp_qualifier) var->data.interpolation, + var->data.location, var->data.centroid, + var->data.sample); } } } @@ -1575,8 +1539,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_load_input: { unsigned index = 0; for 
(unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(nir_inputs, dest.type), bld, - instr->const_index[0] + index); + fs_reg src; + if (stage == MESA_SHADER_VERTEX) { + src = offset(fs_reg(ATTR, instr->const_index[0], dest.type), bld, index); + } else { + src = offset(retype(nir_inputs, dest.type), bld, + instr->const_index[0] + index); + } if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); index++; diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 80f36dc2399..15c1b1984a1 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -31,15 +31,36 @@ static void brw_nir_lower_inputs(nir_shader *nir, bool is_scalar) { switch (nir->stage) { + case MESA_SHADER_VERTEX: + /* For now, leave the vec4 backend doing the old method. */ + if (!is_scalar) { + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, + type_size_vec4); + break; + } + + /* Start with the location of the variable's base. */ + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } + + /* Now use nir_lower_io to walk dereference chains. Attribute arrays + * are loaded as one vec4 per element (or matrix column), so we use + * type_size_vec4 here. + */ + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + break; case MESA_SHADER_GEOMETRY: foreach_list_typed(nir_variable, var, node, &nir->inputs) { var->data.driver_location = var->data.location; } break; - default: + case MESA_SHADER_FRAGMENT: nir_assign_var_locations(&nir->inputs, &nir->num_inputs, is_scalar ? type_size_scalar : type_size_vec4); break; + default: + unreachable("unsupported shader stage"); } } -- 2.30.2