From: Kenneth Graunke Date: Thu, 25 Feb 2016 07:43:17 +0000 (-0800) Subject: i965: Avoid recalculating the normal VUE map for IO lowering. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8151003ade952c3e9d8284fada9237e1311cf173;p=mesa.git i965: Avoid recalculating the normal VUE map for IO lowering. The caller already computes it. Now that we have stage specific functions, it's really easy to pass this in. Signed-off-by: Kenneth Graunke Reviewed-by: Iago Toral Quiroga --- diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 90c4f668767..883603ed98f 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -246,9 +246,8 @@ brw_nir_lower_vs_inputs(nir_shader *nir, } void -brw_nir_lower_vue_inputs(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar) +brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, + const struct brw_vue_map *vue_map) { if (!is_scalar && nir->stage == MESA_SHADER_GEOMETRY) { foreach_list_typed(nir_variable, var, node, &nir->inputs) { @@ -256,26 +255,6 @@ brw_nir_lower_vue_inputs(nir_shader *nir, } nir_lower_io(nir, nir_var_shader_in, type_size_vec4); } else { - /* The GLSL linker will have already matched up GS inputs and - * the outputs of prior stages. The driver does extend VS outputs - * in some cases, but only for legacy OpenGL or Gen4-5 hardware, - * neither of which offer geometry shader support. So we can - * safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location to make this - * work. - * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. - */ - struct brw_vue_map input_vue_map; - GLbitfield64 inputs_read = - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(devinfo, &input_vue_map, inputs_read, - nir->info.separate_shader || - nir->stage == MESA_SHADER_TESS_CTRL); - foreach_list_typed(nir_variable, var, node, &nir->inputs) { var->data.driver_location = var->data.location; } @@ -291,7 +270,7 @@ brw_nir_lower_vue_inputs(nir_shader *nir, nir_foreach_function(nir, function) { if (function->impl) { nir_foreach_block(function->impl, remap_inputs_with_vue_map, - &input_vue_map); + (void *) vue_map); } } } diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 0fbdc5fa625..2d8341fd40e 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -88,9 +88,8 @@ void brw_nir_lower_vs_inputs(nir_shader *nir, bool is_scalar, bool use_legacy_snorm_formula, const uint8_t *vs_attrib_wa_flags); -void brw_nir_lower_vue_inputs(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar); +void brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, + const struct brw_vue_map *vue_map); void brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue); void brw_nir_lower_fs_inputs(nir_shader *nir); void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 7f59db4485d..7df6c721430 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -596,9 +596,27 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY]; nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + + /* The GLSL linker will have already matched up GS inputs and the outputs + * of prior stages. The driver does extend VS outputs in some cases, but + * only for legacy OpenGL or Gen4-5 hardware, neither of which offer + * geometry shader support. So we can safely ignore that. + * + * For SSO pipelines, we use a fixed VUE map layout based on variable + * locations, so we can rely on rendezvous-by-location making this work. + * + * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not + * written by previous stages and shows up via payload magic. + */ + GLbitfield64 inputs_read = + shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; + brw_compute_vue_map(compiler->devinfo, + &c.input_vue_map, inputs_read, + shader->info.separate_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, is_scalar); - brw_nir_lower_vue_inputs(shader, compiler->devinfo, is_scalar); + brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map); brw_nir_lower_vue_outputs(shader, is_scalar); shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar); @@ -777,23 +795,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, prog_data->vertices_in = shader->info.gs.vertices_in; - /* The GLSL linker will have already matched up GS inputs and the outputs - * of prior stages. The driver does extend VS outputs in some cases, but - * only for legacy OpenGL or Gen4-5 hardware, neither of which offer - * geometry shader support. So we can safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location making this work. - * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. - */ - GLbitfield64 inputs_read = - shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(compiler->devinfo, - &c.input_vue_map, inputs_read, - shader->info.separate_shader); - /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we * need to program a URB read length of ceiling(num_slots / 2). */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 53e7aef37f2..8f77b59ea03 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -516,12 +516,17 @@ brw_compile_tcs(const struct brw_compiler *compiler, nir->info.outputs_written = key->outputs_written; nir->info.patch_outputs_written = key->patch_outputs_written; + struct brw_vue_map input_vue_map; + brw_compute_vue_map(devinfo, &input_vue_map, + nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, + true); + brw_compute_tess_vue_map(&vue_prog_data->vue_map, nir->info.outputs_written, nir->info.patch_outputs_written); nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); - brw_nir_lower_vue_inputs(nir, compiler->devinfo, is_scalar); + brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); @@ -553,11 +558,6 @@ brw_compile_tcs(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - struct brw_vue_map input_vue_map; - brw_compute_vue_map(devinfo, &input_vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - true); - /* HS does not use the usual payload pushing from URB to GRFs, * because we don't have enough registers for a full-size payload, and * the hardware is broken on Haswell anyway.