X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_link.cpp;h=9ddf023018370aeed130062a60f8233f04a8b56e;hb=379b24a40d3d34ffdaaeb1b328f50e28ecb01468;hp=e124f92cba46298cf7c506671a7cc75057a885c5;hpb=424ac809bfac60ed8482abfb7ef8c8aa88877b55;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index e124f92cba4..9ddf0230183 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -22,12 +22,13 @@
  */
 
 #include "brw_context.h"
-#include "brw_nir.h"
+#include "compiler/brw_nir.h"
 #include "brw_program.h"
 #include "compiler/glsl/ir.h"
 #include "compiler/glsl/ir_optimization.h"
 #include "compiler/glsl/program.h"
 #include "program/program.h"
+#include "main/mtypes.h"
 #include "main/shaderapi.h"
 #include "main/shaderobj.h"
 #include "main/uniforms.h"
@@ -73,10 +74,12 @@ static void
 brw_lower_packing_builtins(struct brw_context *brw,
                            exec_list *ir)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
    /* Gens < 7 don't have instructions to convert to or from half-precision,
     * and Gens < 6 don't expose that functionality.
     */
-   if (brw->gen != 6)
+   if (devinfo->gen != 6)
       return;
 
    lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
@@ -87,10 +90,8 @@ process_glsl_ir(struct brw_context *brw,
                 struct gl_shader_program *shader_prog,
                 struct gl_linked_shader *shader)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    struct gl_context *ctx = &brw->ctx;
-   const struct brw_compiler *compiler = brw->screen->compiler;
-   const struct gl_shader_compiler_options *options =
-      &ctx->Const.ShaderCompilerOptions[shader->Stage];
 
    /* Temporary memory context for any new IR. */
    void *mem_ctx = ralloc_context(NULL);
@@ -110,7 +111,7 @@ process_glsl_ir(struct brw_context *brw,
                                      EXP_TO_EXP2 |
                                      LOG_TO_LOG2 |
                                      DFREXP_DLDEXP_TO_ARITH);
-   if (brw->gen < 7) {
+   if (devinfo->gen < 7) {
       instructions_to_lower |= BIT_COUNT_TO_MATH |
                                EXTRACT_TO_SHIFTS |
                                INSERT_TO_SHIFTS |
@@ -122,7 +123,7 @@ process_glsl_ir(struct brw_context *brw,
    /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
     * if-statements need to be flattened.
     */
-   if (brw->gen < 6)
+   if (devinfo->gen < 6)
       lower_if_to_cond_assign(shader->Stage, shader->ir, 16);
 
    do_lower_texture_projection(shader->ir);
@@ -132,21 +133,6 @@ process_glsl_ir(struct brw_context *brw,
    lower_noise(shader->ir);
    lower_quadop_vector(shader->ir, false);
 
-   bool progress;
-   do {
-      progress = false;
-
-      if (compiler->scalar_stage[shader->Stage]) {
-         if (shader->Stage == MESA_SHADER_VERTEX ||
-             shader->Stage == MESA_SHADER_FRAGMENT)
-            brw_do_channel_expressions(shader->ir);
-         brw_do_vector_splitting(shader->ir);
-      }
-
-      progress = do_common_optimization(shader->ir, true, true,
-                                        options, ctx->Const.NativeIntegers) || progress;
-   } while (progress);
-
    validate_ir_tree(shader->ir);
 
    /* Now that we've finished altering the linked IR, reparent any live IR back
@@ -194,6 +180,48 @@ unify_interfaces(struct shader_info **infos)
    }
 }
 
+/**
+ * Rewrite transform feedback outputs to match the VUE header layout and
+ * mark every captured output register as written in info.
+ *
+ * Does nothing when xfb_info is NULL.
+ */
+static void
+update_xfb_info(struct gl_transform_feedback_info *xfb_info,
+                struct shader_info *info)
+{
+   if (!xfb_info)
+      return;
+
+   for (unsigned i = 0; i < xfb_info->NumOutputs; i++) {
+      struct gl_transform_feedback_output *output = &xfb_info->Outputs[i];
+
+      /* The VUE header contains three scalar fields packed together:
+       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
+       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
+       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
+       */
+      switch (output->OutputRegister) {
+      case VARYING_SLOT_LAYER:
+         assert(output->NumComponents == 1);
+         output->OutputRegister = VARYING_SLOT_PSIZ;
+         output->ComponentOffset = 1;
+         break;
+      case VARYING_SLOT_VIEWPORT:
+         assert(output->NumComponents == 1);
+         output->OutputRegister = VARYING_SLOT_PSIZ;
+         output->ComponentOffset = 2;
+         break;
+      case VARYING_SLOT_PSIZ:
+         assert(output->NumComponents == 1);
+         output->ComponentOffset = 3;
+         break;
+      }
+
+      info->outputs_written |= 1ull << output->OutputRegister;
+   }
+}
+
 extern "C" GLboolean
 brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 {
@@ -229,7 +257,79 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 
       prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
                                  compiler->scalar_stage[stage]);
-      infos[stage] = prog->nir->info;
+   }
+
+   /* Determine first and last stage. */
+   unsigned first = MESA_SHADER_STAGES;
+   unsigned last = 0;
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (!shProg->_LinkedShaders[i])
+         continue;
+      if (first == MESA_SHADER_STAGES)
+         first = i;
+      last = i;
+   }
+
+   /* Linking the stages in the opposite order (from fragment to vertex)
+    * ensures that inter-shader outputs written to in an earlier stage
+    * are eliminated if they are (transitively) not used in a later
+    * stage.
+    */
+   if (first != last) {
+      int next = last;
+      for (int i = next - 1; i >= 0; i--) {
+         if (shProg->_LinkedShaders[i] == NULL)
+            continue;
+
+         nir_shader *producer = shProg->_LinkedShaders[i]->Program->nir;
+         nir_shader *consumer = shProg->_LinkedShaders[next]->Program->nir;
+
+         nir_remove_dead_variables(producer, nir_var_shader_out);
+         nir_remove_dead_variables(consumer, nir_var_shader_in);
+
+         if (nir_remove_unused_varyings(producer, consumer)) {
+            nir_lower_global_vars_to_local(producer);
+            nir_lower_global_vars_to_local(consumer);
+
+            nir_variable_mode indirect_mask = (nir_variable_mode) 0;
+            if (compiler->glsl_compiler_options[i].EmitNoIndirectTemp)
+               indirect_mask = (nir_variable_mode) nir_var_local;
+
+            /* The backend might not be able to handle indirects on
+             * temporaries so we need to lower indirects on any of the
+             * varyings we have demoted here.
+             */
+            nir_lower_indirect_derefs(producer, indirect_mask);
+            nir_lower_indirect_derefs(consumer, indirect_mask);
+
+            const bool p_is_scalar = compiler->scalar_stage[producer->stage];
+            shProg->_LinkedShaders[i]->Program->nir =
+               brw_nir_optimize(producer, compiler, p_is_scalar);
+
+            const bool c_is_scalar = compiler->scalar_stage[consumer->stage];
+            shProg->_LinkedShaders[next]->Program->nir =
+               brw_nir_optimize(consumer, compiler, c_is_scalar);
+         }
+
+         next = i;
+      }
+   }
+
+   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
+      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
+      if (!shader)
+         continue;
+
+      struct gl_program *prog = shader->Program;
+      nir_shader *nir = shader->Program->nir;
+      brw_shader_gather_info(nir, prog);
+
+      NIR_PASS_V(nir, nir_lower_samplers, shProg);
+      NIR_PASS_V(nir, nir_lower_atomics, shProg);
+
+      infos[stage] = &prog->nir->info;
+
+      update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);
 
       /* Make a pass over the IR to add state references for any built-in
        * uniforms that are used. This has to be done now (during linking).