X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_link.cpp;h=9ddf023018370aeed130062a60f8233f04a8b56e;hp=14421d421b6f6ef6d2d2f70567b9099d0f74c1a6;hb=379b24a40d3d34ffdaaeb1b328f50e28ecb01468;hpb=44d6c0c805d2911cc5dfe853e5bc5a505f87775f diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 14421d421b6..9ddf0230183 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -21,16 +21,17 @@ * IN THE SOFTWARE. */ -#include "main/macros.h" #include "brw_context.h" -#include "brw_vs.h" -#include "brw_gs.h" -#include "brw_fs.h" -#include "brw_cfg.h" -#include "brw_nir.h" -#include "glsl/ir_optimization.h" -#include "glsl/glsl_parser_extras.h" +#include "compiler/brw_nir.h" +#include "brw_program.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_optimization.h" +#include "compiler/glsl/program.h" +#include "program/program.h" +#include "main/mtypes.h" #include "main/shaderapi.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" /** * Performs a compile of the shader stages even when we don't know @@ -41,21 +42,29 @@ static bool brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *sh_prog) { - struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX]; - struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + struct gl_linked_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX]; + struct gl_linked_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]; + struct gl_linked_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; + struct gl_linked_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; + struct gl_linked_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + struct gl_linked_shader *cs = 
sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + + if (fs && !brw_fs_precompile(ctx, fs->Program)) + return false; + + if (gs && !brw_gs_precompile(ctx, gs->Program)) + return false; - if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program)) + if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program)) return false; - if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program)) + if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program)) return false; - if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program)) + if (vs && !brw_vs_precompile(ctx, vs->Program)) return false; - if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program)) + if (cs && !brw_cs_precompile(ctx, cs->Program)) return false; return true; @@ -63,122 +72,67 @@ brw_shader_precompile(struct gl_context *ctx, static void brw_lower_packing_builtins(struct brw_context *brw, - gl_shader_stage shader_type, exec_list *ir) { - const struct brw_compiler *compiler = brw->intelScreen->compiler; - - int ops = LOWER_PACK_SNORM_2x16 - | LOWER_UNPACK_SNORM_2x16 - | LOWER_PACK_UNORM_2x16 - | LOWER_UNPACK_UNORM_2x16; - - if (compiler->scalar_stage[shader_type]) { - ops |= LOWER_UNPACK_UNORM_4x8 - | LOWER_UNPACK_SNORM_4x8 - | LOWER_PACK_UNORM_4x8 - | LOWER_PACK_SNORM_4x8; - } + const struct gen_device_info *devinfo = &brw->screen->devinfo; - if (brw->gen >= 7) { - /* Gen7 introduced the f32to16 and f16to32 instructions, which can be - * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no - * lowering is needed. For SOA code, the Half2x16 ops must be - * scalarized. - */ - if (compiler->scalar_stage[shader_type]) { - ops |= LOWER_PACK_HALF_2x16_TO_SPLIT - | LOWER_UNPACK_HALF_2x16_TO_SPLIT; - } - } else { - ops |= LOWER_PACK_HALF_2x16 - | LOWER_UNPACK_HALF_2x16; - } + /* Gens < 7 don't have instructions to convert to or from half-precision, + * and Gens < 6 don't expose that functionality. 
+ */ + if (devinfo->gen != 6) + return; - lower_packing_builtins(ir, ops); + lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16); } static void -process_glsl_ir(gl_shader_stage stage, - struct brw_context *brw, +process_glsl_ir(struct brw_context *brw, struct gl_shader_program *shader_prog, - struct gl_shader *shader) + struct gl_linked_shader *shader) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; struct gl_context *ctx = &brw->ctx; - const struct brw_compiler *compiler = brw->intelScreen->compiler; - const struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[shader->Stage]; /* Temporary memory context for any new IR. */ void *mem_ctx = ralloc_context(NULL); ralloc_adopt(mem_ctx, shader->ir); + lower_blend_equation_advanced(shader); + /* lower_packing_builtins() inserts arithmetic instructions, so it * must precede lower_instructions(). */ - brw_lower_packing_builtins(brw, shader->Stage, shader->ir); + brw_lower_packing_builtins(brw, shader->ir); do_mat_op_to_vec(shader->ir); - const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0; - lower_instructions(shader->ir, - MOD_TO_FLOOR | - DIV_TO_MUL_RCP | - SUB_TO_ADD_NEG | - EXP_TO_EXP2 | - LOG_TO_LOG2 | - bitfield_insert | - LDEXP_TO_ARITH | - CARRY_TO_ARITH | - BORROW_TO_ARITH); + + unsigned instructions_to_lower = (DIV_TO_MUL_RCP | + SUB_TO_ADD_NEG | + EXP_TO_EXP2 | + LOG_TO_LOG2 | + DFREXP_DLDEXP_TO_ARITH); + if (devinfo->gen < 7) { + instructions_to_lower |= BIT_COUNT_TO_MATH | + EXTRACT_TO_SHIFTS | + INSERT_TO_SHIFTS | + REVERSE_TO_SHIFTS; + } + + lower_instructions(shader->ir, instructions_to_lower); /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, * if-statements need to be flattened. 
*/ - if (brw->gen < 6) - lower_if_to_cond_assign(shader->ir, 16); + if (devinfo->gen < 6) + lower_if_to_cond_assign(shader->Stage, shader->ir, 16); do_lower_texture_projection(shader->ir); - brw_lower_texture_gradients(brw, shader->ir); do_vec_index_to_cond_assign(shader->ir); lower_vector_insert(shader->ir, true); lower_offset_arrays(shader->ir); - brw_do_lower_unnormalized_offset(shader->ir); lower_noise(shader->ir); lower_quadop_vector(shader->ir, false); - bool lowered_variable_indexing = - lower_variable_index_to_cond_assign((gl_shader_stage)stage, - shader->ir, - options->EmitNoIndirectInput, - options->EmitNoIndirectOutput, - options->EmitNoIndirectTemp, - options->EmitNoIndirectUniform); - - if (unlikely(brw->perf_debug && lowered_variable_indexing)) { - perf_debug("Unsupported form of variable indexing in %s; falling " - "back to very inefficient code generation\n", - _mesa_shader_stage_to_abbrev(shader->Stage)); - } - - bool progress; - do { - progress = false; - - if (compiler->scalar_stage[shader->Stage]) { - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); - } - - progress = do_lower_jumps(shader->ir, true, true, - true, /* main return */ - false, /* continue */ - false /* loops */ - ) || progress; - - progress = do_common_optimization(shader->ir, true, true, - options, ctx->Const.NativeIntegers) || progress; - } while (progress); - validate_ir_tree(shader->ir); /* Now that we've finished altering the linked IR, reparent any live IR back @@ -190,76 +144,215 @@ process_glsl_ir(gl_shader_stage stage, if (ctx->_Shader->Flags & GLSL_DUMP) { fprintf(stderr, "\n"); - fprintf(stderr, "GLSL IR for linked %s program %d:\n", - _mesa_shader_stage_to_string(shader->Stage), - shader_prog->Name); - _mesa_print_ir(stderr, shader->ir, NULL); + if (shader->ir) { + fprintf(stderr, "GLSL IR for linked %s program %d:\n", + _mesa_shader_stage_to_string(shader->Stage), + shader_prog->Name); + _mesa_print_ir(stderr, shader->ir, NULL); + } else { 
+ fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be " + "from cache)\n", _mesa_shader_stage_to_string(shader->Stage), + shader_prog->Name); + } fprintf(stderr, "\n"); } } -GLboolean +static void +unify_interfaces(struct shader_info **infos) +{ + struct shader_info *prev_info = NULL; + + for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { + if (!infos[i]) + continue; + + if (prev_info) { + prev_info->outputs_written |= infos[i]->inputs_read & + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); + infos[i]->inputs_read |= prev_info->outputs_written & + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); + + prev_info->patch_outputs_written |= infos[i]->patch_inputs_read; + infos[i]->patch_inputs_read |= prev_info->patch_outputs_written; + } + prev_info = infos[i]; + } +} + +static void +update_xfb_info(struct gl_transform_feedback_info *xfb_info, + struct shader_info *info) +{ + if (!xfb_info) + return; + + for (unsigned i = 0; i < xfb_info->NumOutputs; i++) { + struct gl_transform_feedback_output *output = &xfb_info->Outputs[i]; + + /* The VUE header contains three scalar fields packed together: + * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w + * - gl_Layer is stored in VARYING_SLOT_PSIZ.y + * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z + */ + switch (output->OutputRegister) { + case VARYING_SLOT_LAYER: + assert(output->NumComponents == 1); + output->OutputRegister = VARYING_SLOT_PSIZ; + output->ComponentOffset = 1; + break; + case VARYING_SLOT_VIEWPORT: + assert(output->NumComponents == 1); + output->OutputRegister = VARYING_SLOT_PSIZ; + output->ComponentOffset = 2; + break; + case VARYING_SLOT_PSIZ: + assert(output->NumComponents == 1); + output->ComponentOffset = 3; + break; + } + + info->outputs_written |= 1ull << output->OutputRegister; + } +} + +extern "C" GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) { struct brw_context *brw = brw_context(ctx); - 
const struct brw_compiler *compiler = brw->intelScreen->compiler; + const struct brw_compiler *compiler = brw->screen->compiler; unsigned int stage; + struct shader_info *infos[MESA_SHADER_STAGES] = { 0, }; for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { - struct gl_shader *shader = shProg->_LinkedShaders[stage]; + struct gl_linked_shader *shader = shProg->_LinkedShaders[stage]; if (!shader) - continue; + continue; - struct gl_program *prog = - ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage), - shader->Name); - if (!prog) - return false; + struct gl_program *prog = shader->Program; prog->Parameters = _mesa_new_parameter_list(); - _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog); + process_glsl_ir(brw, shProg, shader); - process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader); + _mesa_copy_linked_program_data(shProg, shader); - /* Make a pass over the IR to add state references for any built-in - * uniforms that are used. This has to be done now (during linking). - * Code generation doesn't happen until the first time this shader is - * used for rendering. Waiting until then to generate the parameters is - * too late. At that point, the values for the built-in uniforms won't - * get sent to the shader. 
- */ - foreach_in_list(ir_instruction, node, shader->ir) { - ir_variable *var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != ir_var_uniform) - || (strncmp(var->name, "gl_", 3) != 0)) - continue; + prog->ShadowSamplers = shader->shadow_samplers; + _mesa_update_shader_textures_used(shProg, prog); - const ir_state_slot *const slots = var->get_state_slots(); - assert(slots != NULL); + bool debug_enabled = + (INTEL_DEBUG & intel_debug_flag_for_shader_stage(shader->Stage)); - for (unsigned int i = 0; i < var->get_num_state_slots(); i++) { - _mesa_add_state_reference(prog->Parameters, - (gl_state_index *) slots[i].tokens); - } + if (debug_enabled && shader->ir) { + fprintf(stderr, "GLSL IR for native %s shader %d:\n", + _mesa_shader_stage_to_string(shader->Stage), shProg->Name); + _mesa_print_ir(stderr, shader->ir, NULL); + fprintf(stderr, "\n\n"); } - do_set_program_inouts(shader->ir, prog, shader->Stage); + prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, + compiler->scalar_stage[stage]); + } - prog->SamplersUsed = shader->active_samplers; - prog->ShadowSamplers = shader->shadow_samplers; - _mesa_update_shader_textures_used(shProg, prog); + /* Determine first and last stage. */ + unsigned first = MESA_SHADER_STAGES; + unsigned last = 0; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!shProg->_LinkedShaders[i]) + continue; + if (first == MESA_SHADER_STAGES) + first = i; + last = i; + } - _mesa_reference_program(ctx, &shader->Program, prog); + /* Linking the stages in the opposite order (from fragment to vertex) + * ensures that inter-shader outputs written to in an earlier stage + * are eliminated if they are (transitively) not used in a later + * stage. 
+ */ + if (first != last) { + int next = last; + for (int i = next - 1; i >= 0; i--) { + if (shProg->_LinkedShaders[i] == NULL) + continue; + + nir_shader *producer = shProg->_LinkedShaders[i]->Program->nir; + nir_shader *consumer = shProg->_LinkedShaders[next]->Program->nir; + + nir_remove_dead_variables(producer, nir_var_shader_out); + nir_remove_dead_variables(consumer, nir_var_shader_in); + + if (nir_remove_unused_varyings(producer, consumer)) { + nir_lower_global_vars_to_local(producer); + nir_lower_global_vars_to_local(consumer); + + nir_variable_mode indirect_mask = (nir_variable_mode) 0; + if (compiler->glsl_compiler_options[i].EmitNoIndirectTemp) + indirect_mask = (nir_variable_mode) nir_var_local; + + /* The backend might not be able to handle indirects on + * temporaries so we need to lower indirects on any of the + * varyings we have demoted here. + */ + nir_lower_indirect_derefs(producer, indirect_mask); + nir_lower_indirect_derefs(consumer, indirect_mask); + + const bool p_is_scalar = compiler->scalar_stage[producer->stage]; + shProg->_LinkedShaders[i]->Program->nir = + brw_nir_optimize(producer, compiler, p_is_scalar); + + const bool c_is_scalar = compiler->scalar_stage[consumer->stage]; + shProg->_LinkedShaders[next]->Program->nir = + brw_nir_optimize(consumer, compiler, c_is_scalar); + } + + next = i; + } + } - brw_add_texrect_params(prog); + for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { + struct gl_linked_shader *shader = shProg->_LinkedShaders[stage]; + if (!shader) + continue; - prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, - compiler->scalar_stage[stage]); + struct gl_program *prog = shader->Program; + nir_shader *nir = shader->Program->nir; + brw_shader_gather_info(nir, prog); - _mesa_reference_program(ctx, &prog, NULL); + NIR_PASS_V(nir, nir_lower_samplers, shProg); + NIR_PASS_V(nir, nir_lower_atomics, shProg); + + infos[stage] = &prog->nir->info; + + 
update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]); + + /* Make a pass over the IR to add state references for any built-in + * uniforms that are used. This has to be done now (during linking). + * Code generation doesn't happen until the first time this shader is + * used for rendering. Waiting until then to generate the parameters is + * too late. At that point, the values for the built-in uniforms won't + * get sent to the shader. + */ + nir_foreach_variable(var, &prog->nir->uniforms) { + if (strncmp(var->name, "gl_", 3) == 0) { + const nir_state_slot *const slots = var->state_slots; + assert(var->state_slots != NULL); + + for (unsigned int i = 0; i < var->num_state_slots; i++) { + _mesa_add_state_reference(prog->Parameters, + (gl_state_index *)slots[i].tokens); + } + } + } } + /* The linker tries to dead code eliminate unused varying components, + * and make sure interfaces match. But it isn't able to do so in all + * cases. So, explicitly make the interfaces match by OR'ing together + * the inputs_read/outputs_written bitfields of adjacent stages. + */ + if (!shProg->SeparateShader) + unify_interfaces(infos); + if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) { for (unsigned i = 0; i < shProg->NumShaders; i++) { const struct gl_shader *sh = shProg->Shaders[i]; @@ -277,5 +370,17 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) if (brw->precompile && !brw_shader_precompile(ctx, shProg)) return false; + build_program_resource_list(ctx, shProg); + + for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { + struct gl_linked_shader *shader = shProg->_LinkedShaders[stage]; + if (!shader) + continue; + + /* The GLSL IR won't be needed anymore. */ + ralloc_free(shader->ir); + shader->ir = NULL; + } + return true; }