src/mesa/drivers/dri/i965/brw_link.cpp

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_context.h"
  25 #include "compiler/brw_nir.h"
  26 #include "brw_program.h"
  27 #include "compiler/glsl/ir.h"
  28 #include "compiler/glsl/ir_optimization.h"
  29 #include "compiler/glsl/program.h"
  30 #include "program/program.h"
  31 #include "main/mtypes.h"
  32 #include "main/shaderapi.h"
  33 #include "main/shaderobj.h"
  34 #include "main/uniforms.h"
  35
  36 /**
  37  * Performs a compile of the shader stages even when we don't know
  38  * what non-orthogonal state will be set, in the hope that it reflects
  39  * the eventual NOS used, and thus allows us to produce link failures.
  40  */
  41 static bool
  42 brw_shader_precompile(struct gl_context *ctx,
  43                       struct gl_shader_program *sh_prog)
  44 {
  45    struct gl_linked_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
  46    struct gl_linked_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
  47    struct gl_linked_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
  48    struct gl_linked_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
  49    struct gl_linked_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
  50    struct gl_linked_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
  51
  52    if (fs && !brw_fs_precompile(ctx, fs->Program))
  53       return false;
  54
  55    if (gs && !brw_gs_precompile(ctx, gs->Program))
  56       return false;
  57
  58    if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
  59       return false;
  60
  61    if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program))
  62       return false;
  63
  64    if (vs && !brw_vs_precompile(ctx, vs->Program))
  65       return false;
  66
  67    if (cs && !brw_cs_precompile(ctx, cs->Program))
  68       return false;
  69
  70    return true;
  71 }
  72
  73 static void
  74 brw_lower_packing_builtins(struct brw_context *brw,
  75                            exec_list *ir)
  76 {
  77    const struct gen_device_info *devinfo = &brw->screen->devinfo;
  78
  79    /* Gens < 7 don't have instructions to convert to or from half-precision,
  80     * and Gens < 6 don't expose that functionality.
  81     */
  82    if (devinfo->gen != 6)
  83       return;
  84
  85    lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
  86 }
  87
  88 static void
  89 process_glsl_ir(struct brw_context *brw,
  90                 struct gl_shader_program *shader_prog,
  91                 struct gl_linked_shader *shader)
  92 {
  93    const struct gen_device_info *devinfo = &brw->screen->devinfo;
  94    struct gl_context *ctx = &brw->ctx;
  95
  96    /* Temporary memory context for any new IR. */
  97    void *mem_ctx = ralloc_context(NULL);
  98
  99    ralloc_adopt(mem_ctx, shader->ir);
 100
 101    lower_blend_equation_advanced(shader);
 102
 103    /* lower_packing_builtins() inserts arithmetic instructions, so it
 104     * must precede lower_instructions().
 105     */
 106    brw_lower_packing_builtins(brw, shader->ir);
 107    do_mat_op_to_vec(shader->ir);
 108
 109    unsigned instructions_to_lower = (DIV_TO_MUL_RCP |
 110                                      SUB_TO_ADD_NEG |
 111                                      EXP_TO_EXP2 |
 112                                      LOG_TO_LOG2 |
 113                                      DFREXP_DLDEXP_TO_ARITH);
 114    if (devinfo->gen < 7) {
 115       instructions_to_lower |= BIT_COUNT_TO_MATH |
 116                                EXTRACT_TO_SHIFTS |
 117                                INSERT_TO_SHIFTS |
 118                                REVERSE_TO_SHIFTS;
 119    }
 120
 121    lower_instructions(shader->ir, instructions_to_lower);
 122
 123    /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
 124     * if-statements need to be flattened.
 125     */
 126    if (devinfo->gen < 6)
 127       lower_if_to_cond_assign(shader->Stage, shader->ir, 16);
 128
 129    do_lower_texture_projection(shader->ir);
 130    do_vec_index_to_cond_assign(shader->ir);
 131    lower_vector_insert(shader->ir, true);
 132    lower_offset_arrays(shader->ir);
 133    lower_noise(shader->ir);
 134    lower_quadop_vector(shader->ir, false);
 135
 136    validate_ir_tree(shader->ir);
 137
 138    /* Now that we've finished altering the linked IR, reparent any live IR back
 139     * to the permanent memory context, and free the temporary one (discarding any
 140     * junk we optimized away).
 141     */
 142    reparent_ir(shader->ir, shader->ir);
 143    ralloc_free(mem_ctx);
 144
 145    if (ctx->_Shader->Flags & GLSL_DUMP) {
 146       fprintf(stderr, "\n");
 147       if (shader->ir) {
 148          fprintf(stderr, "GLSL IR for linked %s program %d:\n",
 149                  _mesa_shader_stage_to_string(shader->Stage),
 150                  shader_prog->Name);
 151          _mesa_print_ir(stderr, shader->ir, NULL);
 152       } else {
 153          fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be "
 154                  "from cache)\n", _mesa_shader_stage_to_string(shader->Stage),
 155                  shader_prog->Name);
 156       }
 157       fprintf(stderr, "\n");
 158    }
 159 }
 160
 161 static void
 162 unify_interfaces(struct shader_info **infos)
 163 {
 164    struct shader_info *prev_info = NULL;
 165
 166    for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {
 167       if (!infos[i])
 168          continue;
 169
 170       if (prev_info) {
 171          prev_info->outputs_written |= infos[i]->inputs_read &
 172             ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
 173          infos[i]->inputs_read |= prev_info->outputs_written &
 174             ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
 175
 176          prev_info->patch_outputs_written |= infos[i]->patch_inputs_read;
 177          infos[i]->patch_inputs_read |= prev_info->patch_outputs_written;
 178       }
 179       prev_info = infos[i];
 180    }
 181 }
 182
 183 static void
 184 update_xfb_info(struct gl_transform_feedback_info *xfb_info,
 185                 struct shader_info *info)
 186 {
 187    if (!xfb_info)
 188       return;
 189
 190    for (unsigned i = 0; i < xfb_info->NumOutputs; i++) {
 191       struct gl_transform_feedback_output *output = &xfb_info->Outputs[i];
 192
 193       /* The VUE header contains three scalar fields packed together:
 194        * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
 195        * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
 196        * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
 197        */
 198       switch (output->OutputRegister) {
 199       case VARYING_SLOT_LAYER:
 200          assert(output->NumComponents == 1);
 201          output->OutputRegister = VARYING_SLOT_PSIZ;
 202          output->ComponentOffset = 1;
 203          break;
 204       case VARYING_SLOT_VIEWPORT:
 205          assert(output->NumComponents == 1);
 206          output->OutputRegister = VARYING_SLOT_PSIZ;
 207          output->ComponentOffset = 2;
 208          break;
 209       case VARYING_SLOT_PSIZ:
 210          assert(output->NumComponents == 1);
 211          output->ComponentOffset = 3;
 212          break;
 213       }
 214
 215       info->outputs_written |= 1ull << output->OutputRegister;
 216    }
 217 }
 218
 219 extern "C" GLboolean
 220 brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 221 {
 222    struct brw_context *brw = brw_context(ctx);
 223    const struct brw_compiler *compiler = brw->screen->compiler;
 224    unsigned int stage;
 225    struct shader_info *infos[MESA_SHADER_STAGES] = { 0, };
 226
 227    for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
 228       struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
 229       if (!shader)
 230          continue;
 231
 232       struct gl_program *prog = shader->Program;
 233       prog->Parameters = _mesa_new_parameter_list();
 234
 235       process_glsl_ir(brw, shProg, shader);
 236
 237       _mesa_copy_linked_program_data(shProg, shader);
 238
 239       prog->ShadowSamplers = shader->shadow_samplers;
 240       _mesa_update_shader_textures_used(shProg, prog);
 241
 242       bool debug_enabled =
 243          (INTEL_DEBUG & intel_debug_flag_for_shader_stage(shader->Stage));
 244
 245       if (debug_enabled && shader->ir) {
 246          fprintf(stderr, "GLSL IR for native %s shader %d:\n",
 247                  _mesa_shader_stage_to_string(shader->Stage), shProg->Name);
 248          _mesa_print_ir(stderr, shader->ir, NULL);
 249          fprintf(stderr, "\n\n");
 250       }
 251
 252       prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
 253                                  compiler->scalar_stage[stage]);
 254    }
 255
 256    /* Determine first and last stage. */
 257    unsigned first = MESA_SHADER_STAGES;
 258    unsigned last = 0;
 259    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 260       if (!shProg->_LinkedShaders[i])
 261          continue;
 262       if (first == MESA_SHADER_STAGES)
 263          first = i;
 264       last = i;
 265    }
 266
 267    /* Linking the stages in the opposite order (from fragment to vertex)
 268     * ensures that inter-shader outputs written to in an earlier stage
 269     * are eliminated if they are (transitively) not used in a later
 270     * stage.
 271     */
 272     if (first != last) {
 273        int next = last;
 274        for (int i = next - 1; i >= 0; i--) {
 275           if (shProg->_LinkedShaders[i] == NULL)
 276              continue;
 277
 278             nir_shader *producer = shProg->_LinkedShaders[i]->Program->nir;
 279             nir_shader *consumer = shProg->_LinkedShaders[next]->Program->nir;
 280
 281             nir_remove_dead_variables(producer, nir_var_shader_out);
 282             nir_remove_dead_variables(consumer, nir_var_shader_in);
 283
 284             if (nir_remove_unused_varyings(producer, consumer)) {
 285                nir_lower_global_vars_to_local(producer);
 286                nir_lower_global_vars_to_local(consumer);
 287
 288                nir_variable_mode indirect_mask = (nir_variable_mode) 0;
 289                if (compiler->glsl_compiler_options[i].EmitNoIndirectTemp)
 290                   indirect_mask = (nir_variable_mode) nir_var_local;
 291
 292                /* The backend might not be able to handle indirects on
 293                 * temporaries so we need to lower indirects on any of the
 294                 * varyings we have demoted here.
 295                 */
 296                nir_lower_indirect_derefs(producer, indirect_mask);
 297                nir_lower_indirect_derefs(consumer, indirect_mask);
 298
 299                const bool p_is_scalar = compiler->scalar_stage[producer->stage];
 300                shProg->_LinkedShaders[i]->Program->nir =
 301                  brw_nir_optimize(producer, compiler, p_is_scalar);
 302
 303                const bool c_is_scalar = compiler->scalar_stage[producer->stage];
 304                shProg->_LinkedShaders[next]->Program->nir =
 305                  brw_nir_optimize(consumer, compiler, c_is_scalar);
 306             }
 307
 308             next = i;
 309        }
 310     }
 311
 312    for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
 313       struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
 314       if (!shader)
 315          continue;
 316
 317       struct gl_program *prog = shader->Program;
 318       nir_shader *nir = shader->Program->nir;
 319       brw_shader_gather_info(nir, prog);
 320
 321       NIR_PASS_V(nir, nir_lower_samplers, shProg);
 322       NIR_PASS_V(nir, nir_lower_atomics, shProg);
 323
 324       infos[stage] = &prog->nir->info;
 325
 326       update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);
 327
 328       /* Make a pass over the IR to add state references for any built-in
 329        * uniforms that are used.  This has to be done now (during linking).
 330        * Code generation doesn't happen until the first time this shader is
 331        * used for rendering.  Waiting until then to generate the parameters is
 332        * too late.  At that point, the values for the built-in uniforms won't
 333        * get sent to the shader.
 334        */
 335       nir_foreach_variable(var, &prog->nir->uniforms) {
 336          if (strncmp(var->name, "gl_", 3) == 0) {
 337             const nir_state_slot *const slots = var->state_slots;
 338             assert(var->state_slots != NULL);
 339
 340             for (unsigned int i = 0; i < var->num_state_slots; i++) {
 341                _mesa_add_state_reference(prog->Parameters,
 342                                          (gl_state_index *)slots[i].tokens);
 343             }
 344          }
 345       }
 346    }
 347
 348    /* The linker tries to dead code eliminate unused varying components,
 349     * and make sure interfaces match.  But it isn't able to do so in all
 350     * cases.  So, explicitly make the interfaces match by OR'ing together
 351     * the inputs_read/outputs_written bitfields of adjacent stages.
 352     */
 353    if (!shProg->SeparateShader)
 354       unify_interfaces(infos);
 355
 356    if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
 357       for (unsigned i = 0; i < shProg->NumShaders; i++) {
 358          const struct gl_shader *sh = shProg->Shaders[i];
 359          if (!sh)
 360             continue;
 361
 362          fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
 363                  _mesa_shader_stage_to_string(sh->Stage),
 364                  i, shProg->Name);
 365          fprintf(stderr, "%s", sh->Source);
 366          fprintf(stderr, "\n");
 367       }
 368    }
 369
 370    if (brw->precompile && !brw_shader_precompile(ctx, shProg))
 371       return false;
 372
 373    build_program_resource_list(ctx, shProg);
 374
 375    for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
 376       struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
 377       if (!shader)
 378          continue;
 379
 380       /* The GLSL IR won't be needed anymore. */
 381       ralloc_free(shader->ir);
 382       shader->ir = NULL;
 383    }
 384
 385    return true;
 386 }