i965: Return progress from brw_nir_lower_uniforms().
[mesa.git] src/mesa/drivers/dri/i965/brw_program.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */

#include <pthread.h>
#include "main/imports.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
#include "util/ralloc.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/glsl_to_nir.h"

#include "brw_program.h"
#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"

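/* Assign locations to the shader's uniform variables and lower uniform
 * access to load_uniform intrinsics, using the scalar or vec4 layout that
 * matches the back-end for this stage.  Returns true if nir_lower_io
 * reported progress.
 */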
static bool
brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
{
   if (is_scalar) {
      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                               type_size_scalar_bytes);
      return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
   } else {
      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                               type_size_vec4_bytes);
      return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
   }
}

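/* Translate a GLSL shader (via glsl_to_nir) or an ARB/fixed-function
 * program (via prog_to_nir) into NIR, then run the common i965 lowering
 * and optimization passes on the result.
 */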
nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   struct gl_context *ctx = &brw->ctx;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   bool progress;
   nir_shader *nir;

   /* First, lower the GLSL IR or Mesa IR to NIR */
   if (shader_prog) {
      nir = glsl_to_nir(shader_prog, stage, options);
      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
      nir_lower_returns(nir);
      nir_validate_shader(nir);
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
   } else {
      nir = prog_to_nir(prog, options);
      NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir);

   (void)progress;

   nir = brw_preprocess_nir(brw->screen->compiler, nir);

   if (stage == MESA_SHADER_FRAGMENT) {
      static const struct nir_lower_wpos_ytransform_options wpos_options = {
         .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
         .fs_coord_pixel_center_integer = 1,
         .fs_coord_origin_upper_left = 1,
      };
      _mesa_add_state_reference(prog->Parameters,
                                (gl_state_index *) wpos_options.state_tokens);

      NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
   }

   NIR_PASS(progress, nir, nir_lower_system_values);
   NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* nir_shader may have been cloned so make sure shader_info is in sync */
   if (nir->info != &prog->info) {
      const char *name = prog->info.name;
      const char *label = prog->info.label;
      prog->info = *nir->info;
      prog->info.name = name;
      prog->info.label = label;
   }

   if (shader_prog) {
      NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
      NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
   }

   return nir;
}

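/* Hand out a unique program ID.  A screen may be shared by multiple
 * contexts on different threads, so the counter is guarded by a mutex.
 */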
static unsigned
get_new_program_id(struct intel_screen *screen)
{
   static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
   pthread_mutex_lock(&m);
   unsigned id = screen->program_id++;
   pthread_mutex_unlock(&m);
   return id;
}

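/* Called via ctx->Driver.NewProgram() to allocate the driver's
 * gl_program subclass for the given target.
 */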
static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
                                        GLuint id, bool is_arb_asm)
{
   struct brw_context *brw = brw_context(ctx);

   switch (target) {
   case GL_VERTEX_PROGRAM_ARB:
   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct brw_program *prog = rzalloc(NULL, struct brw_program);
      if (prog) {
         prog->id = get_new_program_id(brw->screen);

         return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
      }
      else
         return NULL;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *prog = rzalloc(NULL, struct brw_program);

      if (prog) {
         prog->id = get_new_program_id(brw->screen);

         return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
      }
      else
         return NULL;
   }

   default:
      unreachable("Unsupported target in brwNewProgram()");
   }
}

static void brwDeleteProgram( struct gl_context *ctx,
                              struct gl_program *prog )
{
   struct brw_context *brw = brw_context(ctx);

   /* Beware!  prog's refcount has reached zero, and it's about to be freed.
    *
    * In brw_upload_pipeline_state(), we compare brw->foo_program to
    * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
    * pointer has changed.
    *
    * We cannot leave brw->foo_program as a dangling pointer to the dead
    * program.  malloc() may allocate the same memory for a new gl_program,
    * causing us to see matching pointers...but totally different programs.
    *
    * We cannot set brw->foo_program to NULL, either.  If we've deleted the
    * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
    * would cause us to see matching pointers (NULL == NULL), and fail to
    * detect that a program has changed since our last draw.
    *
    * So, set it to a bogus gl_program pointer that will never match,
    * causing us to properly reevaluate the state on our next draw.
    *
    * Getting this wrong causes heisenbugs which are very hard to catch,
    * as you need a very specific allocation pattern to hit the problem.
    */
   static const struct gl_program deleted_program;

   if (brw->vertex_program == prog)
      brw->vertex_program = &deleted_program;

   if (brw->tess_ctrl_program == prog)
      brw->tess_ctrl_program = &deleted_program;

   if (brw->tess_eval_program == prog)
      brw->tess_eval_program = &deleted_program;

   if (brw->geometry_program == prog)
      brw->geometry_program = &deleted_program;

   if (brw->fragment_program == prog)
      brw->fragment_program = &deleted_program;

   if (brw->compute_program == prog)
      brw->compute_program = &deleted_program;

   _mesa_delete_program( ctx, prog );
}


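/* Called via ctx->Driver.ProgramStringNotify() when an ARB program's
 * source string changes: assign a fresh program ID, rebuild the NIR, and
 * kick off a precompile.
 */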
static GLboolean
brwProgramStringNotify(struct gl_context *ctx,
                       GLenum target,
                       struct gl_program *prog)
{
   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);

   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *newFP = brw_program(prog);
      const struct brw_program *curFP =
         brw_program_const(brw->fragment_program);

      if (newFP == curFP)
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      newFP->id = get_new_program_id(brw->screen);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);

      brw_fs_precompile(ctx, prog);
      break;
   }
   case GL_VERTEX_PROGRAM_ARB: {
      struct brw_program *newVP = brw_program(prog);
      const struct brw_program *curVP =
         brw_program_const(brw->vertex_program);

      if (newVP == curVP)
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.arb.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = get_new_program_id(brw->screen);

      /* Also tell tnl about it:
       */
      _tnl_program_string(ctx, target, prog);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);

      brw_vs_precompile(ctx, prog);
      break;
   }
   default:
      /*
       * driver->ProgramStringNotify is only called for ARB programs, fixed
       * function vertex programs, and ir_to_mesa (which isn't used by the
       * i965 back-end).  Therefore, even after geometry shaders are added,
       * this function should only ever be called with a target of
       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
       */
      unreachable("Unexpected target in brwProgramStringNotify");
   }

   return true;
}

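/* Implements glMemoryBarrier() by translating the GL barrier bits into
 * the PIPE_CONTROL flushes and cache invalidations the hardware requires.
 */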
static void
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
{
   struct brw_context *brw = brw_context(ctx);
   unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
                    PIPE_CONTROL_NO_WRITE |
                    PIPE_CONTROL_CS_STALL);
   assert(brw->gen >= 7 && brw->gen <= 9);

   if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                   GL_ELEMENT_ARRAY_BARRIER_BIT |
                   GL_COMMAND_BARRIER_BIT))
      bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;

   if (barriers & GL_UNIFORM_BARRIER_BIT)
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_CONST_CACHE_INVALIDATE);

   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

   if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;

   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
      bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
               PIPE_CONTROL_RENDER_TARGET_FLUSH);

   /* Typed surface messages are handled by the render cache on IVB, so we
    * need to flush it too.
    */
   if (brw->gen == 7 && !brw->is_haswell)
      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;

   brw_emit_pipe_control_flush(brw, bits);
}

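/* Without coherent framebuffer fetch, flush the render cache and
 * invalidate the texture cache so that subsequent framebuffer reads
 * observe prior rendering.
 */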
static void
brw_blend_barrier(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
      if (brw->gen >= 6) {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_CS_STALL);
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
      } else {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH);
      }
   }
}

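/* Ensure *scratch_bo is at least \p size bytes, replacing the old BO
 * (and discarding its contents) if it was too small.
 */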
void
brw_get_scratch_bo(struct brw_context *brw,
                   drm_intel_bo **scratch_bo, int size)
{
   drm_intel_bo *old_bo = *scratch_bo;

   if (old_bo && old_bo->size < size) {
      drm_intel_bo_unreference(old_bo);
      old_bo = NULL;
   }

   if (!old_bo) {
      *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
   }
}

/**
 * Reserve enough scratch space for the given stage to hold \p per_thread_size
 * bytes times the given \p thread_count.
 */
void
brw_alloc_stage_scratch(struct brw_context *brw,
                        struct brw_stage_state *stage_state,
                        unsigned per_thread_size,
                        unsigned thread_count)
{
   if (stage_state->per_thread_scratch < per_thread_size) {
      stage_state->per_thread_scratch = per_thread_size;

      if (stage_state->scratch_bo)
         drm_intel_bo_unreference(stage_state->scratch_bo);

      stage_state->scratch_bo =
         drm_intel_bo_alloc(brw->bufmgr, "shader scratch space",
                            per_thread_size * thread_count, 4096);
   }
}

void brwInitFragProgFuncs( struct dd_function_table *functions )
{
   assert(functions->ProgramStringNotify == _tnl_program_string);

   functions->NewProgram = brwNewProgram;
   functions->DeleteProgram = brwDeleteProgram;
   functions->ProgramStringNotify = brwProgramStringNotify;

   functions->LinkShader = brw_link_shader;

   functions->MemoryBarrier = brw_memory_barrier;
   functions->BlendBarrier = brw_blend_barrier;
}

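/* Accumulated INTEL_DEBUG=shader_time results for one entry: total
 * cycles, plus the counts of completed writes and discarded (reset)
 * results used to rescale the total in brw_report_shader_time().
 */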
struct shader_times {
   uint64_t time;
   uint64_t written;
   uint64_t reset;
};

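/* Each entry occupies three BRW_SHADER_TIME_STRIDE slots in the BO: one
 * each for the accumulated time, the written count, and the reset count
 * (see brw_collect_shader_time()).
 */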
void
brw_init_shader_time(struct brw_context *brw)
{
   const int max_entries = 2048;
   brw->shader_time.bo =
      drm_intel_bo_alloc(brw->bufmgr, "shader time",
                         max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
   brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
   brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
   brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                          max_entries);
   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                               max_entries);
   brw->shader_time.max_entries = max_entries;
}

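/* qsort() comparator: orders pointers by the uint64_t values they point
 * at, ascending.
 */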
static int
compare_time(const void *a, const void *b)
{
   uint64_t * const *a_val = a;
   uint64_t * const *b_val = b;

   /* We don't just subtract because we're turning the value to an int. */
   if (**a_val < **b_val)
      return -1;
   else if (**a_val == **b_val)
      return 0;
   else
      return 1;
}

static void
print_shader_time_line(const char *stage, const char *name,
                       int shader_num, uint64_t time, uint64_t total)
{
   fprintf(stderr, "%-6s%-18s", stage, name);

   if (shader_num != 0)
      fprintf(stderr, "%4d: ", shader_num);
   else
      fprintf(stderr, "    : ");

   fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
           (long long)time,
           (double)time / 1000000000.0,
           (double)time / total * 100.0);
}

static void
brw_report_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo || !brw->shader_time.num_entries)
      return;

   uint64_t scaled[brw->shader_time.num_entries];
   uint64_t *sorted[brw->shader_time.num_entries];
   uint64_t total_by_type[ST_CS + 1];
   memset(total_by_type, 0, sizeof(total_by_type));
   double total = 0;
   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint64_t written = 0, reset = 0;
      enum shader_time_shader_type type = brw->shader_time.types[i];

      sorted[i] = &scaled[i];

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_CS:
         written = brw->shader_time.cumulative[i].written;
         reset = brw->shader_time.cumulative[i].reset;
         break;

      default:
         /* I sometimes want to print things that aren't the 3 shader times.
          * Just print the sum in that case.
          */
         written = 1;
         reset = 0;
         break;
      }

      uint64_t time = brw->shader_time.cumulative[i].time;
      if (written) {
         scaled[i] = time / written * (written + reset);
      } else {
         scaled[i] = time;
      }

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_CS:
         total_by_type[type] += scaled[i];
         break;
      default:
         break;
      }

      total += scaled[i];
   }

   if (total == 0) {
      fprintf(stderr, "No shader time collected yet\n");
      return;
   }

   qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);

   fprintf(stderr, "\n");
   fprintf(stderr, "type          ID                  cycles spent"
           "                   %% of total\n");
   for (int s = 0; s < brw->shader_time.num_entries; s++) {
      const char *stage;
      /* Work back from the sorted pointer to its index in scaled[], so we
       * know which entry's time to print.
       */
      int i = sorted[s] - scaled;

      if (scaled[i] == 0)
         continue;

      int shader_num = brw->shader_time.ids[i];
      const char *shader_name = brw->shader_time.names[i];

      switch (brw->shader_time.types[i]) {
      case ST_VS:
         stage = "vs";
         break;
      case ST_TCS:
         stage = "tcs";
         break;
      case ST_TES:
         stage = "tes";
         break;
      case ST_GS:
         stage = "gs";
         break;
      case ST_FS8:
         stage = "fs8";
         break;
      case ST_FS16:
         stage = "fs16";
         break;
      case ST_CS:
         stage = "cs";
         break;
      default:
         stage = "other";
         break;
      }

      print_shader_time_line(stage, shader_name, shader_num,
                             scaled[i], total);
   }

   fprintf(stderr, "\n");
   print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
   print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
   print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
   print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
   print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
}

static void
brw_collect_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo)
      return;

   /* This probably stalls on the last rendering.  We could fix that by
    * delaying reading the reports, but it doesn't look like it's a big
    * overhead compared to the cost of tracking the time in the first place.
    */
   drm_intel_bo_map(brw->shader_time.bo, true);
   void *bo_map = brw->shader_time.bo->virtual;

   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;

      brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
      brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
      brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
   }

   /* Zero the BO to clear it for our next collection. */
   memset(bo_map, 0, brw->shader_time.bo->size);
   drm_intel_bo_unmap(brw->shader_time.bo);
}

void
brw_collect_and_report_shader_time(struct brw_context *brw)
{
   brw_collect_shader_time(brw);

   if (brw->shader_time.report_time == 0 ||
       get_time() - brw->shader_time.report_time >= 1.0) {
      brw_report_shader_time(brw);
      brw->shader_time.report_time = get_time();
   }
}

/**
 * Chooses an index in the shader_time buffer and sets up tracking information
 * for our printouts.
 *
 * Note that this holds on to references to the underlying programs, which may
 * change their lifetimes compared to normal operation.
 */
int
brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
                          enum shader_time_shader_type type, bool is_glsl_sh)
{
   int shader_time_index = brw->shader_time.num_entries++;
   assert(shader_time_index < brw->shader_time.max_entries);
   brw->shader_time.types[shader_time_index] = type;

   const char *name;
   if (prog->Id == 0) {
      name = "ff";
   } else if (is_glsl_sh) {
      name = prog->info.label ?
         ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
   } else {
      name = "prog";
   }

   brw->shader_time.names[shader_time_index] = name;
   brw->shader_time.ids[shader_time_index] = prog->Id;

   return shader_time_index;
}

void
brw_destroy_shader_time(struct brw_context *brw)
{
   drm_intel_bo_unreference(brw->shader_time.bo);
   brw->shader_time.bo = NULL;
}

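/* Free the separately-allocated arrays hanging off a
 * brw_stage_prog_data.
 */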
void
brw_stage_prog_data_free(const void *p)
{
   struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;

   ralloc_free(prog_data->param);
   ralloc_free(prog_data->pull_param);
   ralloc_free(prog_data->image_param);
}

void
brw_dump_arb_asm(const char *stage, struct gl_program *prog)
{
   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
           stage, prog->Id, stage);
   _mesa_print_program(prog);
}

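/* Set up a default, no-swizzle sampler key for precompiles.  On hardware
 * without shader channel select (pre-Haswell), shadow samplers instead
 * get the X, X, X, 1 swizzle that the default DEPTH_TEXTURE_MODE implies.
 */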
void
brw_setup_tex_for_precompile(struct brw_context *brw,
                             struct brw_sampler_prog_key_data *tex,
                             struct gl_program *prog)
{
   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
   unsigned sampler_count = util_last_bit(prog->SamplersUsed);
   for (unsigned i = 0; i < sampler_count; i++) {
      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
         tex->swizzles[i] =
            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
      } else {
         /* Color sampler: assume no swizzling. */
         tex->swizzles[i] = SWIZZLE_XYZW;
      }
   }
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused, and also to ensure that adding small offsets to them still trips
 * our asserts that surface indices are < BRW_MAX_SURFACES.
 */
uint32_t
brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
                                        const struct gl_program *prog,
                                        struct brw_stage_prog_data *stage_prog_data,
                                        uint32_t next_binding_table_offset)
{
   int num_textures = util_last_bit(prog->SamplersUsed);

   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   if (prog->info.num_ubos) {
      assert(prog->info.num_ubos <= BRW_MAX_UBO);
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_ubos;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (prog->info.num_ssbos) {
      assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
      stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_ssbos;
   } else {
      stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->nir->info->uses_texture_gather) {
      if (devinfo->gen >= 8) {
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (prog->info.num_abos) {
      stage_prog_data->binding_table.abo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_abos;
   } else {
      stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
   }

   if (prog->info.num_images) {
      stage_prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_images;
   } else {
      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   /* Plane 0 is just the regular texture section */
   stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;

   stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
   return next_binding_table_offset;
}