i965: Reduce cross-pollination between the DRI driver and compiler
[mesa.git] / src/mesa/drivers/dri/i965/brw_program.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include <pthread.h>
#include "main/imports.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
#include "util/ralloc.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/glsl_to_nir.h"

#include "brw_program.h"
#include "brw_context.h"
#include "brw_nir.h"
#include "intel_batchbuffer.h"

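/* Assign uniform locations and lower uniform access to explicit offsets,
 * using the scalar byte layout for stages compiled by the scalar backend
 * and the vec4 layout otherwise.
 */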
static void
brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
{
   if (is_scalar) {
      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                               type_size_scalar_bytes);
      nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
   } else {
      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                               type_size_vec4_bytes);
      nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
   }
}

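/* Translate a program into NIR: from GLSL IR when a linked shader program
 * is provided, or from Mesa IR (ARB programs and fixed function) otherwise,
 * then run the common i965 lowering and preprocessing passes.
 */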
nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   struct gl_context *ctx = &brw->ctx;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   bool progress;
   nir_shader *nir;

   /* First, lower the GLSL IR or Mesa IR to NIR */
   if (shader_prog) {
      nir = glsl_to_nir(shader_prog, stage, options);
      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
      nir_lower_returns(nir);
      nir_validate_shader(nir);
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
   } else {
      nir = prog_to_nir(prog, options);
      NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir);

   (void)progress;

   nir = brw_preprocess_nir(brw->screen->compiler, nir);

   if (stage == MESA_SHADER_FRAGMENT) {
      static const struct nir_lower_wpos_ytransform_options wpos_options = {
         .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
         .fs_coord_pixel_center_integer = 1,
         .fs_coord_origin_upper_left = 1,
      };
      _mesa_add_state_reference(prog->Parameters,
                                (gl_state_index *) wpos_options.state_tokens);

      NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
   }

   NIR_PASS(progress, nir, nir_lower_system_values);
   NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* nir_shader may have been cloned so make sure shader_info is in sync */
   if (nir->info != &prog->info) {
      const char *name = prog->info.name;
      const char *label = prog->info.label;
      prog->info = *nir->info;
      prog->info.name = name;
      prog->info.label = label;
   }

   if (shader_prog) {
      NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
      NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
   }

   return nir;
}

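/* Hand out driver-global program IDs under a mutex, since the intel_screen
 * may be shared by multiple contexts running on different threads.
 */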
static unsigned
get_new_program_id(struct intel_screen *screen)
{
   static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
   pthread_mutex_lock(&m);
   unsigned id = screen->program_id++;
   pthread_mutex_unlock(&m);
   return id;
}

static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
                                        GLuint id, bool is_arb_asm)
{
   struct brw_context *brw = brw_context(ctx);

   switch (target) {
   case GL_VERTEX_PROGRAM_ARB:
   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct brw_program *prog = rzalloc(NULL, struct brw_program);
      if (prog) {
         prog->id = get_new_program_id(brw->screen);

         return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
      }
      else
         return NULL;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *prog = rzalloc(NULL, struct brw_program);

      if (prog) {
         prog->id = get_new_program_id(brw->screen);

         return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
      }
      else
         return NULL;
   }

   default:
      unreachable("Unsupported target in brwNewProgram()");
   }
}

static void brwDeleteProgram( struct gl_context *ctx,
                              struct gl_program *prog )
{
   struct brw_context *brw = brw_context(ctx);

   /* Beware!  prog's refcount has reached zero, and it's about to be freed.
    *
    * In brw_upload_pipeline_state(), we compare brw->foo_program to
    * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
    * pointer has changed.
    *
    * We cannot leave brw->foo_program as a dangling pointer to the dead
    * program.  malloc() may allocate the same memory for a new gl_program,
    * causing us to see matching pointers...but totally different programs.
    *
    * We cannot set brw->foo_program to NULL, either.  If we've deleted the
    * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
    * would cause us to see matching pointers (NULL == NULL), and fail to
    * detect that a program has changed since our last draw.
    *
    * So, set it to a bogus gl_program pointer that will never match,
    * causing us to properly reevaluate the state on our next draw.
    *
    * Getting this wrong causes heisenbugs which are very hard to catch,
    * as you need a very specific allocation pattern to hit the problem.
    */
   static const struct gl_program deleted_program;

   if (brw->vertex_program == prog)
      brw->vertex_program = &deleted_program;

   if (brw->tess_ctrl_program == prog)
      brw->tess_ctrl_program = &deleted_program;

   if (brw->tess_eval_program == prog)
      brw->tess_eval_program = &deleted_program;

   if (brw->geometry_program == prog)
      brw->geometry_program = &deleted_program;

   if (brw->fragment_program == prog)
      brw->fragment_program = &deleted_program;

   if (brw->compute_program == prog)
      brw->compute_program = &deleted_program;

   _mesa_delete_program( ctx, prog );
}


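/* Called by Mesa core when an ARB program's source string is (re)specified.
 * Rebuilds the program's NIR and precompiles it; if the program being
 * notified is the one currently bound, it is also flagged dirty so the next
 * draw picks up the new version.  Only vertex programs may be position
 * invariant, hence the assert below.
 */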
static GLboolean
brwProgramStringNotify(struct gl_context *ctx,
                       GLenum target,
                       struct gl_program *prog)
{
   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);

   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *newFP = brw_program(prog);
      const struct brw_program *curFP =
         brw_program_const(brw->fragment_program);

      if (newFP == curFP)
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      newFP->id = get_new_program_id(brw->screen);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);

      brw_fs_precompile(ctx, prog);
      break;
   }
   case GL_VERTEX_PROGRAM_ARB: {
      struct brw_program *newVP = brw_program(prog);
      const struct brw_program *curVP =
         brw_program_const(brw->vertex_program);

      if (newVP == curVP)
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.arb.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = get_new_program_id(brw->screen);

      /* Also tell tnl about it:
       */
      _tnl_program_string(ctx, target, prog);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);

      brw_vs_precompile(ctx, prog);
      break;
   }
   default:
      /*
       * driver->ProgramStringNotify is only called for ARB programs, fixed
       * function vertex programs, and ir_to_mesa (which isn't used by the
       * i965 back-end).  Therefore, even after geometry shaders are added,
       * this function should only ever be called with a target of
       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
       */
      unreachable("Unexpected target in brwProgramStringNotify");
   }

   return true;
}

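/* Implement glMemoryBarrier() by translating the GL barrier bits into the
 * PIPE_CONTROL cache flushes and invalidations the hardware requires.
 */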
static void
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
{
   struct brw_context *brw = brw_context(ctx);
   unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
                    PIPE_CONTROL_NO_WRITE |
                    PIPE_CONTROL_CS_STALL);
   assert(brw->gen >= 7 && brw->gen <= 9);

   if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                   GL_ELEMENT_ARRAY_BARRIER_BIT |
                   GL_COMMAND_BARRIER_BIT))
      bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;

   if (barriers & GL_UNIFORM_BARRIER_BIT)
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_CONST_CACHE_INVALIDATE);

   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

   if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;

   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
      bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
               PIPE_CONTROL_RENDER_TARGET_FLUSH);

   /* Typed surface messages are handled by the render cache on IVB, so we
    * need to flush it too.
    */
   if (brw->gen == 7 && !brw->is_haswell)
      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;

   brw_emit_pipe_control_flush(brw, bits);
}

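/* Make prior framebuffer writes visible to subsequent framebuffer fetches
 * by flushing the render cache and invalidating the texture cache.  When
 * coherent framebuffer fetch is supported (the MESA_shader_framebuffer_fetch
 * path), no explicit flush is needed.
 */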
static void
brw_blend_barrier(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
      if (brw->gen >= 6) {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_CS_STALL);
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
      } else {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH);
      }
   }
}

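/* Make sure *scratch_bo points at a buffer of at least `size` bytes,
 * allocating a fresh one if the current buffer is absent or too small.
 * The old contents are discarded, not copied.
 */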
void
brw_get_scratch_bo(struct brw_context *brw,
                   drm_intel_bo **scratch_bo, int size)
{
   drm_intel_bo *old_bo = *scratch_bo;

   if (old_bo && old_bo->size < size) {
      drm_intel_bo_unreference(old_bo);
      old_bo = NULL;
   }

   if (!old_bo) {
      *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
   }
}

/**
 * Reserve enough scratch space for the given stage to hold \p per_thread_size
 * bytes times the given \p thread_count.
 */
void
brw_alloc_stage_scratch(struct brw_context *brw,
                        struct brw_stage_state *stage_state,
                        unsigned per_thread_size,
                        unsigned thread_count)
{
   if (stage_state->per_thread_scratch < per_thread_size) {
      stage_state->per_thread_scratch = per_thread_size;

      if (stage_state->scratch_bo)
         drm_intel_bo_unreference(stage_state->scratch_bo);

      stage_state->scratch_bo =
         drm_intel_bo_alloc(brw->bufmgr, "shader scratch space",
                            per_thread_size * thread_count, 4096);
   }
}

void brwInitFragProgFuncs( struct dd_function_table *functions )
{
   assert(functions->ProgramStringNotify == _tnl_program_string);

   functions->NewProgram = brwNewProgram;
   functions->DeleteProgram = brwDeleteProgram;
   functions->ProgramStringNotify = brwProgramStringNotify;

   functions->LinkShader = brw_link_shader;

   functions->MemoryBarrier = brw_memory_barrier;
   functions->BlendBarrier = brw_blend_barrier;
}

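/* Cumulative per-entry counters, mirroring the three values each shader
 * records in the shader_time BO: elapsed cycles, how many times a result
 * was written, and how many times a result had to be thrown out and reset.
 */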
struct shader_times {
   uint64_t time;
   uint64_t written;
   uint64_t reset;
};

void
brw_init_shader_time(struct brw_context *brw)
{
   const int max_entries = 2048;
   brw->shader_time.bo =
      drm_intel_bo_alloc(brw->bufmgr, "shader time",
                         max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
   brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
   brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
   brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                          max_entries);
   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                               max_entries);
   brw->shader_time.max_entries = max_entries;
}

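/* qsort() comparator for pointers to 64-bit cycle counts; sorts ascending. */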
static int
compare_time(const void *a, const void *b)
{
   uint64_t * const *a_val = a;
   uint64_t * const *b_val = b;

   /* We don't just subtract because we're turning the value to an int. */
   if (**a_val < **b_val)
      return -1;
   else if (**a_val == **b_val)
      return 0;
   else
      return 1;
}

static void
print_shader_time_line(const char *stage, const char *name,
                       int shader_num, uint64_t time, uint64_t total)
{
   fprintf(stderr, "%-6s%-18s", stage, name);

   if (shader_num != 0)
      fprintf(stderr, "%4d: ", shader_num);
   else
      fprintf(stderr, "    : ");

   fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
           (long long)time,
           (double)time / 1000000000.0,
           (double)time / total * 100.0);
}

static void
brw_report_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo || !brw->shader_time.num_entries)
      return;

   uint64_t scaled[brw->shader_time.num_entries];
   uint64_t *sorted[brw->shader_time.num_entries];
   uint64_t total_by_type[ST_CS + 1];
   memset(total_by_type, 0, sizeof(total_by_type));
   double total = 0;
   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint64_t written = 0, reset = 0;
      enum shader_time_shader_type type = brw->shader_time.types[i];

      sorted[i] = &scaled[i];

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_CS:
         written = brw->shader_time.cumulative[i].written;
         reset = brw->shader_time.cumulative[i].reset;
         break;

      default:
         /* I sometimes want to print things that aren't the 3 shader times.
          * Just print the sum in that case.
          */
         written = 1;
         reset = 0;
         break;
      }

      uint64_t time = brw->shader_time.cumulative[i].time;
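      /* Extrapolate over any thrown-away (reset) samples by assuming they
       * averaged the same cycle count as the samples that were written.
       */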
      if (written) {
         scaled[i] = time / written * (written + reset);
      } else {
         scaled[i] = time;
      }

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_CS:
         total_by_type[type] += scaled[i];
         break;
      default:
         break;
      }

      total += scaled[i];
   }

   if (total == 0) {
      fprintf(stderr, "No shader time collected yet\n");
      return;
   }

   qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);

   fprintf(stderr, "\n");
   fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
   for (int s = 0; s < brw->shader_time.num_entries; s++) {
      const char *stage;
      /* Work back from the sorted pointer to the index of the time to print. */
      int i = sorted[s] - scaled;

      if (scaled[i] == 0)
         continue;

      int shader_num = brw->shader_time.ids[i];
      const char *shader_name = brw->shader_time.names[i];

      switch (brw->shader_time.types[i]) {
      case ST_VS:
         stage = "vs";
         break;
      case ST_TCS:
         stage = "tcs";
         break;
      case ST_TES:
         stage = "tes";
         break;
      case ST_GS:
         stage = "gs";
         break;
      case ST_FS8:
         stage = "fs8";
         break;
      case ST_FS16:
         stage = "fs16";
         break;
      case ST_CS:
         stage = "cs";
         break;
      default:
         stage = "other";
         break;
      }

      print_shader_time_line(stage, shader_name, shader_num,
                             scaled[i], total);
   }

   fprintf(stderr, "\n");
   print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
   print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
   print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
   print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
   print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
}

static void
brw_collect_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo)
      return;

   /* This probably stalls on the last rendering.  We could fix that by
    * delaying reading the reports, but it doesn't look like it's a big
    * overhead compared to the cost of tracking the time in the first place.
    */
   drm_intel_bo_map(brw->shader_time.bo, true);
   void *bo_map = brw->shader_time.bo->virtual;

   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;

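      /* Each entry holds three values spaced BRW_SHADER_TIME_STRIDE bytes
       * apart; divide by four because we're indexing a uint32_t pointer.
       */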
      brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
      brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
      brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
   }

   /* Zero the BO out to clear it out for our next collection.
    */
   memset(bo_map, 0, brw->shader_time.bo->size);
   drm_intel_bo_unmap(brw->shader_time.bo);
}

void
brw_collect_and_report_shader_time(struct brw_context *brw)
{
   brw_collect_shader_time(brw);

   if (brw->shader_time.report_time == 0 ||
       get_time() - brw->shader_time.report_time >= 1.0) {
      brw_report_shader_time(brw);
      brw->shader_time.report_time = get_time();
   }
}

/**
 * Chooses an index in the shader_time buffer and sets up tracking information
 * for our printouts.
 *
 * Note that this holds on to references to the underlying programs, which may
 * change their lifetimes compared to normal operation.
 */
int
brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
                          enum shader_time_shader_type type, bool is_glsl_sh)
{
   int shader_time_index = brw->shader_time.num_entries++;
   assert(shader_time_index < brw->shader_time.max_entries);
   brw->shader_time.types[shader_time_index] = type;

   const char *name;
   if (prog->Id == 0) {
      name = "ff";
   } else if (is_glsl_sh) {
      name = prog->info.label ?
         ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
   } else {
      name = "prog";
   }

   brw->shader_time.names[shader_time_index] = name;
   brw->shader_time.ids[shader_time_index] = prog->Id;

   return shader_time_index;
}

void
brw_destroy_shader_time(struct brw_context *brw)
{
   drm_intel_bo_unreference(brw->shader_time.bo);
   brw->shader_time.bo = NULL;
}

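/* Release the parameter arrays hanging off a brw_stage_prog_data.  The
 * structure itself is not freed here.
 */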
void
brw_stage_prog_data_free(const void *p)
{
   struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;

   ralloc_free(prog_data->param);
   ralloc_free(prog_data->pull_param);
   ralloc_free(prog_data->image_param);
}

void
brw_dump_arb_asm(const char *stage, struct gl_program *prog)
{
   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
           stage, prog->Id, stage);
   _mesa_print_program(prog);
}

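/* Seed the sampler-related portion of a precompile program key with the
 * most likely values, so the precompiled variant matches the common runtime
 * state and a recompile can usually be avoided.
 */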
void
brw_setup_tex_for_precompile(struct brw_context *brw,
                             struct brw_sampler_prog_key_data *tex,
                             struct gl_program *prog)
{
   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
   unsigned sampler_count = util_last_bit(prog->SamplersUsed);
   for (unsigned i = 0; i < sampler_count; i++) {
      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
         tex->swizzles[i] =
            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
      } else {
         /* Color sampler: assume no swizzling. */
         tex->swizzles[i] = SWIZZLE_XYZW;
      }
   }
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused but also make sure that addition of small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
uint32_t
brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
                                        const struct gl_program *prog,
                                        struct brw_stage_prog_data *stage_prog_data,
                                        uint32_t next_binding_table_offset)
{
   int num_textures = util_last_bit(prog->SamplersUsed);

   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   if (prog->info.num_ubos) {
      assert(prog->info.num_ubos <= BRW_MAX_UBO);
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_ubos;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (prog->info.num_ssbos) {
      assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
      stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_ssbos;
   } else {
      stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->nir->info->uses_texture_gather) {
      if (devinfo->gen >= 8) {
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (prog->info.num_abos) {
      stage_prog_data->binding_table.abo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_abos;
   } else {
      stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
   }

   if (prog->info.num_images) {
      stage_prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_images;
   } else {
      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   /* Plane 0 is just the regular texture section */
   stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;

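   /* Planes 1 and 2 get their own texture sections for sampling the extra
    * planes of multi-planar (e.g. YUV) formats.
    */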
   stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
   return next_binding_table_offset;
}