src/mesa/drivers/dri/i965/brw_program.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32 #include <pthread.h>
  33 #include "main/imports.h"
  34 #include "program/prog_parameter.h"
  35 #include "program/prog_print.h"
  36 #include "program/prog_to_nir.h"
  37 #include "program/program.h"
  38 #include "program/programopt.h"
  39 #include "tnl/tnl.h"
  40 #include "util/ralloc.h"
  41 #include "compiler/glsl/ir.h"
  42 #include "compiler/glsl/glsl_to_nir.h"
  43
  44 #include "brw_program.h"
  45 #include "brw_context.h"
  46 #include "compiler/brw_nir.h"
  47 #include "brw_defines.h"
  48 #include "intel_batchbuffer.h"
  49
  50 static bool
  51 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
  52 {
  53    if (is_scalar) {
  54       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
  55                                type_size_scalar_bytes);
  56       return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
  57    } else {
  58       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
  59                                type_size_vec4_bytes);
  60       return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
  61    }
  62 }
  63
  64 nir_shader *
  65 brw_create_nir(struct brw_context *brw,
  66                const struct gl_shader_program *shader_prog,
  67                struct gl_program *prog,
  68                gl_shader_stage stage,
  69                bool is_scalar)
  70 {
  71    struct gl_context *ctx = &brw->ctx;
  72    const nir_shader_compiler_options *options =
  73       ctx->Const.ShaderCompilerOptions[stage].NirOptions;
  74    bool progress;
  75    nir_shader *nir;
  76
  77    /* First, lower the GLSL IR or Mesa IR to NIR */
  78    if (shader_prog) {
  79       nir = glsl_to_nir(shader_prog, stage, options);
  80       nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
  81       nir_lower_returns(nir);
  82       nir_validate_shader(nir);
  83       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
  84                  nir_shader_get_entrypoint(nir), true, false);
  85    } else {
  86       nir = prog_to_nir(prog, options);
  87       NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
  88    }
  89    nir_validate_shader(nir);
  90
  91    (void)progress;
  92
  93    nir = brw_preprocess_nir(brw->screen->compiler, nir);
  94
  95    if (stage == MESA_SHADER_FRAGMENT) {
  96       static const struct nir_lower_wpos_ytransform_options wpos_options = {
  97          .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
  98          .fs_coord_pixel_center_integer = 1,
  99          .fs_coord_origin_upper_left = 1,
 100       };
 101       _mesa_add_state_reference(prog->Parameters,
 102                                 (gl_state_index *) wpos_options.state_tokens);
 103
 104       NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
 105    }
 106
 107    NIR_PASS(progress, nir, nir_lower_system_values);
 108    NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
 109
 110    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 111
 112    /* Copy the info we just generated back into the gl_program */
 113    const char *prog_name = prog->info.name;
 114    const char *prog_label = prog->info.label;
 115    prog->info = nir->info;
 116    prog->info.name = prog_name;
 117    prog->info.label = prog_label;
 118
 119    if (shader_prog) {
 120       NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
 121       NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
 122    }
 123
 124    return nir;
 125 }
 126
 127 static unsigned
 128 get_new_program_id(struct intel_screen *screen)
 129 {
 130    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
 131    pthread_mutex_lock(&m);
 132    unsigned id = screen->program_id++;
 133    pthread_mutex_unlock(&m);
 134    return id;
 135 }
 136
 137 static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
 138                                         GLuint id, bool is_arb_asm)
 139 {
 140    struct brw_context *brw = brw_context(ctx);
 141
 142    switch (target) {
 143    case GL_VERTEX_PROGRAM_ARB:
 144    case GL_TESS_CONTROL_PROGRAM_NV:
 145    case GL_TESS_EVALUATION_PROGRAM_NV:
 146    case GL_GEOMETRY_PROGRAM_NV:
 147    case GL_COMPUTE_PROGRAM_NV: {
 148       struct brw_program *prog = rzalloc(NULL, struct brw_program);
 149       if (prog) {
 150          prog->id = get_new_program_id(brw->screen);
 151
 152          return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
 153       }
 154       else
 155          return NULL;
 156    }
 157
 158    case GL_FRAGMENT_PROGRAM_ARB: {
 159       struct brw_program *prog = rzalloc(NULL, struct brw_program);
 160
 161       if (prog) {
 162          prog->id = get_new_program_id(brw->screen);
 163
 164          return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
 165       }
 166       else
 167          return NULL;
 168    }
 169
 170    default:
 171       unreachable("Unsupported target in brwNewProgram()");
 172    }
 173 }
 174
 175 static void brwDeleteProgram( struct gl_context *ctx,
 176                               struct gl_program *prog )
 177 {
 178    struct brw_context *brw = brw_context(ctx);
 179
 180    /* Beware!  prog's refcount has reached zero, and it's about to be freed.
 181     *
 182     * In brw_upload_pipeline_state(), we compare brw->foo_program to
 183     * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
 184     * pointer has changed.
 185     *
 186     * We cannot leave brw->foo_program as a dangling pointer to the dead
 187     * program.  malloc() may allocate the same memory for a new gl_program,
 188     * causing us to see matching pointers...but totally different programs.
 189     *
 190     * We cannot set brw->foo_program to NULL, either.  If we've deleted the
 191     * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
 192     * would cause us to see matching pointers (NULL == NULL), and fail to
 193     * detect that a program has changed since our last draw.
 194     *
 195     * So, set it to a bogus gl_program pointer that will never match,
 196     * causing us to properly reevaluate the state on our next draw.
 197     *
 198     * Getting this wrong causes heisenbugs which are very hard to catch,
 199     * as you need a very specific allocation pattern to hit the problem.
 200     */
 201    static const struct gl_program deleted_program;
 202
 203    if (brw->vertex_program == prog)
 204       brw->vertex_program = &deleted_program;
 205
 206    if (brw->tess_ctrl_program == prog)
 207       brw->tess_ctrl_program = &deleted_program;
 208
 209    if (brw->tess_eval_program == prog)
 210       brw->tess_eval_program = &deleted_program;
 211
 212    if (brw->geometry_program == prog)
 213       brw->geometry_program = &deleted_program;
 214
 215    if (brw->fragment_program == prog)
 216       brw->fragment_program = &deleted_program;
 217
 218    if (brw->compute_program == prog)
 219       brw->compute_program = &deleted_program;
 220
 221    _mesa_delete_program( ctx, prog );
 222 }
 223
 224
 225 static GLboolean
 226 brwProgramStringNotify(struct gl_context *ctx,
 227                        GLenum target,
 228                        struct gl_program *prog)
 229 {
 230    assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
 231
 232    struct brw_context *brw = brw_context(ctx);
 233    const struct brw_compiler *compiler = brw->screen->compiler;
 234
 235    switch (target) {
 236    case GL_FRAGMENT_PROGRAM_ARB: {
 237       struct brw_program *newFP = brw_program(prog);
 238       const struct brw_program *curFP =
 239          brw_program_const(brw->fragment_program);
 240
 241       if (newFP == curFP)
 242          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 243       newFP->id = get_new_program_id(brw->screen);
 244
 245       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 246
 247       brw_fs_precompile(ctx, prog);
 248       break;
 249    }
 250    case GL_VERTEX_PROGRAM_ARB: {
 251       struct brw_program *newVP = brw_program(prog);
 252       const struct brw_program *curVP =
 253          brw_program_const(brw->vertex_program);
 254
 255       if (newVP == curVP)
 256          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
 257       if (newVP->program.arb.IsPositionInvariant) {
 258          _mesa_insert_mvp_code(ctx, &newVP->program);
 259       }
 260       newVP->id = get_new_program_id(brw->screen);
 261
 262       /* Also tell tnl about it:
 263        */
 264       _tnl_program_string(ctx, target, prog);
 265
 266       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
 267                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
 268
 269       brw_vs_precompile(ctx, prog);
 270       break;
 271    }
 272    default:
 273       /*
 274        * driver->ProgramStringNotify is only called for ARB programs, fixed
 275        * function vertex programs, and ir_to_mesa (which isn't used by the
 276        * i965 back-end).  Therefore, even after geometry shaders are added,
 277        * this function should only ever be called with a target of
 278        * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
 279        */
 280       unreachable("Unexpected target in brwProgramStringNotify");
 281    }
 282
 283    return true;
 284 }
 285
 286 static void
 287 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 288 {
 289    struct brw_context *brw = brw_context(ctx);
 290    unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
 291                     PIPE_CONTROL_NO_WRITE |
 292                     PIPE_CONTROL_CS_STALL);
 293    assert(brw->gen >= 7 && brw->gen <= 9);
 294
 295    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
 296                    GL_ELEMENT_ARRAY_BARRIER_BIT |
 297                    GL_COMMAND_BARRIER_BIT))
 298       bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 299
 300    if (barriers & GL_UNIFORM_BARRIER_BIT)
 301       bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 302                PIPE_CONTROL_CONST_CACHE_INVALIDATE);
 303
 304    if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
 305       bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 306
 307    if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
 308       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 309
 310    if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
 311       bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 312                PIPE_CONTROL_RENDER_TARGET_FLUSH);
 313
 314    /* Typed surface messages are handled by the render cache on IVB, so we
 315     * need to flush it too.
 316     */
 317    if (brw->gen == 7 && !brw->is_haswell)
 318       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 319
 320    brw_emit_pipe_control_flush(brw, bits);
 321 }
 322
 323 static void
 324 brw_blend_barrier(struct gl_context *ctx)
 325 {
 326    struct brw_context *brw = brw_context(ctx);
 327
 328    if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
 329       if (brw->gen >= 6) {
 330          brw_emit_pipe_control_flush(brw,
 331                                      PIPE_CONTROL_RENDER_TARGET_FLUSH |
 332                                      PIPE_CONTROL_CS_STALL);
 333          brw_emit_pipe_control_flush(brw,
 334                                      PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 335       } else {
 336          brw_emit_pipe_control_flush(brw,
 337                                      PIPE_CONTROL_RENDER_TARGET_FLUSH);
 338       }
 339    }
 340 }
 341
 342 void
 343 brw_get_scratch_bo(struct brw_context *brw,
 344                    struct brw_bo **scratch_bo, int size)
 345 {
 346    struct brw_bo *old_bo = *scratch_bo;
 347
 348    if (old_bo && old_bo->size < size) {
 349       brw_bo_unreference(old_bo);
 350       old_bo = NULL;
 351    }
 352
 353    if (!old_bo) {
 354       *scratch_bo = brw_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
 355    }
 356 }
 357
 358 /**
 359  * Reserve enough scratch space for the given stage to hold \p per_thread_size
 360  * bytes times the given \p thread_count.
 361  */
 362 void
 363 brw_alloc_stage_scratch(struct brw_context *brw,
 364                         struct brw_stage_state *stage_state,
 365                         unsigned per_thread_size,
 366                         unsigned thread_count)
 367 {
 368    if (stage_state->per_thread_scratch < per_thread_size) {
 369       stage_state->per_thread_scratch = per_thread_size;
 370
 371       if (stage_state->scratch_bo)
 372          brw_bo_unreference(stage_state->scratch_bo);
 373
 374       stage_state->scratch_bo =
 375          brw_bo_alloc(brw->bufmgr, "shader scratch space",
 376                       per_thread_size * thread_count, 4096);
 377    }
 378 }
 379
 380 void brwInitFragProgFuncs( struct dd_function_table *functions )
 381 {
 382    assert(functions->ProgramStringNotify == _tnl_program_string);
 383
 384    functions->NewProgram = brwNewProgram;
 385    functions->DeleteProgram = brwDeleteProgram;
 386    functions->ProgramStringNotify = brwProgramStringNotify;
 387
 388    functions->LinkShader = brw_link_shader;
 389
 390    functions->MemoryBarrier = brw_memory_barrier;
 391    functions->BlendBarrier = brw_blend_barrier;
 392 }
 393
 394 struct shader_times {
 395    uint64_t time;
 396    uint64_t written;
 397    uint64_t reset;
 398 };
 399
 400 void
 401 brw_init_shader_time(struct brw_context *brw)
 402 {
 403    const int max_entries = 2048;
 404    brw->shader_time.bo =
 405       brw_bo_alloc(brw->bufmgr, "shader time",
 406                    max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
 407    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
 408    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
 409    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
 410                                           max_entries);
 411    brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
 412                                                max_entries);
 413    brw->shader_time.max_entries = max_entries;
 414 }
 415
 416 static int
 417 compare_time(const void *a, const void *b)
 418 {
 419    uint64_t * const *a_val = a;
 420    uint64_t * const *b_val = b;
 421
 422    /* We don't just subtract because we're turning the value to an int. */
 423    if (**a_val < **b_val)
 424       return -1;
 425    else if (**a_val == **b_val)
 426       return 0;
 427    else
 428       return 1;
 429 }
 430
 431 static void
 432 print_shader_time_line(const char *stage, const char *name,
 433                        int shader_num, uint64_t time, uint64_t total)
 434 {
 435    fprintf(stderr, "%-6s%-18s", stage, name);
 436
 437    if (shader_num != 0)
 438       fprintf(stderr, "%4d: ", shader_num);
 439    else
 440       fprintf(stderr, "    : ");
 441
 442    fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
 443            (long long)time,
 444            (double)time / 1000000000.0,
 445            (double)time / total * 100.0);
 446 }
 447
 448 static void
 449 brw_report_shader_time(struct brw_context *brw)
 450 {
 451    if (!brw->shader_time.bo || !brw->shader_time.num_entries)
 452       return;
 453
 454    uint64_t scaled[brw->shader_time.num_entries];
 455    uint64_t *sorted[brw->shader_time.num_entries];
 456    uint64_t total_by_type[ST_CS + 1];
 457    memset(total_by_type, 0, sizeof(total_by_type));
 458    double total = 0;
 459    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 460       uint64_t written = 0, reset = 0;
 461       enum shader_time_shader_type type = brw->shader_time.types[i];
 462
 463       sorted[i] = &scaled[i];
 464
 465       switch (type) {
 466       case ST_VS:
 467       case ST_TCS:
 468       case ST_TES:
 469       case ST_GS:
 470       case ST_FS8:
 471       case ST_FS16:
 472       case ST_CS:
 473          written = brw->shader_time.cumulative[i].written;
 474          reset = brw->shader_time.cumulative[i].reset;
 475          break;
 476
 477       default:
 478          /* I sometimes want to print things that aren't the 3 shader times.
 479           * Just print the sum in that case.
 480           */
 481          written = 1;
 482          reset = 0;
 483          break;
 484       }
 485
 486       uint64_t time = brw->shader_time.cumulative[i].time;
 487       if (written) {
 488          scaled[i] = time / written * (written + reset);
 489       } else {
 490          scaled[i] = time;
 491       }
 492
 493       switch (type) {
 494       case ST_VS:
 495       case ST_TCS:
 496       case ST_TES:
 497       case ST_GS:
 498       case ST_FS8:
 499       case ST_FS16:
 500       case ST_CS:
 501          total_by_type[type] += scaled[i];
 502          break;
 503       default:
 504          break;
 505       }
 506
 507       total += scaled[i];
 508    }
 509
 510    if (total == 0) {
 511       fprintf(stderr, "No shader time collected yet\n");
 512       return;
 513    }
 514
 515    qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 516
 517    fprintf(stderr, "\n");
 518    fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
 519    for (int s = 0; s < brw->shader_time.num_entries; s++) {
 520       const char *stage;
 521       /* Work back from the sorted pointers times to a time to print. */
 522       int i = sorted[s] - scaled;
 523
 524       if (scaled[i] == 0)
 525          continue;
 526
 527       int shader_num = brw->shader_time.ids[i];
 528       const char *shader_name = brw->shader_time.names[i];
 529
 530       switch (brw->shader_time.types[i]) {
 531       case ST_VS:
 532          stage = "vs";
 533          break;
 534       case ST_TCS:
 535          stage = "tcs";
 536          break;
 537       case ST_TES:
 538          stage = "tes";
 539          break;
 540       case ST_GS:
 541          stage = "gs";
 542          break;
 543       case ST_FS8:
 544          stage = "fs8";
 545          break;
 546       case ST_FS16:
 547          stage = "fs16";
 548          break;
 549       case ST_CS:
 550          stage = "cs";
 551          break;
 552       default:
 553          stage = "other";
 554          break;
 555       }
 556
 557       print_shader_time_line(stage, shader_name, shader_num,
 558                              scaled[i], total);
 559    }
 560
 561    fprintf(stderr, "\n");
 562    print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
 563    print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
 564    print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
 565    print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
 566    print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
 567    print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
 568    print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
 569 }
 570
 571 static void
 572 brw_collect_shader_time(struct brw_context *brw)
 573 {
 574    if (!brw->shader_time.bo)
 575       return;
 576
 577    /* This probably stalls on the last rendering.  We could fix that by
 578     * delaying reading the reports, but it doesn't look like it's a big
 579     * overhead compared to the cost of tracking the time in the first place.
 580     */
 581    brw_bo_map(brw, brw->shader_time.bo, true);
 582    void *bo_map = brw->shader_time.bo->virtual;
 583
 584    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 585       uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
 586
 587       brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
 588       brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
 589       brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
 590    }
 591
 592    /* Zero the BO out to clear it out for our next collection.
 593     */
 594    memset(bo_map, 0, brw->shader_time.bo->size);
 595    brw_bo_unmap(brw->shader_time.bo);
 596 }
 597
 598 void
 599 brw_collect_and_report_shader_time(struct brw_context *brw)
 600 {
 601    brw_collect_shader_time(brw);
 602
 603    if (brw->shader_time.report_time == 0 ||
 604        get_time() - brw->shader_time.report_time >= 1.0) {
 605       brw_report_shader_time(brw);
 606       brw->shader_time.report_time = get_time();
 607    }
 608 }
 609
 610 /**
 611  * Chooses an index in the shader_time buffer and sets up tracking information
 612  * for our printouts.
 613  *
 614  * Note that this holds on to references to the underlying programs, which may
 615  * change their lifetimes compared to normal operation.
 616  */
 617 int
 618 brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
 619                           enum shader_time_shader_type type, bool is_glsl_sh)
 620 {
 621    int shader_time_index = brw->shader_time.num_entries++;
 622    assert(shader_time_index < brw->shader_time.max_entries);
 623    brw->shader_time.types[shader_time_index] = type;
 624
 625    const char *name;
 626    if (prog->Id == 0) {
 627       name = "ff";
 628    } else if (is_glsl_sh) {
 629       name = prog->info.label ?
 630          ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
 631    } else {
 632       name = "prog";
 633    }
 634
 635    brw->shader_time.names[shader_time_index] = name;
 636    brw->shader_time.ids[shader_time_index] = prog->Id;
 637
 638    return shader_time_index;
 639 }
 640
 641 void
 642 brw_destroy_shader_time(struct brw_context *brw)
 643 {
 644    brw_bo_unreference(brw->shader_time.bo);
 645    brw->shader_time.bo = NULL;
 646 }
 647
 648 void
 649 brw_stage_prog_data_free(const void *p)
 650 {
 651    struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
 652
 653    ralloc_free(prog_data->param);
 654    ralloc_free(prog_data->pull_param);
 655    ralloc_free(prog_data->image_param);
 656 }
 657
 658 void
 659 brw_dump_arb_asm(const char *stage, struct gl_program *prog)
 660 {
 661    fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
 662            stage, prog->Id, stage);
 663    _mesa_print_program(prog);
 664 }
 665
 666 void
 667 brw_setup_tex_for_precompile(struct brw_context *brw,
 668                              struct brw_sampler_prog_key_data *tex,
 669                              struct gl_program *prog)
 670 {
 671    const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
 672    unsigned sampler_count = util_last_bit(prog->SamplersUsed);
 673    for (unsigned i = 0; i < sampler_count; i++) {
 674       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
 675          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
 676          tex->swizzles[i] =
 677             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
 678       } else {
 679          /* Color sampler: assume no swizzling. */
 680          tex->swizzles[i] = SWIZZLE_XYZW;
 681       }
 682    }
 683 }
 684
 685 /**
 686  * Sets up the starting offsets for the groups of binding table entries
 687  * common to all pipeline stages.
 688  *
 689  * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 690  * unused but also make sure that addition of small offsets to them will
 691  * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 692  */
 693 uint32_t
 694 brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
 695                                         const struct gl_program *prog,
 696                                         struct brw_stage_prog_data *stage_prog_data,
 697                                         uint32_t next_binding_table_offset)
 698 {
 699    int num_textures = util_last_bit(prog->SamplersUsed);
 700
 701    stage_prog_data->binding_table.texture_start = next_binding_table_offset;
 702    next_binding_table_offset += num_textures;
 703
 704    if (prog->info.num_ubos) {
 705       assert(prog->info.num_ubos <= BRW_MAX_UBO);
 706       stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
 707       next_binding_table_offset += prog->info.num_ubos;
 708    } else {
 709       stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
 710    }
 711
 712    if (prog->info.num_ssbos) {
 713       assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
 714       stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
 715       next_binding_table_offset += prog->info.num_ssbos;
 716    } else {
 717       stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
 718    }
 719
 720    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 721       stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
 722       next_binding_table_offset++;
 723    } else {
 724       stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
 725    }
 726
 727    if (prog->nir->info.uses_texture_gather) {
 728       if (devinfo->gen >= 8) {
 729          stage_prog_data->binding_table.gather_texture_start =
 730             stage_prog_data->binding_table.texture_start;
 731       } else {
 732          stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
 733          next_binding_table_offset += num_textures;
 734       }
 735    } else {
 736       stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
 737    }
 738
 739    if (prog->info.num_abos) {
 740       stage_prog_data->binding_table.abo_start = next_binding_table_offset;
 741       next_binding_table_offset += prog->info.num_abos;
 742    } else {
 743       stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
 744    }
 745
 746    if (prog->info.num_images) {
 747       stage_prog_data->binding_table.image_start = next_binding_table_offset;
 748       next_binding_table_offset += prog->info.num_images;
 749    } else {
 750       stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
 751    }
 752
 753    /* This may or may not be used depending on how the compile goes. */
 754    stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
 755    next_binding_table_offset++;
 756
 757    /* Plane 0 is just the regular texture section */
 758    stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
 759
 760    stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
 761    next_binding_table_offset += num_textures;
 762
 763    stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
 764    next_binding_table_offset += num_textures;
 765
 766    /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
 767
 768    assert(next_binding_table_offset <= BRW_MAX_SURFACES);
 769    return next_binding_table_offset;
 770 }