src/mesa/drivers/dri/i965/brw_program.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32 #include <pthread.h>
  33 #include "main/imports.h"
  34 #include "program/prog_parameter.h"
  35 #include "program/prog_print.h"
  36 #include "program/prog_to_nir.h"
  37 #include "program/program.h"
  38 #include "program/programopt.h"
  39 #include "tnl/tnl.h"
  40 #include "util/ralloc.h"
  41 #include "compiler/glsl/ir.h"
  42 #include "compiler/glsl/glsl_to_nir.h"
  43
  44 #include "brw_program.h"
  45 #include "brw_context.h"
  46 #include "compiler/brw_nir.h"
  47 #include "brw_defines.h"
  48 #include "intel_batchbuffer.h"
  49
  50 static bool
  51 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
  52 {
  53    if (is_scalar) {
  54       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
  55                                type_size_scalar_bytes);
  56       return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
  57    } else {
  58       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
  59                                type_size_vec4_bytes);
  60       return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
  61    }
  62 }
  63
  64 nir_shader *
  65 brw_create_nir(struct brw_context *brw,
  66                const struct gl_shader_program *shader_prog,
  67                struct gl_program *prog,
  68                gl_shader_stage stage,
  69                bool is_scalar)
  70 {
  71    struct gl_context *ctx = &brw->ctx;
  72    const nir_shader_compiler_options *options =
  73       ctx->Const.ShaderCompilerOptions[stage].NirOptions;
  74    bool progress;
  75    nir_shader *nir;
  76
  77    /* First, lower the GLSL IR or Mesa IR to NIR */
  78    if (shader_prog) {
  79       nir = glsl_to_nir(shader_prog, stage, options);
  80       nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
  81       nir_lower_returns(nir);
  82       nir_validate_shader(nir);
  83       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
  84                  nir_shader_get_entrypoint(nir), true, false);
  85    } else {
  86       nir = prog_to_nir(prog, options);
  87       NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
  88    }
  89    nir_validate_shader(nir);
  90
  91    (void)progress;
  92
  93    nir = brw_preprocess_nir(brw->screen->compiler, nir);
  94
  95    if (stage == MESA_SHADER_FRAGMENT) {
  96       static const struct nir_lower_wpos_ytransform_options wpos_options = {
  97          .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
  98          .fs_coord_pixel_center_integer = 1,
  99          .fs_coord_origin_upper_left = 1,
 100       };
 101       _mesa_add_state_reference(prog->Parameters,
 102                                 (gl_state_index *) wpos_options.state_tokens);
 103
 104       NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
 105    }
 106
 107    NIR_PASS(progress, nir, nir_lower_system_values);
 108    NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
 109
 110    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 111
 112    /* Copy the info we just generated back into the gl_program */
 113    const char *prog_name = prog->info.name;
 114    const char *prog_label = prog->info.label;
 115    prog->info = nir->info;
 116    prog->info.name = prog_name;
 117    prog->info.label = prog_label;
 118
 119    if (shader_prog) {
 120       NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
 121       NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
 122    }
 123
 124    return nir;
 125 }
 126
 127 static unsigned
 128 get_new_program_id(struct intel_screen *screen)
 129 {
 130    return p_atomic_inc_return(&screen->program_id);
 131 }
 132
 133 static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
 134                                         GLuint id, bool is_arb_asm)
 135 {
 136    struct brw_context *brw = brw_context(ctx);
 137    struct brw_program *prog = rzalloc(NULL, struct brw_program);
 138
 139    if (prog) {
 140       prog->id = get_new_program_id(brw->screen);
 141
 142       return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
 143    }
 144
 145    return NULL;
 146 }
 147
 148 static void brwDeleteProgram( struct gl_context *ctx,
 149                               struct gl_program *prog )
 150 {
 151    struct brw_context *brw = brw_context(ctx);
 152
 153    /* Beware!  prog's refcount has reached zero, and it's about to be freed.
 154     *
 155     * In brw_upload_pipeline_state(), we compare brw->foo_program to
 156     * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
 157     * pointer has changed.
 158     *
 159     * We cannot leave brw->foo_program as a dangling pointer to the dead
 160     * program.  malloc() may allocate the same memory for a new gl_program,
 161     * causing us to see matching pointers...but totally different programs.
 162     *
 163     * We cannot set brw->foo_program to NULL, either.  If we've deleted the
 164     * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
 165     * would cause us to see matching pointers (NULL == NULL), and fail to
 166     * detect that a program has changed since our last draw.
 167     *
 168     * So, set it to a bogus gl_program pointer that will never match,
 169     * causing us to properly reevaluate the state on our next draw.
 170     *
 171     * Getting this wrong causes heisenbugs which are very hard to catch,
 172     * as you need a very specific allocation pattern to hit the problem.
 173     */
 174    static const struct gl_program deleted_program;
 175
 176    if (brw->vertex_program == prog)
 177       brw->vertex_program = &deleted_program;
 178
 179    if (brw->tess_ctrl_program == prog)
 180       brw->tess_ctrl_program = &deleted_program;
 181
 182    if (brw->tess_eval_program == prog)
 183       brw->tess_eval_program = &deleted_program;
 184
 185    if (brw->geometry_program == prog)
 186       brw->geometry_program = &deleted_program;
 187
 188    if (brw->fragment_program == prog)
 189       brw->fragment_program = &deleted_program;
 190
 191    if (brw->compute_program == prog)
 192       brw->compute_program = &deleted_program;
 193
 194    _mesa_delete_program( ctx, prog );
 195 }
 196
 197
 198 static GLboolean
 199 brwProgramStringNotify(struct gl_context *ctx,
 200                        GLenum target,
 201                        struct gl_program *prog)
 202 {
 203    assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
 204
 205    struct brw_context *brw = brw_context(ctx);
 206    const struct brw_compiler *compiler = brw->screen->compiler;
 207
 208    switch (target) {
 209    case GL_FRAGMENT_PROGRAM_ARB: {
 210       struct brw_program *newFP = brw_program(prog);
 211       const struct brw_program *curFP =
 212          brw_program_const(brw->fragment_program);
 213
 214       if (newFP == curFP)
 215          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 216       newFP->id = get_new_program_id(brw->screen);
 217
 218       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 219
 220       brw_fs_precompile(ctx, prog);
 221       break;
 222    }
 223    case GL_VERTEX_PROGRAM_ARB: {
 224       struct brw_program *newVP = brw_program(prog);
 225       const struct brw_program *curVP =
 226          brw_program_const(brw->vertex_program);
 227
 228       if (newVP == curVP)
 229          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
 230       if (newVP->program.arb.IsPositionInvariant) {
 231          _mesa_insert_mvp_code(ctx, &newVP->program);
 232       }
 233       newVP->id = get_new_program_id(brw->screen);
 234
 235       /* Also tell tnl about it:
 236        */
 237       _tnl_program_string(ctx, target, prog);
 238
 239       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
 240                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
 241
 242       brw_vs_precompile(ctx, prog);
 243       break;
 244    }
 245    default:
 246       /*
 247        * driver->ProgramStringNotify is only called for ARB programs, fixed
 248        * function vertex programs, and ir_to_mesa (which isn't used by the
 249        * i965 back-end).  Therefore, even after geometry shaders are added,
 250        * this function should only ever be called with a target of
 251        * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
 252        */
 253       unreachable("Unexpected target in brwProgramStringNotify");
 254    }
 255
 256    return true;
 257 }
 258
 259 static void
 260 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 261 {
 262    struct brw_context *brw = brw_context(ctx);
 263    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 264    unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
 265                     PIPE_CONTROL_NO_WRITE |
 266                     PIPE_CONTROL_CS_STALL);
 267    assert(devinfo->gen >= 7 && devinfo->gen <= 10);
 268
 269    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
 270                    GL_ELEMENT_ARRAY_BARRIER_BIT |
 271                    GL_COMMAND_BARRIER_BIT))
 272       bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 273
 274    if (barriers & GL_UNIFORM_BARRIER_BIT)
 275       bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 276                PIPE_CONTROL_CONST_CACHE_INVALIDATE);
 277
 278    if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
 279       bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 280
 281    if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
 282       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 283
 284    if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
 285       bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 286                PIPE_CONTROL_RENDER_TARGET_FLUSH);
 287
 288    /* Typed surface messages are handled by the render cache on IVB, so we
 289     * need to flush it too.
 290     */
 291    if (devinfo->gen == 7 && !devinfo->is_haswell)
 292       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 293
 294    brw_emit_pipe_control_flush(brw, bits);
 295 }
 296
 297 static void
 298 brw_blend_barrier(struct gl_context *ctx)
 299 {
 300    struct brw_context *brw = brw_context(ctx);
 301    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 302
 303    if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
 304       if (devinfo->gen >= 6) {
 305          brw_emit_pipe_control_flush(brw,
 306                                      PIPE_CONTROL_RENDER_TARGET_FLUSH |
 307                                      PIPE_CONTROL_CS_STALL);
 308          brw_emit_pipe_control_flush(brw,
 309                                      PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 310       } else {
 311          brw_emit_pipe_control_flush(brw,
 312                                      PIPE_CONTROL_RENDER_TARGET_FLUSH);
 313       }
 314    }
 315 }
 316
 317 void
 318 brw_get_scratch_bo(struct brw_context *brw,
 319                    struct brw_bo **scratch_bo, int size)
 320 {
 321    struct brw_bo *old_bo = *scratch_bo;
 322
 323    if (old_bo && old_bo->size < size) {
 324       brw_bo_unreference(old_bo);
 325       old_bo = NULL;
 326    }
 327
 328    if (!old_bo) {
 329       *scratch_bo = brw_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
 330    }
 331 }
 332
 333 /**
 334  * Reserve enough scratch space for the given stage to hold \p per_thread_size
 335  * bytes times the given \p thread_count.
 336  */
 337 void
 338 brw_alloc_stage_scratch(struct brw_context *brw,
 339                         struct brw_stage_state *stage_state,
 340                         unsigned per_thread_size,
 341                         unsigned thread_count)
 342 {
 343    if (stage_state->per_thread_scratch < per_thread_size) {
 344       stage_state->per_thread_scratch = per_thread_size;
 345
 346       if (stage_state->scratch_bo)
 347          brw_bo_unreference(stage_state->scratch_bo);
 348
 349       stage_state->scratch_bo =
 350          brw_bo_alloc(brw->bufmgr, "shader scratch space",
 351                       per_thread_size * thread_count, 4096);
 352    }
 353 }
 354
 355 void brwInitFragProgFuncs( struct dd_function_table *functions )
 356 {
 357    assert(functions->ProgramStringNotify == _tnl_program_string);
 358
 359    functions->NewProgram = brwNewProgram;
 360    functions->DeleteProgram = brwDeleteProgram;
 361    functions->ProgramStringNotify = brwProgramStringNotify;
 362
 363    functions->LinkShader = brw_link_shader;
 364
 365    functions->MemoryBarrier = brw_memory_barrier;
 366    functions->BlendBarrier = brw_blend_barrier;
 367 }
 368
 369 struct shader_times {
 370    uint64_t time;
 371    uint64_t written;
 372    uint64_t reset;
 373 };
 374
 375 void
 376 brw_init_shader_time(struct brw_context *brw)
 377 {
 378    const int max_entries = 2048;
 379    brw->shader_time.bo =
 380       brw_bo_alloc(brw->bufmgr, "shader time",
 381                    max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
 382    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
 383    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
 384    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
 385                                           max_entries);
 386    brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
 387                                                max_entries);
 388    brw->shader_time.max_entries = max_entries;
 389 }
 390
 391 static int
 392 compare_time(const void *a, const void *b)
 393 {
 394    uint64_t * const *a_val = a;
 395    uint64_t * const *b_val = b;
 396
 397    /* We don't just subtract because we're turning the value to an int. */
 398    if (**a_val < **b_val)
 399       return -1;
 400    else if (**a_val == **b_val)
 401       return 0;
 402    else
 403       return 1;
 404 }
 405
 406 static void
 407 print_shader_time_line(const char *stage, const char *name,
 408                        int shader_num, uint64_t time, uint64_t total)
 409 {
 410    fprintf(stderr, "%-6s%-18s", stage, name);
 411
 412    if (shader_num != 0)
 413       fprintf(stderr, "%4d: ", shader_num);
 414    else
 415       fprintf(stderr, "    : ");
 416
 417    fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
 418            (long long)time,
 419            (double)time / 1000000000.0,
 420            (double)time / total * 100.0);
 421 }
 422
 423 static void
 424 brw_report_shader_time(struct brw_context *brw)
 425 {
 426    if (!brw->shader_time.bo || !brw->shader_time.num_entries)
 427       return;
 428
 429    uint64_t scaled[brw->shader_time.num_entries];
 430    uint64_t *sorted[brw->shader_time.num_entries];
 431    uint64_t total_by_type[ST_CS + 1];
 432    memset(total_by_type, 0, sizeof(total_by_type));
 433    double total = 0;
 434    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 435       uint64_t written = 0, reset = 0;
 436       enum shader_time_shader_type type = brw->shader_time.types[i];
 437
 438       sorted[i] = &scaled[i];
 439
 440       switch (type) {
 441       case ST_VS:
 442       case ST_TCS:
 443       case ST_TES:
 444       case ST_GS:
 445       case ST_FS8:
 446       case ST_FS16:
 447       case ST_CS:
 448          written = brw->shader_time.cumulative[i].written;
 449          reset = brw->shader_time.cumulative[i].reset;
 450          break;
 451
 452       default:
 453          /* I sometimes want to print things that aren't the 3 shader times.
 454           * Just print the sum in that case.
 455           */
 456          written = 1;
 457          reset = 0;
 458          break;
 459       }
 460
 461       uint64_t time = brw->shader_time.cumulative[i].time;
 462       if (written) {
 463          scaled[i] = time / written * (written + reset);
 464       } else {
 465          scaled[i] = time;
 466       }
 467
 468       switch (type) {
 469       case ST_VS:
 470       case ST_TCS:
 471       case ST_TES:
 472       case ST_GS:
 473       case ST_FS8:
 474       case ST_FS16:
 475       case ST_CS:
 476          total_by_type[type] += scaled[i];
 477          break;
 478       default:
 479          break;
 480       }
 481
 482       total += scaled[i];
 483    }
 484
 485    if (total == 0) {
 486       fprintf(stderr, "No shader time collected yet\n");
 487       return;
 488    }
 489
 490    qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 491
 492    fprintf(stderr, "\n");
 493    fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
 494    for (int s = 0; s < brw->shader_time.num_entries; s++) {
 495       const char *stage;
 496       /* Work back from the sorted pointers times to a time to print. */
 497       int i = sorted[s] - scaled;
 498
 499       if (scaled[i] == 0)
 500          continue;
 501
 502       int shader_num = brw->shader_time.ids[i];
 503       const char *shader_name = brw->shader_time.names[i];
 504
 505       switch (brw->shader_time.types[i]) {
 506       case ST_VS:
 507          stage = "vs";
 508          break;
 509       case ST_TCS:
 510          stage = "tcs";
 511          break;
 512       case ST_TES:
 513          stage = "tes";
 514          break;
 515       case ST_GS:
 516          stage = "gs";
 517          break;
 518       case ST_FS8:
 519          stage = "fs8";
 520          break;
 521       case ST_FS16:
 522          stage = "fs16";
 523          break;
 524       case ST_CS:
 525          stage = "cs";
 526          break;
 527       default:
 528          stage = "other";
 529          break;
 530       }
 531
 532       print_shader_time_line(stage, shader_name, shader_num,
 533                              scaled[i], total);
 534    }
 535
 536    fprintf(stderr, "\n");
 537    print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
 538    print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
 539    print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
 540    print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
 541    print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
 542    print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
 543    print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
 544 }
 545
 546 static void
 547 brw_collect_shader_time(struct brw_context *brw)
 548 {
 549    if (!brw->shader_time.bo)
 550       return;
 551
 552    /* This probably stalls on the last rendering.  We could fix that by
 553     * delaying reading the reports, but it doesn't look like it's a big
 554     * overhead compared to the cost of tracking the time in the first place.
 555     */
 556    void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
 557
 558    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 559       uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
 560
 561       brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
 562       brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
 563       brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
 564    }
 565
 566    /* Zero the BO out to clear it out for our next collection.
 567     */
 568    memset(bo_map, 0, brw->shader_time.bo->size);
 569    brw_bo_unmap(brw->shader_time.bo);
 570 }
 571
 572 void
 573 brw_collect_and_report_shader_time(struct brw_context *brw)
 574 {
 575    brw_collect_shader_time(brw);
 576
 577    if (brw->shader_time.report_time == 0 ||
 578        get_time() - brw->shader_time.report_time >= 1.0) {
 579       brw_report_shader_time(brw);
 580       brw->shader_time.report_time = get_time();
 581    }
 582 }
 583
 584 /**
 585  * Chooses an index in the shader_time buffer and sets up tracking information
 586  * for our printouts.
 587  *
 588  * Note that this holds on to references to the underlying programs, which may
 589  * change their lifetimes compared to normal operation.
 590  */
 591 int
 592 brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
 593                           enum shader_time_shader_type type, bool is_glsl_sh)
 594 {
 595    int shader_time_index = brw->shader_time.num_entries++;
 596    assert(shader_time_index < brw->shader_time.max_entries);
 597    brw->shader_time.types[shader_time_index] = type;
 598
 599    const char *name;
 600    if (prog->Id == 0) {
 601       name = "ff";
 602    } else if (is_glsl_sh) {
 603       name = prog->info.label ?
 604          ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
 605    } else {
 606       name = "prog";
 607    }
 608
 609    brw->shader_time.names[shader_time_index] = name;
 610    brw->shader_time.ids[shader_time_index] = prog->Id;
 611
 612    return shader_time_index;
 613 }
 614
 615 void
 616 brw_destroy_shader_time(struct brw_context *brw)
 617 {
 618    brw_bo_unreference(brw->shader_time.bo);
 619    brw->shader_time.bo = NULL;
 620 }
 621
 622 void
 623 brw_stage_prog_data_free(const void *p)
 624 {
 625    struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
 626
 627    ralloc_free(prog_data->param);
 628    ralloc_free(prog_data->pull_param);
 629    ralloc_free(prog_data->image_param);
 630 }
 631
 632 void
 633 brw_dump_arb_asm(const char *stage, struct gl_program *prog)
 634 {
 635    fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
 636            stage, prog->Id, stage);
 637    _mesa_print_program(prog);
 638 }
 639
 640 void
 641 brw_setup_tex_for_precompile(struct brw_context *brw,
 642                              struct brw_sampler_prog_key_data *tex,
 643                              struct gl_program *prog)
 644 {
 645    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 646    const bool has_shader_channel_select = devinfo->is_haswell || devinfo->gen >= 8;
 647    unsigned sampler_count = util_last_bit(prog->SamplersUsed);
 648    for (unsigned i = 0; i < sampler_count; i++) {
 649       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
 650          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
 651          tex->swizzles[i] =
 652             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
 653       } else {
 654          /* Color sampler: assume no swizzling. */
 655          tex->swizzles[i] = SWIZZLE_XYZW;
 656       }
 657    }
 658 }
 659
 660 /**
 661  * Sets up the starting offsets for the groups of binding table entries
 662  * common to all pipeline stages.
 663  *
 664  * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 665  * unused but also make sure that addition of small offsets to them will
 666  * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 667  */
 668 uint32_t
 669 brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
 670                                         const struct gl_program *prog,
 671                                         struct brw_stage_prog_data *stage_prog_data,
 672                                         uint32_t next_binding_table_offset)
 673 {
 674    int num_textures = util_last_bit(prog->SamplersUsed);
 675
 676    stage_prog_data->binding_table.texture_start = next_binding_table_offset;
 677    next_binding_table_offset += num_textures;
 678
 679    if (prog->info.num_ubos) {
 680       assert(prog->info.num_ubos <= BRW_MAX_UBO);
 681       stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
 682       next_binding_table_offset += prog->info.num_ubos;
 683    } else {
 684       stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
 685    }
 686
 687    if (prog->info.num_ssbos) {
 688       assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
 689       stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
 690       next_binding_table_offset += prog->info.num_ssbos;
 691    } else {
 692       stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
 693    }
 694
 695    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 696       stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
 697       next_binding_table_offset++;
 698    } else {
 699       stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
 700    }
 701
 702    if (prog->nir->info.uses_texture_gather) {
 703       if (devinfo->gen >= 8) {
 704          stage_prog_data->binding_table.gather_texture_start =
 705             stage_prog_data->binding_table.texture_start;
 706       } else {
 707          stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
 708          next_binding_table_offset += num_textures;
 709       }
 710    } else {
 711       stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
 712    }
 713
 714    if (prog->info.num_abos) {
 715       stage_prog_data->binding_table.abo_start = next_binding_table_offset;
 716       next_binding_table_offset += prog->info.num_abos;
 717    } else {
 718       stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
 719    }
 720
 721    if (prog->info.num_images) {
 722       stage_prog_data->binding_table.image_start = next_binding_table_offset;
 723       next_binding_table_offset += prog->info.num_images;
 724    } else {
 725       stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
 726    }
 727
 728    /* This may or may not be used depending on how the compile goes. */
 729    stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
 730    next_binding_table_offset++;
 731
 732    /* Plane 0 is just the regular texture section */
 733    stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
 734
 735    stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
 736    next_binding_table_offset += num_textures;
 737
 738    stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
 739    next_binding_table_offset += num_textures;
 740
 741    /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
 742
 743    assert(next_binding_table_offset <= BRW_MAX_SURFACES);
 744    return next_binding_table_offset;
 745 }