src/mesa/drivers/dri/i965/brw_program.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32 #include <pthread.h>
  33 #include "main/imports.h"
  34 #include "program/prog_parameter.h"
  35 #include "program/prog_print.h"
  36 #include "program/prog_to_nir.h"
  37 #include "program/program.h"
  38 #include "program/programopt.h"
  39 #include "tnl/tnl.h"
  40 #include "util/ralloc.h"
  41 #include "compiler/glsl/ir.h"
  42 #include "compiler/glsl/glsl_to_nir.h"
  43
  44 #include "brw_program.h"
  45 #include "brw_context.h"
  46 #include "compiler/brw_nir.h"
  47 #include "brw_defines.h"
  48 #include "intel_batchbuffer.h"
  49
  50 static bool
  51 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
  52 {
  53    if (is_scalar) {
  54       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
  55                                type_size_scalar_bytes);
  56       return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
  57    } else {
  58       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
  59                                type_size_vec4_bytes);
  60       return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
  61    }
  62 }
  63
  64 nir_shader *
  65 brw_create_nir(struct brw_context *brw,
  66                const struct gl_shader_program *shader_prog,
  67                struct gl_program *prog,
  68                gl_shader_stage stage,
  69                bool is_scalar)
  70 {
  71    struct gl_context *ctx = &brw->ctx;
  72    const nir_shader_compiler_options *options =
  73       ctx->Const.ShaderCompilerOptions[stage].NirOptions;
  74    bool progress;
  75    nir_shader *nir;
  76
  77    /* First, lower the GLSL IR or Mesa IR to NIR */
  78    if (shader_prog) {
  79       nir = glsl_to_nir(shader_prog, stage, options);
  80       nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
  81       nir_lower_returns(nir);
  82       nir_validate_shader(nir);
  83       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
  84                  nir_shader_get_entrypoint(nir), true, false);
  85    } else {
  86       nir = prog_to_nir(prog, options);
  87       NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
  88    }
  89    nir_validate_shader(nir);
  90
  91    (void)progress;
  92
  93    nir = brw_preprocess_nir(brw->screen->compiler, nir);
  94
  95    if (stage == MESA_SHADER_FRAGMENT) {
  96       static const struct nir_lower_wpos_ytransform_options wpos_options = {
  97          .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
  98          .fs_coord_pixel_center_integer = 1,
  99          .fs_coord_origin_upper_left = 1,
 100       };
 101       _mesa_add_state_reference(prog->Parameters,
 102                                 (gl_state_index *) wpos_options.state_tokens);
 103
 104       NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
 105    }
 106
 107    NIR_PASS(progress, nir, nir_lower_system_values);
 108    NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
 109
 110    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 111
 112    /* Copy the info we just generated back into the gl_program */
 113    const char *prog_name = prog->info.name;
 114    const char *prog_label = prog->info.label;
 115    prog->info = nir->info;
 116    prog->info.name = prog_name;
 117    prog->info.label = prog_label;
 118
 119    if (shader_prog) {
 120       NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
 121       NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
 122    }
 123
 124    return nir;
 125 }
 126
 127 static unsigned
 128 get_new_program_id(struct intel_screen *screen)
 129 {
 130    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
 131    pthread_mutex_lock(&m);
 132    unsigned id = screen->program_id++;
 133    pthread_mutex_unlock(&m);
 134    return id;
 135 }
 136
 137 static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
 138                                         GLuint id, bool is_arb_asm)
 139 {
 140    struct brw_context *brw = brw_context(ctx);
 141    struct brw_program *prog = rzalloc(NULL, struct brw_program);
 142
 143    if (prog) {
 144       prog->id = get_new_program_id(brw->screen);
 145
 146       return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
 147    }
 148
 149    return NULL;
 150 }
 151
 152 static void brwDeleteProgram( struct gl_context *ctx,
 153                               struct gl_program *prog )
 154 {
 155    struct brw_context *brw = brw_context(ctx);
 156
 157    /* Beware!  prog's refcount has reached zero, and it's about to be freed.
 158     *
 159     * In brw_upload_pipeline_state(), we compare brw->foo_program to
 160     * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
 161     * pointer has changed.
 162     *
 163     * We cannot leave brw->foo_program as a dangling pointer to the dead
 164     * program.  malloc() may allocate the same memory for a new gl_program,
 165     * causing us to see matching pointers...but totally different programs.
 166     *
 167     * We cannot set brw->foo_program to NULL, either.  If we've deleted the
 168     * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
 169     * would cause us to see matching pointers (NULL == NULL), and fail to
 170     * detect that a program has changed since our last draw.
 171     *
 172     * So, set it to a bogus gl_program pointer that will never match,
 173     * causing us to properly reevaluate the state on our next draw.
 174     *
 175     * Getting this wrong causes heisenbugs which are very hard to catch,
 176     * as you need a very specific allocation pattern to hit the problem.
 177     */
 178    static const struct gl_program deleted_program;
 179
 180    if (brw->vertex_program == prog)
 181       brw->vertex_program = &deleted_program;
 182
 183    if (brw->tess_ctrl_program == prog)
 184       brw->tess_ctrl_program = &deleted_program;
 185
 186    if (brw->tess_eval_program == prog)
 187       brw->tess_eval_program = &deleted_program;
 188
 189    if (brw->geometry_program == prog)
 190       brw->geometry_program = &deleted_program;
 191
 192    if (brw->fragment_program == prog)
 193       brw->fragment_program = &deleted_program;
 194
 195    if (brw->compute_program == prog)
 196       brw->compute_program = &deleted_program;
 197
 198    _mesa_delete_program( ctx, prog );
 199 }
 200
 201
 202 static GLboolean
 203 brwProgramStringNotify(struct gl_context *ctx,
 204                        GLenum target,
 205                        struct gl_program *prog)
 206 {
 207    assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
 208
 209    struct brw_context *brw = brw_context(ctx);
 210    const struct brw_compiler *compiler = brw->screen->compiler;
 211
 212    switch (target) {
 213    case GL_FRAGMENT_PROGRAM_ARB: {
 214       struct brw_program *newFP = brw_program(prog);
 215       const struct brw_program *curFP =
 216          brw_program_const(brw->fragment_program);
 217
 218       if (newFP == curFP)
 219          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 220       newFP->id = get_new_program_id(brw->screen);
 221
 222       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 223
 224       brw_fs_precompile(ctx, prog);
 225       break;
 226    }
 227    case GL_VERTEX_PROGRAM_ARB: {
 228       struct brw_program *newVP = brw_program(prog);
 229       const struct brw_program *curVP =
 230          brw_program_const(brw->vertex_program);
 231
 232       if (newVP == curVP)
 233          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
 234       if (newVP->program.arb.IsPositionInvariant) {
 235          _mesa_insert_mvp_code(ctx, &newVP->program);
 236       }
 237       newVP->id = get_new_program_id(brw->screen);
 238
 239       /* Also tell tnl about it:
 240        */
 241       _tnl_program_string(ctx, target, prog);
 242
 243       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
 244                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
 245
 246       brw_vs_precompile(ctx, prog);
 247       break;
 248    }
 249    default:
 250       /*
 251        * driver->ProgramStringNotify is only called for ARB programs, fixed
 252        * function vertex programs, and ir_to_mesa (which isn't used by the
 253        * i965 back-end).  Therefore, even after geometry shaders are added,
 254        * this function should only ever be called with a target of
 255        * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
 256        */
 257       unreachable("Unexpected target in brwProgramStringNotify");
 258    }
 259
 260    return true;
 261 }
 262
 263 static void
 264 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 265 {
 266    struct brw_context *brw = brw_context(ctx);
 267    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 268    unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
 269                     PIPE_CONTROL_NO_WRITE |
 270                     PIPE_CONTROL_CS_STALL);
 271    assert(devinfo->gen >= 7 && devinfo->gen <= 10);
 272
 273    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
 274                    GL_ELEMENT_ARRAY_BARRIER_BIT |
 275                    GL_COMMAND_BARRIER_BIT))
 276       bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 277
 278    if (barriers & GL_UNIFORM_BARRIER_BIT)
 279       bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 280                PIPE_CONTROL_CONST_CACHE_INVALIDATE);
 281
 282    if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
 283       bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 284
 285    if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
 286       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 287
 288    if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
 289       bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 290                PIPE_CONTROL_RENDER_TARGET_FLUSH);
 291
 292    /* Typed surface messages are handled by the render cache on IVB, so we
 293     * need to flush it too.
 294     */
 295    if (devinfo->gen == 7 && !brw->is_haswell)
 296       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 297
 298    brw_emit_pipe_control_flush(brw, bits);
 299 }
 300
 301 static void
 302 brw_blend_barrier(struct gl_context *ctx)
 303 {
 304    struct brw_context *brw = brw_context(ctx);
 305    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 306
 307    if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
 308       if (devinfo->gen >= 6) {
 309          brw_emit_pipe_control_flush(brw,
 310                                      PIPE_CONTROL_RENDER_TARGET_FLUSH |
 311                                      PIPE_CONTROL_CS_STALL);
 312          brw_emit_pipe_control_flush(brw,
 313                                      PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 314       } else {
 315          brw_emit_pipe_control_flush(brw,
 316                                      PIPE_CONTROL_RENDER_TARGET_FLUSH);
 317       }
 318    }
 319 }
 320
 321 void
 322 brw_get_scratch_bo(struct brw_context *brw,
 323                    struct brw_bo **scratch_bo, int size)
 324 {
 325    struct brw_bo *old_bo = *scratch_bo;
 326
 327    if (old_bo && old_bo->size < size) {
 328       brw_bo_unreference(old_bo);
 329       old_bo = NULL;
 330    }
 331
 332    if (!old_bo) {
 333       *scratch_bo = brw_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
 334    }
 335 }
 336
 337 /**
 338  * Reserve enough scratch space for the given stage to hold \p per_thread_size
 339  * bytes times the given \p thread_count.
 340  */
 341 void
 342 brw_alloc_stage_scratch(struct brw_context *brw,
 343                         struct brw_stage_state *stage_state,
 344                         unsigned per_thread_size,
 345                         unsigned thread_count)
 346 {
 347    if (stage_state->per_thread_scratch < per_thread_size) {
 348       stage_state->per_thread_scratch = per_thread_size;
 349
 350       if (stage_state->scratch_bo)
 351          brw_bo_unreference(stage_state->scratch_bo);
 352
 353       stage_state->scratch_bo =
 354          brw_bo_alloc(brw->bufmgr, "shader scratch space",
 355                       per_thread_size * thread_count, 4096);
 356    }
 357 }
 358
 359 void brwInitFragProgFuncs( struct dd_function_table *functions )
 360 {
 361    assert(functions->ProgramStringNotify == _tnl_program_string);
 362
 363    functions->NewProgram = brwNewProgram;
 364    functions->DeleteProgram = brwDeleteProgram;
 365    functions->ProgramStringNotify = brwProgramStringNotify;
 366
 367    functions->LinkShader = brw_link_shader;
 368
 369    functions->MemoryBarrier = brw_memory_barrier;
 370    functions->BlendBarrier = brw_blend_barrier;
 371 }
 372
/**
 * Running totals for one shader_time entry, accumulated from the shader time
 * buffer by brw_collect_shader_time().
 */
struct shader_times {
   /* Sum of the first 32-bit counter of the entry's buffer record. */
   uint64_t time;
   /* Sum of the second counter; used as the divisor when scaling \c time in
    * brw_report_shader_time().
    */
   uint64_t written;
   /* Sum of the third counter; added to \c written when scaling \c time. */
   uint64_t reset;
};
 378
 379 void
 380 brw_init_shader_time(struct brw_context *brw)
 381 {
 382    const int max_entries = 2048;
 383    brw->shader_time.bo =
 384       brw_bo_alloc(brw->bufmgr, "shader time",
 385                    max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
 386    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
 387    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
 388    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
 389                                           max_entries);
 390    brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
 391                                                max_entries);
 392    brw->shader_time.max_entries = max_entries;
 393 }
 394
 395 static int
 396 compare_time(const void *a, const void *b)
 397 {
 398    uint64_t * const *a_val = a;
 399    uint64_t * const *b_val = b;
 400
 401    /* We don't just subtract because we're turning the value to an int. */
 402    if (**a_val < **b_val)
 403       return -1;
 404    else if (**a_val == **b_val)
 405       return 0;
 406    else
 407       return 1;
 408 }
 409
 410 static void
 411 print_shader_time_line(const char *stage, const char *name,
 412                        int shader_num, uint64_t time, uint64_t total)
 413 {
 414    fprintf(stderr, "%-6s%-18s", stage, name);
 415
 416    if (shader_num != 0)
 417       fprintf(stderr, "%4d: ", shader_num);
 418    else
 419       fprintf(stderr, "    : ");
 420
 421    fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
 422            (long long)time,
 423            (double)time / 1000000000.0,
 424            (double)time / total * 100.0);
 425 }
 426
 427 static void
 428 brw_report_shader_time(struct brw_context *brw)
 429 {
 430    if (!brw->shader_time.bo || !brw->shader_time.num_entries)
 431       return;
 432
 433    uint64_t scaled[brw->shader_time.num_entries];
 434    uint64_t *sorted[brw->shader_time.num_entries];
 435    uint64_t total_by_type[ST_CS + 1];
 436    memset(total_by_type, 0, sizeof(total_by_type));
 437    double total = 0;
 438    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 439       uint64_t written = 0, reset = 0;
 440       enum shader_time_shader_type type = brw->shader_time.types[i];
 441
 442       sorted[i] = &scaled[i];
 443
 444       switch (type) {
 445       case ST_VS:
 446       case ST_TCS:
 447       case ST_TES:
 448       case ST_GS:
 449       case ST_FS8:
 450       case ST_FS16:
 451       case ST_CS:
 452          written = brw->shader_time.cumulative[i].written;
 453          reset = brw->shader_time.cumulative[i].reset;
 454          break;
 455
 456       default:
 457          /* I sometimes want to print things that aren't the 3 shader times.
 458           * Just print the sum in that case.
 459           */
 460          written = 1;
 461          reset = 0;
 462          break;
 463       }
 464
 465       uint64_t time = brw->shader_time.cumulative[i].time;
 466       if (written) {
 467          scaled[i] = time / written * (written + reset);
 468       } else {
 469          scaled[i] = time;
 470       }
 471
 472       switch (type) {
 473       case ST_VS:
 474       case ST_TCS:
 475       case ST_TES:
 476       case ST_GS:
 477       case ST_FS8:
 478       case ST_FS16:
 479       case ST_CS:
 480          total_by_type[type] += scaled[i];
 481          break;
 482       default:
 483          break;
 484       }
 485
 486       total += scaled[i];
 487    }
 488
 489    if (total == 0) {
 490       fprintf(stderr, "No shader time collected yet\n");
 491       return;
 492    }
 493
 494    qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 495
 496    fprintf(stderr, "\n");
 497    fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
 498    for (int s = 0; s < brw->shader_time.num_entries; s++) {
 499       const char *stage;
 500       /* Work back from the sorted pointers times to a time to print. */
 501       int i = sorted[s] - scaled;
 502
 503       if (scaled[i] == 0)
 504          continue;
 505
 506       int shader_num = brw->shader_time.ids[i];
 507       const char *shader_name = brw->shader_time.names[i];
 508
 509       switch (brw->shader_time.types[i]) {
 510       case ST_VS:
 511          stage = "vs";
 512          break;
 513       case ST_TCS:
 514          stage = "tcs";
 515          break;
 516       case ST_TES:
 517          stage = "tes";
 518          break;
 519       case ST_GS:
 520          stage = "gs";
 521          break;
 522       case ST_FS8:
 523          stage = "fs8";
 524          break;
 525       case ST_FS16:
 526          stage = "fs16";
 527          break;
 528       case ST_CS:
 529          stage = "cs";
 530          break;
 531       default:
 532          stage = "other";
 533          break;
 534       }
 535
 536       print_shader_time_line(stage, shader_name, shader_num,
 537                              scaled[i], total);
 538    }
 539
 540    fprintf(stderr, "\n");
 541    print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
 542    print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
 543    print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
 544    print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
 545    print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
 546    print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
 547    print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
 548 }
 549
 550 static void
 551 brw_collect_shader_time(struct brw_context *brw)
 552 {
 553    if (!brw->shader_time.bo)
 554       return;
 555
 556    /* This probably stalls on the last rendering.  We could fix that by
 557     * delaying reading the reports, but it doesn't look like it's a big
 558     * overhead compared to the cost of tracking the time in the first place.
 559     */
 560    void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
 561
 562    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 563       uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
 564
 565       brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
 566       brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
 567       brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
 568    }
 569
 570    /* Zero the BO out to clear it out for our next collection.
 571     */
 572    memset(bo_map, 0, brw->shader_time.bo->size);
 573    brw_bo_unmap(brw->shader_time.bo);
 574 }
 575
 576 void
 577 brw_collect_and_report_shader_time(struct brw_context *brw)
 578 {
 579    brw_collect_shader_time(brw);
 580
 581    if (brw->shader_time.report_time == 0 ||
 582        get_time() - brw->shader_time.report_time >= 1.0) {
 583       brw_report_shader_time(brw);
 584       brw->shader_time.report_time = get_time();
 585    }
 586 }
 587
 588 /**
 589  * Chooses an index in the shader_time buffer and sets up tracking information
 590  * for our printouts.
 591  *
 592  * Note that this holds on to references to the underlying programs, which may
 593  * change their lifetimes compared to normal operation.
 594  */
 595 int
 596 brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
 597                           enum shader_time_shader_type type, bool is_glsl_sh)
 598 {
 599    int shader_time_index = brw->shader_time.num_entries++;
 600    assert(shader_time_index < brw->shader_time.max_entries);
 601    brw->shader_time.types[shader_time_index] = type;
 602
 603    const char *name;
 604    if (prog->Id == 0) {
 605       name = "ff";
 606    } else if (is_glsl_sh) {
 607       name = prog->info.label ?
 608          ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
 609    } else {
 610       name = "prog";
 611    }
 612
 613    brw->shader_time.names[shader_time_index] = name;
 614    brw->shader_time.ids[shader_time_index] = prog->Id;
 615
 616    return shader_time_index;
 617 }
 618
 619 void
 620 brw_destroy_shader_time(struct brw_context *brw)
 621 {
 622    brw_bo_unreference(brw->shader_time.bo);
 623    brw->shader_time.bo = NULL;
 624 }
 625
 626 void
 627 brw_stage_prog_data_free(const void *p)
 628 {
 629    struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
 630
 631    ralloc_free(prog_data->param);
 632    ralloc_free(prog_data->pull_param);
 633    ralloc_free(prog_data->image_param);
 634 }
 635
 636 void
 637 brw_dump_arb_asm(const char *stage, struct gl_program *prog)
 638 {
 639    fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
 640            stage, prog->Id, stage);
 641    _mesa_print_program(prog);
 642 }
 643
 644 void
 645 brw_setup_tex_for_precompile(struct brw_context *brw,
 646                              struct brw_sampler_prog_key_data *tex,
 647                              struct gl_program *prog)
 648 {
 649    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 650    const bool has_shader_channel_select = brw->is_haswell || devinfo->gen >= 8;
 651    unsigned sampler_count = util_last_bit(prog->SamplersUsed);
 652    for (unsigned i = 0; i < sampler_count; i++) {
 653       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
 654          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
 655          tex->swizzles[i] =
 656             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
 657       } else {
 658          /* Color sampler: assume no swizzling. */
 659          tex->swizzles[i] = SWIZZLE_XYZW;
 660       }
 661    }
 662 }
 663
 664 /**
 665  * Sets up the starting offsets for the groups of binding table entries
 666  * common to all pipeline stages.
 667  *
 668  * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 669  * unused but also make sure that addition of small offsets to them will
 670  * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 671  */
 672 uint32_t
 673 brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
 674                                         const struct gl_program *prog,
 675                                         struct brw_stage_prog_data *stage_prog_data,
 676                                         uint32_t next_binding_table_offset)
 677 {
 678    int num_textures = util_last_bit(prog->SamplersUsed);
 679
 680    stage_prog_data->binding_table.texture_start = next_binding_table_offset;
 681    next_binding_table_offset += num_textures;
 682
 683    if (prog->info.num_ubos) {
 684       assert(prog->info.num_ubos <= BRW_MAX_UBO);
 685       stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
 686       next_binding_table_offset += prog->info.num_ubos;
 687    } else {
 688       stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
 689    }
 690
 691    if (prog->info.num_ssbos) {
 692       assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
 693       stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
 694       next_binding_table_offset += prog->info.num_ssbos;
 695    } else {
 696       stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
 697    }
 698
 699    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 700       stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
 701       next_binding_table_offset++;
 702    } else {
 703       stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
 704    }
 705
 706    if (prog->nir->info.uses_texture_gather) {
 707       if (devinfo->gen >= 8) {
 708          stage_prog_data->binding_table.gather_texture_start =
 709             stage_prog_data->binding_table.texture_start;
 710       } else {
 711          stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
 712          next_binding_table_offset += num_textures;
 713       }
 714    } else {
 715       stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
 716    }
 717
 718    if (prog->info.num_abos) {
 719       stage_prog_data->binding_table.abo_start = next_binding_table_offset;
 720       next_binding_table_offset += prog->info.num_abos;
 721    } else {
 722       stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
 723    }
 724
 725    if (prog->info.num_images) {
 726       stage_prog_data->binding_table.image_start = next_binding_table_offset;
 727       next_binding_table_offset += prog->info.num_images;
 728    } else {
 729       stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
 730    }
 731
 732    /* This may or may not be used depending on how the compile goes. */
 733    stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
 734    next_binding_table_offset++;
 735
 736    /* Plane 0 is just the regular texture section */
 737    stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
 738
 739    stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
 740    next_binding_table_offset += num_textures;
 741
 742    stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
 743    next_binding_table_offset += num_textures;
 744
 745    /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
 746
 747    assert(next_binding_table_offset <= BRW_MAX_SURFACES);
 748    return next_binding_table_offset;
 749 }