src/mesa/drivers/dri/i965/brw_program.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32 #include <pthread.h>
  33 #include "main/imports.h"
  34 #include "program/prog_parameter.h"
  35 #include "program/prog_print.h"
  36 #include "program/prog_to_nir.h"
  37 #include "program/program.h"
  38 #include "program/programopt.h"
  39 #include "tnl/tnl.h"
  40 #include "util/ralloc.h"
  41 #include "compiler/glsl/ir.h"
  42 #include "compiler/glsl/glsl_to_nir.h"
  43
  44 #include "brw_program.h"
  45 #include "brw_context.h"
  46 #include "compiler/brw_nir.h"
  47 #include "brw_defines.h"
  48 #include "intel_batchbuffer.h"
  49
  50 static bool
  51 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
  52 {
  53    if (is_scalar) {
  54       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
  55                                type_size_scalar_bytes);
  56       return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
  57    } else {
  58       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
  59                                type_size_vec4_bytes);
  60       return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
  61    }
  62 }
  63
  64 nir_shader *
  65 brw_create_nir(struct brw_context *brw,
  66                const struct gl_shader_program *shader_prog,
  67                struct gl_program *prog,
  68                gl_shader_stage stage,
  69                bool is_scalar)
  70 {
  71    struct gl_context *ctx = &brw->ctx;
  72    const nir_shader_compiler_options *options =
  73       ctx->Const.ShaderCompilerOptions[stage].NirOptions;
  74    bool progress;
  75    nir_shader *nir;
  76
  77    /* First, lower the GLSL IR or Mesa IR to NIR */
  78    if (shader_prog) {
  79       nir = glsl_to_nir(shader_prog, stage, options);
  80       nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
  81       nir_lower_returns(nir);
  82       nir_validate_shader(nir);
  83       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
  84                  nir_shader_get_entrypoint(nir), true, false);
  85    } else {
  86       nir = prog_to_nir(prog, options);
  87       NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
  88    }
  89    nir_validate_shader(nir);
  90
  91    (void)progress;
  92
  93    nir = brw_preprocess_nir(brw->screen->compiler, nir);
  94
  95    if (stage == MESA_SHADER_FRAGMENT) {
  96       static const struct nir_lower_wpos_ytransform_options wpos_options = {
  97          .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
  98          .fs_coord_pixel_center_integer = 1,
  99          .fs_coord_origin_upper_left = 1,
 100       };
 101       _mesa_add_state_reference(prog->Parameters,
 102                                 (gl_state_index *) wpos_options.state_tokens);
 103
 104       NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
 105    }
 106
 107    NIR_PASS(progress, nir, nir_lower_system_values);
 108    NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
 109
 110    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 111
 112    /* Copy the info we just generated back into the gl_program */
 113    const char *prog_name = prog->info.name;
 114    const char *prog_label = prog->info.label;
 115    prog->info = nir->info;
 116    prog->info.name = prog_name;
 117    prog->info.label = prog_label;
 118
 119    if (shader_prog) {
 120       NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
 121       NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
 122    }
 123
 124    return nir;
 125 }
 126
 127 static unsigned
 128 get_new_program_id(struct intel_screen *screen)
 129 {
 130    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
 131    pthread_mutex_lock(&m);
 132    unsigned id = screen->program_id++;
 133    pthread_mutex_unlock(&m);
 134    return id;
 135 }
 136
 137 static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
 138                                         GLuint id, bool is_arb_asm)
 139 {
 140    struct brw_context *brw = brw_context(ctx);
 141
 142    switch (target) {
 143    case GL_VERTEX_PROGRAM_ARB:
 144    case GL_TESS_CONTROL_PROGRAM_NV:
 145    case GL_TESS_EVALUATION_PROGRAM_NV:
 146    case GL_GEOMETRY_PROGRAM_NV:
 147    case GL_COMPUTE_PROGRAM_NV: {
 148       struct brw_program *prog = rzalloc(NULL, struct brw_program);
 149       if (prog) {
 150          prog->id = get_new_program_id(brw->screen);
 151
 152          return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
 153       }
 154       else
 155          return NULL;
 156    }
 157
 158    case GL_FRAGMENT_PROGRAM_ARB: {
 159       struct brw_program *prog = rzalloc(NULL, struct brw_program);
 160
 161       if (prog) {
 162          prog->id = get_new_program_id(brw->screen);
 163
 164          return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
 165       }
 166       else
 167          return NULL;
 168    }
 169
 170    default:
 171       unreachable("Unsupported target in brwNewProgram()");
 172    }
 173 }
 174
 175 static void brwDeleteProgram( struct gl_context *ctx,
 176                               struct gl_program *prog )
 177 {
 178    struct brw_context *brw = brw_context(ctx);
 179
 180    /* Beware!  prog's refcount has reached zero, and it's about to be freed.
 181     *
 182     * In brw_upload_pipeline_state(), we compare brw->foo_program to
 183     * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
 184     * pointer has changed.
 185     *
 186     * We cannot leave brw->foo_program as a dangling pointer to the dead
 187     * program.  malloc() may allocate the same memory for a new gl_program,
 188     * causing us to see matching pointers...but totally different programs.
 189     *
 190     * We cannot set brw->foo_program to NULL, either.  If we've deleted the
 191     * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
 192     * would cause us to see matching pointers (NULL == NULL), and fail to
 193     * detect that a program has changed since our last draw.
 194     *
 195     * So, set it to a bogus gl_program pointer that will never match,
 196     * causing us to properly reevaluate the state on our next draw.
 197     *
 198     * Getting this wrong causes heisenbugs which are very hard to catch,
 199     * as you need a very specific allocation pattern to hit the problem.
 200     */
 201    static const struct gl_program deleted_program;
 202
 203    if (brw->vertex_program == prog)
 204       brw->vertex_program = &deleted_program;
 205
 206    if (brw->tess_ctrl_program == prog)
 207       brw->tess_ctrl_program = &deleted_program;
 208
 209    if (brw->tess_eval_program == prog)
 210       brw->tess_eval_program = &deleted_program;
 211
 212    if (brw->geometry_program == prog)
 213       brw->geometry_program = &deleted_program;
 214
 215    if (brw->fragment_program == prog)
 216       brw->fragment_program = &deleted_program;
 217
 218    if (brw->compute_program == prog)
 219       brw->compute_program = &deleted_program;
 220
 221    _mesa_delete_program( ctx, prog );
 222 }
 223
 224
 225 static GLboolean
 226 brwProgramStringNotify(struct gl_context *ctx,
 227                        GLenum target,
 228                        struct gl_program *prog)
 229 {
 230    assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
 231
 232    struct brw_context *brw = brw_context(ctx);
 233    const struct brw_compiler *compiler = brw->screen->compiler;
 234
 235    switch (target) {
 236    case GL_FRAGMENT_PROGRAM_ARB: {
 237       struct brw_program *newFP = brw_program(prog);
 238       const struct brw_program *curFP =
 239          brw_program_const(brw->fragment_program);
 240
 241       if (newFP == curFP)
 242          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 243       newFP->id = get_new_program_id(brw->screen);
 244
 245       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 246
 247       brw_fs_precompile(ctx, prog);
 248       break;
 249    }
 250    case GL_VERTEX_PROGRAM_ARB: {
 251       struct brw_program *newVP = brw_program(prog);
 252       const struct brw_program *curVP =
 253          brw_program_const(brw->vertex_program);
 254
 255       if (newVP == curVP)
 256          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
 257       if (newVP->program.arb.IsPositionInvariant) {
 258          _mesa_insert_mvp_code(ctx, &newVP->program);
 259       }
 260       newVP->id = get_new_program_id(brw->screen);
 261
 262       /* Also tell tnl about it:
 263        */
 264       _tnl_program_string(ctx, target, prog);
 265
 266       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
 267                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
 268
 269       brw_vs_precompile(ctx, prog);
 270       break;
 271    }
 272    default:
 273       /*
 274        * driver->ProgramStringNotify is only called for ARB programs, fixed
 275        * function vertex programs, and ir_to_mesa (which isn't used by the
 276        * i965 back-end).  Therefore, even after geometry shaders are added,
 277        * this function should only ever be called with a target of
 278        * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
 279        */
 280       unreachable("Unexpected target in brwProgramStringNotify");
 281    }
 282
 283    return true;
 284 }
 285
 286 static void
 287 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 288 {
 289    struct brw_context *brw = brw_context(ctx);
 290    unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
 291                     PIPE_CONTROL_NO_WRITE |
 292                     PIPE_CONTROL_CS_STALL);
 293    assert(brw->gen >= 7 && brw->gen <= 10);
 294
 295    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
 296                    GL_ELEMENT_ARRAY_BARRIER_BIT |
 297                    GL_COMMAND_BARRIER_BIT))
 298       bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 299
 300    if (barriers & GL_UNIFORM_BARRIER_BIT)
 301       bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 302                PIPE_CONTROL_CONST_CACHE_INVALIDATE);
 303
 304    if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
 305       bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 306
 307    if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
 308       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 309
 310    if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
 311       bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 312                PIPE_CONTROL_RENDER_TARGET_FLUSH);
 313
 314    /* Typed surface messages are handled by the render cache on IVB, so we
 315     * need to flush it too.
 316     */
 317    if (brw->gen == 7 && !brw->is_haswell)
 318       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 319
 320    brw_emit_pipe_control_flush(brw, bits);
 321 }
 322
 323 static void
 324 brw_blend_barrier(struct gl_context *ctx)
 325 {
 326    struct brw_context *brw = brw_context(ctx);
 327
 328    if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
 329       if (brw->gen >= 6) {
 330          brw_emit_pipe_control_flush(brw,
 331                                      PIPE_CONTROL_RENDER_TARGET_FLUSH |
 332                                      PIPE_CONTROL_CS_STALL);
 333          brw_emit_pipe_control_flush(brw,
 334                                      PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 335       } else {
 336          brw_emit_pipe_control_flush(brw,
 337                                      PIPE_CONTROL_RENDER_TARGET_FLUSH);
 338       }
 339    }
 340 }
 341
 342 void
 343 brw_get_scratch_bo(struct brw_context *brw,
 344                    struct brw_bo **scratch_bo, int size)
 345 {
 346    struct brw_bo *old_bo = *scratch_bo;
 347
 348    if (old_bo && old_bo->size < size) {
 349       brw_bo_unreference(old_bo);
 350       old_bo = NULL;
 351    }
 352
 353    if (!old_bo) {
 354       *scratch_bo = brw_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
 355    }
 356 }
 357
 358 /**
 359  * Reserve enough scratch space for the given stage to hold \p per_thread_size
 360  * bytes times the given \p thread_count.
 361  */
 362 void
 363 brw_alloc_stage_scratch(struct brw_context *brw,
 364                         struct brw_stage_state *stage_state,
 365                         unsigned per_thread_size,
 366                         unsigned thread_count)
 367 {
 368    if (stage_state->per_thread_scratch < per_thread_size) {
 369       stage_state->per_thread_scratch = per_thread_size;
 370
 371       if (stage_state->scratch_bo)
 372          brw_bo_unreference(stage_state->scratch_bo);
 373
 374       stage_state->scratch_bo =
 375          brw_bo_alloc(brw->bufmgr, "shader scratch space",
 376                       per_thread_size * thread_count, 4096);
 377    }
 378 }
 379
 380 void brwInitFragProgFuncs( struct dd_function_table *functions )
 381 {
 382    assert(functions->ProgramStringNotify == _tnl_program_string);
 383
 384    functions->NewProgram = brwNewProgram;
 385    functions->DeleteProgram = brwDeleteProgram;
 386    functions->ProgramStringNotify = brwProgramStringNotify;
 387
 388    functions->LinkShader = brw_link_shader;
 389
 390    functions->MemoryBarrier = brw_memory_barrier;
 391    functions->BlendBarrier = brw_blend_barrier;
 392 }
 393
 394 struct shader_times {
 395    uint64_t time;
 396    uint64_t written;
 397    uint64_t reset;
 398 };
 399
 400 void
 401 brw_init_shader_time(struct brw_context *brw)
 402 {
 403    const int max_entries = 2048;
 404    brw->shader_time.bo =
 405       brw_bo_alloc(brw->bufmgr, "shader time",
 406                    max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
 407    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
 408    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
 409    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
 410                                           max_entries);
 411    brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
 412                                                max_entries);
 413    brw->shader_time.max_entries = max_entries;
 414 }
 415
 416 static int
 417 compare_time(const void *a, const void *b)
 418 {
 419    uint64_t * const *a_val = a;
 420    uint64_t * const *b_val = b;
 421
 422    /* We don't just subtract because we're turning the value to an int. */
 423    if (**a_val < **b_val)
 424       return -1;
 425    else if (**a_val == **b_val)
 426       return 0;
 427    else
 428       return 1;
 429 }
 430
 431 static void
 432 print_shader_time_line(const char *stage, const char *name,
 433                        int shader_num, uint64_t time, uint64_t total)
 434 {
 435    fprintf(stderr, "%-6s%-18s", stage, name);
 436
 437    if (shader_num != 0)
 438       fprintf(stderr, "%4d: ", shader_num);
 439    else
 440       fprintf(stderr, "    : ");
 441
 442    fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
 443            (long long)time,
 444            (double)time / 1000000000.0,
 445            (double)time / total * 100.0);
 446 }
 447
 448 static void
 449 brw_report_shader_time(struct brw_context *brw)
 450 {
 451    if (!brw->shader_time.bo || !brw->shader_time.num_entries)
 452       return;
 453
 454    uint64_t scaled[brw->shader_time.num_entries];
 455    uint64_t *sorted[brw->shader_time.num_entries];
 456    uint64_t total_by_type[ST_CS + 1];
 457    memset(total_by_type, 0, sizeof(total_by_type));
 458    double total = 0;
 459    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 460       uint64_t written = 0, reset = 0;
 461       enum shader_time_shader_type type = brw->shader_time.types[i];
 462
 463       sorted[i] = &scaled[i];
 464
 465       switch (type) {
 466       case ST_VS:
 467       case ST_TCS:
 468       case ST_TES:
 469       case ST_GS:
 470       case ST_FS8:
 471       case ST_FS16:
 472       case ST_CS:
 473          written = brw->shader_time.cumulative[i].written;
 474          reset = brw->shader_time.cumulative[i].reset;
 475          break;
 476
 477       default:
 478          /* I sometimes want to print things that aren't the 3 shader times.
 479           * Just print the sum in that case.
 480           */
 481          written = 1;
 482          reset = 0;
 483          break;
 484       }
 485
 486       uint64_t time = brw->shader_time.cumulative[i].time;
 487       if (written) {
 488          scaled[i] = time / written * (written + reset);
 489       } else {
 490          scaled[i] = time;
 491       }
 492
 493       switch (type) {
 494       case ST_VS:
 495       case ST_TCS:
 496       case ST_TES:
 497       case ST_GS:
 498       case ST_FS8:
 499       case ST_FS16:
 500       case ST_CS:
 501          total_by_type[type] += scaled[i];
 502          break;
 503       default:
 504          break;
 505       }
 506
 507       total += scaled[i];
 508    }
 509
 510    if (total == 0) {
 511       fprintf(stderr, "No shader time collected yet\n");
 512       return;
 513    }
 514
 515    qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 516
 517    fprintf(stderr, "\n");
 518    fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
 519    for (int s = 0; s < brw->shader_time.num_entries; s++) {
 520       const char *stage;
 521       /* Work back from the sorted pointers times to a time to print. */
 522       int i = sorted[s] - scaled;
 523
 524       if (scaled[i] == 0)
 525          continue;
 526
 527       int shader_num = brw->shader_time.ids[i];
 528       const char *shader_name = brw->shader_time.names[i];
 529
 530       switch (brw->shader_time.types[i]) {
 531       case ST_VS:
 532          stage = "vs";
 533          break;
 534       case ST_TCS:
 535          stage = "tcs";
 536          break;
 537       case ST_TES:
 538          stage = "tes";
 539          break;
 540       case ST_GS:
 541          stage = "gs";
 542          break;
 543       case ST_FS8:
 544          stage = "fs8";
 545          break;
 546       case ST_FS16:
 547          stage = "fs16";
 548          break;
 549       case ST_CS:
 550          stage = "cs";
 551          break;
 552       default:
 553          stage = "other";
 554          break;
 555       }
 556
 557       print_shader_time_line(stage, shader_name, shader_num,
 558                              scaled[i], total);
 559    }
 560
 561    fprintf(stderr, "\n");
 562    print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
 563    print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
 564    print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
 565    print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
 566    print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
 567    print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
 568    print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
 569 }
 570
 571 static void
 572 brw_collect_shader_time(struct brw_context *brw)
 573 {
 574    if (!brw->shader_time.bo)
 575       return;
 576
 577    /* This probably stalls on the last rendering.  We could fix that by
 578     * delaying reading the reports, but it doesn't look like it's a big
 579     * overhead compared to the cost of tracking the time in the first place.
 580     */
 581    void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
 582
 583    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 584       uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
 585
 586       brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
 587       brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
 588       brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
 589    }
 590
 591    /* Zero the BO out to clear it out for our next collection.
 592     */
 593    memset(bo_map, 0, brw->shader_time.bo->size);
 594    brw_bo_unmap(brw->shader_time.bo);
 595 }
 596
 597 void
 598 brw_collect_and_report_shader_time(struct brw_context *brw)
 599 {
 600    brw_collect_shader_time(brw);
 601
 602    if (brw->shader_time.report_time == 0 ||
 603        get_time() - brw->shader_time.report_time >= 1.0) {
 604       brw_report_shader_time(brw);
 605       brw->shader_time.report_time = get_time();
 606    }
 607 }
 608
 609 /**
 610  * Chooses an index in the shader_time buffer and sets up tracking information
 611  * for our printouts.
 612  *
 613  * Note that this holds on to references to the underlying programs, which may
 614  * change their lifetimes compared to normal operation.
 615  */
 616 int
 617 brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
 618                           enum shader_time_shader_type type, bool is_glsl_sh)
 619 {
 620    int shader_time_index = brw->shader_time.num_entries++;
 621    assert(shader_time_index < brw->shader_time.max_entries);
 622    brw->shader_time.types[shader_time_index] = type;
 623
 624    const char *name;
 625    if (prog->Id == 0) {
 626       name = "ff";
 627    } else if (is_glsl_sh) {
 628       name = prog->info.label ?
 629          ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
 630    } else {
 631       name = "prog";
 632    }
 633
 634    brw->shader_time.names[shader_time_index] = name;
 635    brw->shader_time.ids[shader_time_index] = prog->Id;
 636
 637    return shader_time_index;
 638 }
 639
 640 void
 641 brw_destroy_shader_time(struct brw_context *brw)
 642 {
 643    brw_bo_unreference(brw->shader_time.bo);
 644    brw->shader_time.bo = NULL;
 645 }
 646
 647 void
 648 brw_stage_prog_data_free(const void *p)
 649 {
 650    struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
 651
 652    ralloc_free(prog_data->param);
 653    ralloc_free(prog_data->pull_param);
 654    ralloc_free(prog_data->image_param);
 655 }
 656
 657 void
 658 brw_dump_arb_asm(const char *stage, struct gl_program *prog)
 659 {
 660    fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
 661            stage, prog->Id, stage);
 662    _mesa_print_program(prog);
 663 }
 664
 665 void
 666 brw_setup_tex_for_precompile(struct brw_context *brw,
 667                              struct brw_sampler_prog_key_data *tex,
 668                              struct gl_program *prog)
 669 {
 670    const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
 671    unsigned sampler_count = util_last_bit(prog->SamplersUsed);
 672    for (unsigned i = 0; i < sampler_count; i++) {
 673       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
 674          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
 675          tex->swizzles[i] =
 676             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
 677       } else {
 678          /* Color sampler: assume no swizzling. */
 679          tex->swizzles[i] = SWIZZLE_XYZW;
 680       }
 681    }
 682 }
 683
 684 /**
 685  * Sets up the starting offsets for the groups of binding table entries
 686  * common to all pipeline stages.
 687  *
 688  * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 689  * unused but also make sure that addition of small offsets to them will
 690  * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 691  */
 692 uint32_t
 693 brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
 694                                         const struct gl_program *prog,
 695                                         struct brw_stage_prog_data *stage_prog_data,
 696                                         uint32_t next_binding_table_offset)
 697 {
 698    int num_textures = util_last_bit(prog->SamplersUsed);
 699
 700    stage_prog_data->binding_table.texture_start = next_binding_table_offset;
 701    next_binding_table_offset += num_textures;
 702
 703    if (prog->info.num_ubos) {
 704       assert(prog->info.num_ubos <= BRW_MAX_UBO);
 705       stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
 706       next_binding_table_offset += prog->info.num_ubos;
 707    } else {
 708       stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
 709    }
 710
 711    if (prog->info.num_ssbos) {
 712       assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
 713       stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
 714       next_binding_table_offset += prog->info.num_ssbos;
 715    } else {
 716       stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
 717    }
 718
 719    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 720       stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
 721       next_binding_table_offset++;
 722    } else {
 723       stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
 724    }
 725
 726    if (prog->nir->info.uses_texture_gather) {
 727       if (devinfo->gen >= 8) {
 728          stage_prog_data->binding_table.gather_texture_start =
 729             stage_prog_data->binding_table.texture_start;
 730       } else {
 731          stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
 732          next_binding_table_offset += num_textures;
 733       }
 734    } else {
 735       stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
 736    }
 737
 738    if (prog->info.num_abos) {
 739       stage_prog_data->binding_table.abo_start = next_binding_table_offset;
 740       next_binding_table_offset += prog->info.num_abos;
 741    } else {
 742       stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
 743    }
 744
 745    if (prog->info.num_images) {
 746       stage_prog_data->binding_table.image_start = next_binding_table_offset;
 747       next_binding_table_offset += prog->info.num_images;
 748    } else {
 749       stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
 750    }
 751
 752    /* This may or may not be used depending on how the compile goes. */
 753    stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
 754    next_binding_table_offset++;
 755
 756    /* Plane 0 is just the regular texture section */
 757    stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
 758
 759    stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
 760    next_binding_table_offset += num_textures;
 761
 762    stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
 763    next_binding_table_offset += num_textures;
 764
 765    /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
 766
 767    assert(next_binding_table_offset <= BRW_MAX_SURFACES);
 768    return next_binding_table_offset;
 769 }