src/mesa/drivers/dri/i965/brw_program.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32 #include <pthread.h>
  33 #include "main/imports.h"
  34 #include "main/enums.h"
  35 #include "main/shaderobj.h"
  36 #include "program/prog_parameter.h"
  37 #include "program/prog_print.h"
  38 #include "program/program.h"
  39 #include "program/programopt.h"
  40 #include "tnl/tnl.h"
  41 #include "util/ralloc.h"
  42 #include "glsl/ir.h"
  43
  44 #include "brw_context.h"
  45 #include "brw_shader.h"
  46 #include "brw_nir.h"
  47 #include "brw_wm.h"
  48 #include "intel_batchbuffer.h"
  49
  50 static unsigned
  51 get_new_program_id(struct intel_screen *screen)
  52 {
  53    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  54    pthread_mutex_lock(&m);
  55    unsigned id = screen->program_id++;
  56    pthread_mutex_unlock(&m);
  57    return id;
  58 }
  59
  60 static struct gl_program *brwNewProgram( struct gl_context *ctx,
  61                                       GLenum target,
  62                                       GLuint id )
  63 {
  64    struct brw_context *brw = brw_context(ctx);
  65
  66    switch (target) {
  67    case GL_VERTEX_PROGRAM_ARB: {
  68       struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
  69       if (prog) {
  70          prog->id = get_new_program_id(brw->intelScreen);
  71
  72          return _mesa_init_vertex_program( ctx, &prog->program,
  73                                              target, id );
  74       }
  75       else
  76          return NULL;
  77    }
  78
  79    case GL_FRAGMENT_PROGRAM_ARB: {
  80       struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
  81       if (prog) {
  82          prog->id = get_new_program_id(brw->intelScreen);
  83
  84          return _mesa_init_fragment_program( ctx, &prog->program,
  85                                              target, id );
  86       }
  87       else
  88          return NULL;
  89    }
  90
  91    case GL_GEOMETRY_PROGRAM_NV: {
  92       struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
  93       if (prog) {
  94          prog->id = get_new_program_id(brw->intelScreen);
  95
  96          return _mesa_init_geometry_program(ctx, &prog->program, target, id);
  97       } else {
  98          return NULL;
  99       }
 100    }
 101
 102    case GL_COMPUTE_PROGRAM_NV: {
 103       struct brw_compute_program *prog = CALLOC_STRUCT(brw_compute_program);
 104       if (prog) {
 105          prog->id = get_new_program_id(brw->intelScreen);
 106
 107          return _mesa_init_compute_program(ctx, &prog->program, target, id);
 108       } else {
 109          return NULL;
 110       }
 111    }
 112
 113    default:
 114       unreachable("Unsupported target in brwNewProgram()");
 115    }
 116 }
 117
 /**
  * Driver hook for destroying a program object.
  *
  * Simply forwards to _mesa_delete_program(); no driver-specific teardown is
  * performed here.
  */
 static void
 brwDeleteProgram(struct gl_context *ctx, struct gl_program *prog)
 {
    _mesa_delete_program(ctx, prog);
 }
 123
 124
 125 static GLboolean
 126 brwProgramStringNotify(struct gl_context *ctx,
 127                        GLenum target,
 128                        struct gl_program *prog)
 129 {
 130    struct brw_context *brw = brw_context(ctx);
 131
 132    switch (target) {
 133    case GL_FRAGMENT_PROGRAM_ARB: {
 134       struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
 135       struct brw_fragment_program *newFP = brw_fragment_program(fprog);
 136       const struct brw_fragment_program *curFP =
 137          brw_fragment_program_const(brw->fragment_program);
 138
 139       if (newFP == curFP)
 140          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 141       newFP->id = get_new_program_id(brw->intelScreen);
 142
 143       brw_add_texrect_params(prog);
 144
 145       if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
 146          prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 147       }
 148
 149       brw_fs_precompile(ctx, NULL, prog);
 150       break;
 151    }
 152    case GL_VERTEX_PROGRAM_ARB: {
 153       struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
 154       struct brw_vertex_program *newVP = brw_vertex_program(vprog);
 155       const struct brw_vertex_program *curVP =
 156          brw_vertex_program_const(brw->vertex_program);
 157
 158       if (newVP == curVP)
 159          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
 160       if (newVP->program.IsPositionInvariant) {
 161          _mesa_insert_mvp_code(ctx, &newVP->program);
 162       }
 163       newVP->id = get_new_program_id(brw->intelScreen);
 164
 165       /* Also tell tnl about it:
 166        */
 167       _tnl_program_string(ctx, target, prog);
 168
 169       brw_add_texrect_params(prog);
 170
 171       if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
 172          prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
 173                                     brw->intelScreen->compiler->scalar_vs);
 174       }
 175
 176       brw_vs_precompile(ctx, NULL, prog);
 177       break;
 178    }
 179    default:
 180       /*
 181        * driver->ProgramStringNotify is only called for ARB programs, fixed
 182        * function vertex programs, and ir_to_mesa (which isn't used by the
 183        * i965 back-end).  Therefore, even after geometry shaders are added,
 184        * this function should only ever be called with a target of
 185        * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
 186        */
 187       unreachable("Unexpected target in brwProgramStringNotify");
 188    }
 189
 190    return true;
 191 }
 192
 193 static void
 194 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 195 {
 196    struct brw_context *brw = brw_context(ctx);
 197    unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
 198                     PIPE_CONTROL_NO_WRITE |
 199                     PIPE_CONTROL_CS_STALL);
 200    assert(brw->gen >= 7 && brw->gen <= 9);
 201
 202    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
 203                    GL_ELEMENT_ARRAY_BARRIER_BIT |
 204                    GL_COMMAND_BARRIER_BIT))
 205       bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 206
 207    if (barriers & GL_UNIFORM_BARRIER_BIT)
 208       bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 209                PIPE_CONTROL_CONST_CACHE_INVALIDATE);
 210
 211    if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
 212       bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 213
 214    if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
 215       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 216
 217    if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
 218       bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 219                PIPE_CONTROL_RENDER_TARGET_FLUSH);
 220
 221    /* Typed surface messages are handled by the render cache on IVB, so we
 222     * need to flush it too.
 223     */
 224    if (brw->gen == 7 && !brw->is_haswell)
 225       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 226
 227    brw_emit_pipe_control_flush(brw, bits);
 228 }
 229
 230 void
 231 brw_add_texrect_params(struct gl_program *prog)
 232 {
 233    for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
 234       if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
 235          continue;
 236
 237       int tokens[STATE_LENGTH] = {
 238          STATE_INTERNAL,
 239          STATE_TEXRECT_SCALE,
 240          texunit,
 241          0,
 242          0
 243       };
 244
 245       _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
 246    }
 247 }
 248
 /**
  * Rounds a per-thread scratch requirement up to an allocatable size.
  *
  * Per-thread scratch space is a power-of-two multiple of 1KB, so the result
  * is the smallest value of the form 1024 * 2^k that is >= \p size.  Any
  * request of 1024 bytes or less (including zero or a negative value)
  * yields 1024.
  *
  * The result saturates at 2^30, the largest power of two representable in
  * an int: doubling beyond that would overflow a signed integer, which is
  * undefined behaviour.  A request larger than 2^30 therefore returns 2^30,
  * which is smaller than what was asked for.
  *
  * \param size  number of bytes of scratch space required per thread.
  * \return the rounded-up scratch size in bytes.
  */
 int
 brw_get_scratch_size(int size)
 {
    const int max_scratch_size = 1 << 30;
    int i;

    /* Stop doubling before i * 2 could exceed INT_MAX. */
    for (i = 1024; i < size && i < max_scratch_size; i *= 2)
       ;

    return i;
 }
 260
 261 void
 262 brw_get_scratch_bo(struct brw_context *brw,
 263                    drm_intel_bo **scratch_bo, int size)
 264 {
 265    drm_intel_bo *old_bo = *scratch_bo;
 266
 267    if (old_bo && old_bo->size < size) {
 268       drm_intel_bo_unreference(old_bo);
 269       old_bo = NULL;
 270    }
 271
 272    if (!old_bo) {
 273       *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
 274    }
 275 }
 276
 277 void brwInitFragProgFuncs( struct dd_function_table *functions )
 278 {
 279    /* assert(functions->ProgramStringNotify == _tnl_program_string); */
 280
 281    functions->NewProgram = brwNewProgram;
 282    functions->DeleteProgram = brwDeleteProgram;
 283    functions->ProgramStringNotify = brwProgramStringNotify;
 284
 285    functions->NewShader = brw_new_shader;
 286    functions->LinkShader = brw_link_shader;
 287
 288    functions->MemoryBarrier = brw_memory_barrier;
 289 }
 290
 /**
  * Running totals for one shader_time entry, accumulated on the CPU from the
  * three counters read back out of the shader-time buffer on each collection.
  */
 struct shader_times {
    /** Sum of the first counter of the entry (the time value). */
    uint64_t time;

    /** Sum of the second counter of the entry; used as the divisor when the
     *  report scales the time value.
     */
    uint64_t written;

    /** Sum of the third counter of the entry; added to \c written when the
     *  report scales the time value.
     */
    uint64_t reset;
 };
 296
 297 void
 298 brw_init_shader_time(struct brw_context *brw)
 299 {
 300    const int max_entries = 2048;
 301    brw->shader_time.bo =
 302       drm_intel_bo_alloc(brw->bufmgr, "shader time",
 303                          max_entries * SHADER_TIME_STRIDE * 3, 4096);
 304    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
 305    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
 306    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
 307                                           max_entries);
 308    brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
 309                                                max_entries);
 310    brw->shader_time.max_entries = max_entries;
 311 }
 312
 /**
  * qsort() comparator for an array of pointers to uint64_t values.
  *
  * \p a and \p b each point at an array element, i.e. at a pointer to the
  * uint64_t to compare.  Orders ascending by the pointed-to value.
  *
  * \return -1, 0 or 1.
  */
 static int
 compare_time(const void *a, const void *b)
 {
    const uint64_t lhs = **(uint64_t * const *) a;
    const uint64_t rhs = **(uint64_t * const *) b;

    /* Compare rather than subtract: a 64-bit difference would not survive
     * the conversion to int.
     */
    return (lhs > rhs) - (lhs < rhs);
 }
 327
 /**
  * Prints one row of the shader-time report to stderr.
  *
  * \param stage       text for the first column.
  * \param name        text for the second column.
  * \param shader_num  numeric id; printed as blanks when it is 0.
  * \param time        cycle count for this row.
  * \param total       cycle count the percentage column is relative to.
  */
 static void
 print_shader_time_line(const char *stage, const char *name,
                        int shader_num, uint64_t time, uint64_t total)
 {
    const double cycles = (double)time;
    const double gcycles = cycles / 1000000000.0;
    const double percent = cycles / total * 100.0;

    fprintf(stderr, "%-6s%-18s", stage, name);

    if (shader_num == 0)
       fprintf(stderr, "    : ");
    else
       fprintf(stderr, "%4d: ", shader_num);

    fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
            (long long)time, gcycles, percent);
 }
 344
 345 static void
 346 brw_report_shader_time(struct brw_context *brw)
 347 {
 348    if (!brw->shader_time.bo || !brw->shader_time.num_entries)
 349       return;
 350
 351    uint64_t scaled[brw->shader_time.num_entries];
 352    uint64_t *sorted[brw->shader_time.num_entries];
 353    uint64_t total_by_type[ST_CS + 1];
 354    memset(total_by_type, 0, sizeof(total_by_type));
 355    double total = 0;
 356    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 357       uint64_t written = 0, reset = 0;
 358       enum shader_time_shader_type type = brw->shader_time.types[i];
 359
 360       sorted[i] = &scaled[i];
 361
 362       switch (type) {
 363       case ST_VS:
 364       case ST_GS:
 365       case ST_FS8:
 366       case ST_FS16:
 367       case ST_CS:
 368          written = brw->shader_time.cumulative[i].written;
 369          reset = brw->shader_time.cumulative[i].reset;
 370          break;
 371
 372       default:
 373          /* I sometimes want to print things that aren't the 3 shader times.
 374           * Just print the sum in that case.
 375           */
 376          written = 1;
 377          reset = 0;
 378          break;
 379       }
 380
 381       uint64_t time = brw->shader_time.cumulative[i].time;
 382       if (written) {
 383          scaled[i] = time / written * (written + reset);
 384       } else {
 385          scaled[i] = time;
 386       }
 387
 388       switch (type) {
 389       case ST_VS:
 390       case ST_GS:
 391       case ST_FS8:
 392       case ST_FS16:
 393       case ST_CS:
 394          total_by_type[type] += scaled[i];
 395          break;
 396       default:
 397          break;
 398       }
 399
 400       total += scaled[i];
 401    }
 402
 403    if (total == 0) {
 404       fprintf(stderr, "No shader time collected yet\n");
 405       return;
 406    }
 407
 408    qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 409
 410    fprintf(stderr, "\n");
 411    fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
 412    for (int s = 0; s < brw->shader_time.num_entries; s++) {
 413       const char *stage;
 414       /* Work back from the sorted pointers times to a time to print. */
 415       int i = sorted[s] - scaled;
 416
 417       if (scaled[i] == 0)
 418          continue;
 419
 420       int shader_num = brw->shader_time.ids[i];
 421       const char *shader_name = brw->shader_time.names[i];
 422
 423       switch (brw->shader_time.types[i]) {
 424       case ST_VS:
 425          stage = "vs";
 426          break;
 427       case ST_GS:
 428          stage = "gs";
 429          break;
 430       case ST_FS8:
 431          stage = "fs8";
 432          break;
 433       case ST_FS16:
 434          stage = "fs16";
 435          break;
 436       case ST_CS:
 437          stage = "cs";
 438          break;
 439       default:
 440          stage = "other";
 441          break;
 442       }
 443
 444       print_shader_time_line(stage, shader_name, shader_num,
 445                              scaled[i], total);
 446    }
 447
 448    fprintf(stderr, "\n");
 449    print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
 450    print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
 451    print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
 452    print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
 453    print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
 454 }
 455
 456 static void
 457 brw_collect_shader_time(struct brw_context *brw)
 458 {
 459    if (!brw->shader_time.bo)
 460       return;
 461
 462    /* This probably stalls on the last rendering.  We could fix that by
 463     * delaying reading the reports, but it doesn't look like it's a big
 464     * overhead compared to the cost of tracking the time in the first place.
 465     */
 466    drm_intel_bo_map(brw->shader_time.bo, true);
 467    void *bo_map = brw->shader_time.bo->virtual;
 468
 469    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 470       uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
 471
 472       brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
 473       brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
 474       brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
 475    }
 476
 477    /* Zero the BO out to clear it out for our next collection.
 478     */
 479    memset(bo_map, 0, brw->shader_time.bo->size);
 480    drm_intel_bo_unmap(brw->shader_time.bo);
 481 }
 482
 483 void
 484 brw_collect_and_report_shader_time(struct brw_context *brw)
 485 {
 486    brw_collect_shader_time(brw);
 487
 488    if (brw->shader_time.report_time == 0 ||
 489        get_time() - brw->shader_time.report_time >= 1.0) {
 490       brw_report_shader_time(brw);
 491       brw->shader_time.report_time = get_time();
 492    }
 493 }
 494
 495 /**
 496  * Chooses an index in the shader_time buffer and sets up tracking information
 497  * for our printouts.
 498  *
 499  * Note that this holds on to references to the underlying programs, which may
 500  * change their lifetimes compared to normal operation.
 501  */
 502 int
 503 brw_get_shader_time_index(struct brw_context *brw,
 504                           struct gl_shader_program *shader_prog,
 505                           struct gl_program *prog,
 506                           enum shader_time_shader_type type)
 507 {
 508    int shader_time_index = brw->shader_time.num_entries++;
 509    assert(shader_time_index < brw->shader_time.max_entries);
 510    brw->shader_time.types[shader_time_index] = type;
 511
 512    int id = shader_prog ? shader_prog->Name : prog->Id;
 513    const char *name;
 514    if (id == 0) {
 515       name = "ff";
 516    } else if (!shader_prog) {
 517       name = "prog";
 518    } else if (shader_prog->Label) {
 519       name = ralloc_strdup(brw->shader_time.names, shader_prog->Label);
 520    } else {
 521       name = "glsl";
 522    }
 523
 524    brw->shader_time.names[shader_time_index] = name;
 525    brw->shader_time.ids[shader_time_index] = id;
 526
 527    return shader_time_index;
 528 }
 529
 530 void
 531 brw_destroy_shader_time(struct brw_context *brw)
 532 {
 533    drm_intel_bo_unreference(brw->shader_time.bo);
 534    brw->shader_time.bo = NULL;
 535 }
 536
 537 void
 538 brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
 539                       unsigned surf_index)
 540 {
 541    assert(surf_index < BRW_MAX_SURFACES);
 542
 543    prog_data->binding_table.size_bytes =
 544       MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
 545 }
 546
 547 bool
 548 brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
 549                             const struct brw_stage_prog_data *b)
 550 {
 551    /* Compare all the struct up to the pointers. */
 552    if (memcmp(a, b, offsetof(struct brw_stage_prog_data, param)))
 553       return false;
 554
 555    if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
 556       return false;
 557
 558    if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
 559       return false;
 560
 561    return true;
 562 }
 563
 564 void
 565 brw_stage_prog_data_free(const void *p)
 566 {
 567    struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
 568
 569    ralloc_free(prog_data->param);
 570    ralloc_free(prog_data->pull_param);
 571 }
 572
 573 void
 574 brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
 575             struct gl_shader *shader, struct gl_program *prog)
 576 {
 577    if (shader_prog) {
 578       if (shader->ir) {
 579          fprintf(stderr,
 580                  "GLSL IR for native %s shader %d:\n",
 581                  stage, shader_prog->Name);
 582          _mesa_print_ir(stderr, shader->ir, NULL);
 583          fprintf(stderr, "\n\n");
 584       }
 585    } else {
 586       fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
 587               stage, prog->Id, stage);
 588       _mesa_print_program(prog);
 589    }
 590 }