src/mesa/drivers/dri/i965/brw_program.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32 #include <pthread.h>
  33 #include "main/imports.h"
  34 #include "main/enums.h"
  35 #include "main/shaderobj.h"
  36 #include "program/prog_parameter.h"
  37 #include "program/prog_print.h"
  38 #include "program/program.h"
  39 #include "program/programopt.h"
  40 #include "tnl/tnl.h"
  41 #include "util/ralloc.h"
  42 #include "glsl/ir.h"
  43
  44 #include "brw_context.h"
  45 #include "brw_shader.h"
  46 #include "brw_wm.h"
  47 #include "intel_batchbuffer.h"
  48
  49 static unsigned
  50 get_new_program_id(struct intel_screen *screen)
  51 {
  52    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  53    pthread_mutex_lock(&m);
  54    unsigned id = screen->program_id++;
  55    pthread_mutex_unlock(&m);
  56    return id;
  57 }
  58
  59 static struct gl_program *brwNewProgram( struct gl_context *ctx,
  60                                       GLenum target,
  61                                       GLuint id )
  62 {
  63    struct brw_context *brw = brw_context(ctx);
  64
  65    switch (target) {
  66    case GL_VERTEX_PROGRAM_ARB: {
  67       struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
  68       if (prog) {
  69          prog->id = get_new_program_id(brw->intelScreen);
  70
  71          return _mesa_init_vertex_program( ctx, &prog->program,
  72                                              target, id );
  73       }
  74       else
  75          return NULL;
  76    }
  77
  78    case GL_FRAGMENT_PROGRAM_ARB: {
  79       struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
  80       if (prog) {
  81          prog->id = get_new_program_id(brw->intelScreen);
  82
  83          return _mesa_init_fragment_program( ctx, &prog->program,
  84                                              target, id );
  85       }
  86       else
  87          return NULL;
  88    }
  89
  90    case MESA_GEOMETRY_PROGRAM: {
  91       struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
  92       if (prog) {
  93          prog->id = get_new_program_id(brw->intelScreen);
  94
  95          return _mesa_init_geometry_program(ctx, &prog->program, target, id);
  96       } else {
  97          return NULL;
  98       }
  99    }
 100
 101    case GL_COMPUTE_PROGRAM_NV: {
 102       struct brw_compute_program *prog = CALLOC_STRUCT(brw_compute_program);
 103       if (prog) {
 104          prog->id = get_new_program_id(brw->intelScreen);
 105
 106          return _mesa_init_compute_program(ctx, &prog->program, target, id);
 107       } else {
 108          return NULL;
 109       }
 110    }
 111
 112    default:
 113       unreachable("Unsupported target in brwNewProgram()");
 114    }
 115 }
 116
 117 static void brwDeleteProgram( struct gl_context *ctx,
 118                               struct gl_program *prog )
 119 {
 120    _mesa_delete_program( ctx, prog );
 121 }
 122
 123
 124 static GLboolean
 125 brwProgramStringNotify(struct gl_context *ctx,
 126                        GLenum target,
 127                        struct gl_program *prog)
 128 {
 129    struct brw_context *brw = brw_context(ctx);
 130
 131    switch (target) {
 132    case GL_FRAGMENT_PROGRAM_ARB: {
 133       struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
 134       struct brw_fragment_program *newFP = brw_fragment_program(fprog);
 135       const struct brw_fragment_program *curFP =
 136          brw_fragment_program_const(brw->fragment_program);
 137
 138       if (newFP == curFP)
 139          brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
 140       newFP->id = get_new_program_id(brw->intelScreen);
 141
 142       brw_add_texrect_params(prog);
 143
 144       brw_fs_precompile(ctx, NULL, prog);
 145       break;
 146    }
 147    case GL_VERTEX_PROGRAM_ARB: {
 148       struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
 149       struct brw_vertex_program *newVP = brw_vertex_program(vprog);
 150       const struct brw_vertex_program *curVP =
 151          brw_vertex_program_const(brw->vertex_program);
 152
 153       if (newVP == curVP)
 154          brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
 155       if (newVP->program.IsPositionInvariant) {
 156          _mesa_insert_mvp_code(ctx, &newVP->program);
 157       }
 158       newVP->id = get_new_program_id(brw->intelScreen);
 159
 160       /* Also tell tnl about it:
 161        */
 162       _tnl_program_string(ctx, target, prog);
 163
 164       brw_add_texrect_params(prog);
 165
 166       brw_vs_precompile(ctx, NULL, prog);
 167       break;
 168    }
 169    default:
 170       /*
 171        * driver->ProgramStringNotify is only called for ARB programs, fixed
 172        * function vertex programs, and ir_to_mesa (which isn't used by the
 173        * i965 back-end).  Therefore, even after geometry shaders are added,
 174        * this function should only ever be called with a target of
 175        * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
 176        */
 177       unreachable("Unexpected target in brwProgramStringNotify");
 178    }
 179
 180    return true;
 181 }
 182
 183 static void
 184 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 185 {
 186    struct brw_context *brw = brw_context(ctx);
 187    unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
 188                     PIPE_CONTROL_NO_WRITE |
 189                     PIPE_CONTROL_CS_STALL);
 190    assert(brw->gen >= 7 && brw->gen <= 8);
 191
 192    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
 193                    GL_ELEMENT_ARRAY_BARRIER_BIT |
 194                    GL_COMMAND_BARRIER_BIT))
 195       bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 196
 197    if (barriers & GL_UNIFORM_BARRIER_BIT)
 198       bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 199                PIPE_CONTROL_CONST_CACHE_INVALIDATE);
 200
 201    if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
 202       bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 203
 204    if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
 205       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 206
 207    if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
 208       bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 209                PIPE_CONTROL_RENDER_TARGET_FLUSH);
 210
 211    /* Typed surface messages are handled by the render cache on IVB, so we
 212     * need to flush it too.
 213     */
 214    if (brw->gen == 7 && !brw->is_haswell)
 215       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 216
 217    brw_emit_pipe_control_flush(brw, bits);
 218 }
 219
 220 void
 221 brw_add_texrect_params(struct gl_program *prog)
 222 {
 223    for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
 224       if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
 225          continue;
 226
 227       int tokens[STATE_LENGTH] = {
 228          STATE_INTERNAL,
 229          STATE_TEXRECT_SCALE,
 230          texunit,
 231          0,
 232          0
 233       };
 234
 235       _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
 236    }
 237 }
 238
 239 /* Per-thread scratch space is a power-of-two multiple of 1KB. */
 240 int
 241 brw_get_scratch_size(int size)
 242 {
 243    int i;
 244
 245    for (i = 1024; i < size; i *= 2)
 246       ;
 247
 248    return i;
 249 }
 250
 251 void
 252 brw_get_scratch_bo(struct brw_context *brw,
 253                    drm_intel_bo **scratch_bo, int size)
 254 {
 255    drm_intel_bo *old_bo = *scratch_bo;
 256
 257    if (old_bo && old_bo->size < size) {
 258       drm_intel_bo_unreference(old_bo);
 259       old_bo = NULL;
 260    }
 261
 262    if (!old_bo) {
 263       *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
 264    }
 265 }
 266
 267 void brwInitFragProgFuncs( struct dd_function_table *functions )
 268 {
 269    assert(functions->ProgramStringNotify == _tnl_program_string);
 270
 271    functions->NewProgram = brwNewProgram;
 272    functions->DeleteProgram = brwDeleteProgram;
 273    functions->ProgramStringNotify = brwProgramStringNotify;
 274
 275    functions->NewShader = brw_new_shader;
 276    functions->LinkShader = brw_link_shader;
 277
 278    functions->MemoryBarrier = brw_memory_barrier;
 279 }
 280
 281 void
 282 brw_init_shader_time(struct brw_context *brw)
 283 {
 284    const int max_entries = 4096;
 285    brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
 286                                             max_entries * SHADER_TIME_STRIDE,
 287                                             4096);
 288    brw->shader_time.shader_programs = rzalloc_array(brw, struct gl_shader_program *,
 289                                                     max_entries);
 290    brw->shader_time.programs = rzalloc_array(brw, struct gl_program *,
 291                                              max_entries);
 292    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
 293                                           max_entries);
 294    brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
 295                                                max_entries);
 296    brw->shader_time.max_entries = max_entries;
 297 }
 298
 299 static int
 300 compare_time(const void *a, const void *b)
 301 {
 302    uint64_t * const *a_val = a;
 303    uint64_t * const *b_val = b;
 304
 305    /* We don't just subtract because we're turning the value to an int. */
 306    if (**a_val < **b_val)
 307       return -1;
 308    else if (**a_val == **b_val)
 309       return 0;
 310    else
 311       return 1;
 312 }
 313
 314 static void
 315 get_written_and_reset(struct brw_context *brw, int i,
 316                       uint64_t *written, uint64_t *reset)
 317 {
 318    enum shader_time_shader_type type = brw->shader_time.types[i];
 319    assert(type == ST_VS || type == ST_GS || type == ST_FS8 || type == ST_FS16);
 320
 321    /* Find where we recorded written and reset. */
 322    int wi, ri;
 323
 324    for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
 325       ;
 326
 327    for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
 328       ;
 329
 330    *written = brw->shader_time.cumulative[wi];
 331    *reset = brw->shader_time.cumulative[ri];
 332 }
 333
 334 static void
 335 print_shader_time_line(const char *stage, const char *name,
 336                        int shader_num, uint64_t time, uint64_t total)
 337 {
 338    fprintf(stderr, "%-6s%-18s", stage, name);
 339
 340    if (shader_num != -1)
 341       fprintf(stderr, "%4d: ", shader_num);
 342    else
 343       fprintf(stderr, "    : ");
 344
 345    fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
 346            (long long)time,
 347            (double)time / 1000000000.0,
 348            (double)time / total * 100.0);
 349 }
 350
 351 static void
 352 brw_report_shader_time(struct brw_context *brw)
 353 {
 354    if (!brw->shader_time.bo || !brw->shader_time.num_entries)
 355       return;
 356
 357    uint64_t scaled[brw->shader_time.num_entries];
 358    uint64_t *sorted[brw->shader_time.num_entries];
 359    uint64_t total_by_type[ST_FS16 + 1];
 360    memset(total_by_type, 0, sizeof(total_by_type));
 361    double total = 0;
 362    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 363       uint64_t written = 0, reset = 0;
 364       enum shader_time_shader_type type = brw->shader_time.types[i];
 365
 366       sorted[i] = &scaled[i];
 367
 368       switch (type) {
 369       case ST_VS_WRITTEN:
 370       case ST_VS_RESET:
 371       case ST_GS_WRITTEN:
 372       case ST_GS_RESET:
 373       case ST_FS8_WRITTEN:
 374       case ST_FS8_RESET:
 375       case ST_FS16_WRITTEN:
 376       case ST_FS16_RESET:
 377          /* We'll handle these when along with the time. */
 378          scaled[i] = 0;
 379          continue;
 380
 381       case ST_VS:
 382       case ST_GS:
 383       case ST_FS8:
 384       case ST_FS16:
 385          get_written_and_reset(brw, i, &written, &reset);
 386          break;
 387
 388       default:
 389          /* I sometimes want to print things that aren't the 3 shader times.
 390           * Just print the sum in that case.
 391           */
 392          written = 1;
 393          reset = 0;
 394          break;
 395       }
 396
 397       uint64_t time = brw->shader_time.cumulative[i];
 398       if (written) {
 399          scaled[i] = time / written * (written + reset);
 400       } else {
 401          scaled[i] = time;
 402       }
 403
 404       switch (type) {
 405       case ST_VS:
 406       case ST_GS:
 407       case ST_FS8:
 408       case ST_FS16:
 409          total_by_type[type] += scaled[i];
 410          break;
 411       default:
 412          break;
 413       }
 414
 415       total += scaled[i];
 416    }
 417
 418    if (total == 0) {
 419       fprintf(stderr, "No shader time collected yet\n");
 420       return;
 421    }
 422
 423    qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 424
 425    fprintf(stderr, "\n");
 426    fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
 427    for (int s = 0; s < brw->shader_time.num_entries; s++) {
 428       const char *shader_name;
 429       const char *stage;
 430       /* Work back from the sorted pointers times to a time to print. */
 431       int i = sorted[s] - scaled;
 432       struct gl_shader_program *prog = brw->shader_time.shader_programs[i];
 433
 434       if (scaled[i] == 0)
 435          continue;
 436
 437       int shader_num = -1;
 438       if (prog) {
 439          shader_num = prog->Name;
 440
 441          /* The fixed function fragment shader generates GLSL IR with a Name
 442           * of 0, and nothing else does.
 443           */
 444          if (prog->Label) {
 445             shader_name = prog->Label;
 446          } else if (shader_num == 0 &&
 447              (brw->shader_time.types[i] == ST_FS8 ||
 448               brw->shader_time.types[i] == ST_FS16)) {
 449             shader_name = "ff";
 450             shader_num = -1;
 451          } else {
 452             shader_name = "glsl";
 453          }
 454       } else if (brw->shader_time.programs[i]) {
 455          shader_num = brw->shader_time.programs[i]->Id;
 456          if (shader_num == 0) {
 457             shader_name = "ff";
 458             shader_num = -1;
 459          } else {
 460             shader_name = "prog";
 461          }
 462       } else {
 463          shader_name = "other";
 464       }
 465
 466       switch (brw->shader_time.types[i]) {
 467       case ST_VS:
 468          stage = "vs";
 469          break;
 470       case ST_GS:
 471          stage = "gs";
 472          break;
 473       case ST_FS8:
 474          stage = "fs8";
 475          break;
 476       case ST_FS16:
 477          stage = "fs16";
 478          break;
 479       default:
 480          stage = "other";
 481          break;
 482       }
 483
 484       print_shader_time_line(stage, shader_name, shader_num,
 485                              scaled[i], total);
 486    }
 487
 488    fprintf(stderr, "\n");
 489    print_shader_time_line("total", "vs", -1, total_by_type[ST_VS], total);
 490    print_shader_time_line("total", "gs", -1, total_by_type[ST_GS], total);
 491    print_shader_time_line("total", "fs8", -1, total_by_type[ST_FS8], total);
 492    print_shader_time_line("total", "fs16", -1, total_by_type[ST_FS16], total);
 493 }
 494
 495 static void
 496 brw_collect_shader_time(struct brw_context *brw)
 497 {
 498    if (!brw->shader_time.bo)
 499       return;
 500
 501    /* This probably stalls on the last rendering.  We could fix that by
 502     * delaying reading the reports, but it doesn't look like it's a big
 503     * overhead compared to the cost of tracking the time in the first place.
 504     */
 505    drm_intel_bo_map(brw->shader_time.bo, true);
 506
 507    uint32_t *times = brw->shader_time.bo->virtual;
 508
 509    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 510       brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
 511    }
 512
 513    /* Zero the BO out to clear it out for our next collection.
 514     */
 515    memset(times, 0, brw->shader_time.bo->size);
 516    drm_intel_bo_unmap(brw->shader_time.bo);
 517 }
 518
 519 void
 520 brw_collect_and_report_shader_time(struct brw_context *brw)
 521 {
 522    brw_collect_shader_time(brw);
 523
 524    if (brw->shader_time.report_time == 0 ||
 525        get_time() - brw->shader_time.report_time >= 1.0) {
 526       brw_report_shader_time(brw);
 527       brw->shader_time.report_time = get_time();
 528    }
 529 }
 530
 531 /**
 532  * Chooses an index in the shader_time buffer and sets up tracking information
 533  * for our printouts.
 534  *
 535  * Note that this holds on to references to the underlying programs, which may
 536  * change their lifetimes compared to normal operation.
 537  */
 538 int
 539 brw_get_shader_time_index(struct brw_context *brw,
 540                           struct gl_shader_program *shader_prog,
 541                           struct gl_program *prog,
 542                           enum shader_time_shader_type type)
 543 {
 544    struct gl_context *ctx = &brw->ctx;
 545
 546    int shader_time_index = brw->shader_time.num_entries++;
 547    assert(shader_time_index < brw->shader_time.max_entries);
 548    brw->shader_time.types[shader_time_index] = type;
 549
 550    _mesa_reference_shader_program(ctx,
 551                                   &brw->shader_time.shader_programs[shader_time_index],
 552                                   shader_prog);
 553
 554    _mesa_reference_program(ctx,
 555                            &brw->shader_time.programs[shader_time_index],
 556                            prog);
 557
 558    return shader_time_index;
 559 }
 560
 561 void
 562 brw_destroy_shader_time(struct brw_context *brw)
 563 {
 564    drm_intel_bo_unreference(brw->shader_time.bo);
 565    brw->shader_time.bo = NULL;
 566 }
 567
 568 void
 569 brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
 570                       unsigned surf_index)
 571 {
 572    assert(surf_index < BRW_MAX_SURFACES);
 573
 574    prog_data->binding_table.size_bytes =
 575       MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
 576 }
 577
 578 bool
 579 brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
 580                             const struct brw_stage_prog_data *b)
 581 {
 582    /* Compare all the struct up to the pointers. */
 583    if (memcmp(a, b, offsetof(struct brw_stage_prog_data, param)))
 584       return false;
 585
 586    if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
 587       return false;
 588
 589    if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
 590       return false;
 591
 592    return true;
 593 }
 594
 595 void
 596 brw_stage_prog_data_free(const void *p)
 597 {
 598    struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
 599
 600    ralloc_free(prog_data->param);
 601    ralloc_free(prog_data->pull_param);
 602 }
 603
 604 void
 605 brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
 606             struct gl_shader *shader, struct gl_program *prog)
 607 {
 608    if (shader_prog) {
 609       fprintf(stderr,
 610               "GLSL IR for native %s shader %d:\n", stage, shader_prog->Name);
 611       _mesa_print_ir(stderr, shader->ir, NULL);
 612       fprintf(stderr, "\n\n");
 613    } else {
 614       fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
 615               stage, prog->Id, stage);
 616       _mesa_print_program(prog);
 617    }
 618 }