src/mesa/drivers/dri/i965/brw_program.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32 #include <pthread.h>
  33 #include "main/imports.h"
  34 #include "program/prog_parameter.h"
  35 #include "program/prog_print.h"
  36 #include "program/program.h"
  37 #include "program/programopt.h"
  38 #include "tnl/tnl.h"
  39 #include "util/ralloc.h"
  40 #include "compiler/glsl/ir.h"
  41
  42 #include "brw_program.h"
  43 #include "brw_context.h"
  44 #include "brw_shader.h"
  45 #include "brw_nir.h"
  46 #include "intel_batchbuffer.h"
  47
  48 static unsigned
  49 get_new_program_id(struct intel_screen *screen)
  50 {
  51    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  52    pthread_mutex_lock(&m);
  53    unsigned id = screen->program_id++;
  54    pthread_mutex_unlock(&m);
  55    return id;
  56 }
  57
  58 static struct gl_program *brwNewProgram( struct gl_context *ctx,
  59                                       GLenum target,
  60                                       GLuint id )
  61 {
  62    struct brw_context *brw = brw_context(ctx);
  63
  64    switch (target) {
  65    case GL_VERTEX_PROGRAM_ARB: {
  66       struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
  67       if (prog) {
  68          prog->id = get_new_program_id(brw->intelScreen);
  69
  70          return _mesa_init_gl_program(&prog->program.Base, target, id);
  71       }
  72       else
  73          return NULL;
  74    }
  75
  76    case GL_FRAGMENT_PROGRAM_ARB: {
  77       struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
  78       if (prog) {
  79          prog->id = get_new_program_id(brw->intelScreen);
  80
  81          return _mesa_init_gl_program(&prog->program.Base, target, id);
  82       }
  83       else
  84          return NULL;
  85    }
  86
  87    case GL_GEOMETRY_PROGRAM_NV: {
  88       struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
  89       if (prog) {
  90          prog->id = get_new_program_id(brw->intelScreen);
  91
  92          return _mesa_init_gl_program(&prog->program.Base, target, id);
  93       } else {
  94          return NULL;
  95       }
  96    }
  97
  98    case GL_TESS_CONTROL_PROGRAM_NV: {
  99       struct brw_tess_ctrl_program *prog = CALLOC_STRUCT(brw_tess_ctrl_program);
 100       if (prog) {
 101          prog->id = get_new_program_id(brw->intelScreen);
 102
 103          return _mesa_init_gl_program(&prog->program.Base, target, id);
 104       } else {
 105          return NULL;
 106       }
 107    }
 108
 109    case GL_TESS_EVALUATION_PROGRAM_NV: {
 110       struct brw_tess_eval_program *prog = CALLOC_STRUCT(brw_tess_eval_program);
 111       if (prog) {
 112          prog->id = get_new_program_id(brw->intelScreen);
 113
 114          return _mesa_init_gl_program(&prog->program.Base, target, id);
 115       } else {
 116          return NULL;
 117       }
 118    }
 119
 120    case GL_COMPUTE_PROGRAM_NV: {
 121       struct brw_compute_program *prog = CALLOC_STRUCT(brw_compute_program);
 122       if (prog) {
 123          prog->id = get_new_program_id(brw->intelScreen);
 124
 125          return _mesa_init_gl_program(&prog->program.Base, target, id);
 126       } else {
 127          return NULL;
 128       }
 129    }
 130
 131    default:
 132       unreachable("Unsupported target in brwNewProgram()");
 133    }
 134 }
 135
 136 static void brwDeleteProgram( struct gl_context *ctx,
 137                               struct gl_program *prog )
 138 {
 139    _mesa_delete_program( ctx, prog );
 140 }
 141
 142
 143 static GLboolean
 144 brwProgramStringNotify(struct gl_context *ctx,
 145                        GLenum target,
 146                        struct gl_program *prog)
 147 {
 148    struct brw_context *brw = brw_context(ctx);
 149    const struct brw_compiler *compiler = brw->intelScreen->compiler;
 150
 151    switch (target) {
 152    case GL_FRAGMENT_PROGRAM_ARB: {
 153       struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
 154       struct brw_fragment_program *newFP = brw_fragment_program(fprog);
 155       const struct brw_fragment_program *curFP =
 156          brw_fragment_program_const(brw->fragment_program);
 157
 158       if (newFP == curFP)
 159          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 160       newFP->id = get_new_program_id(brw->intelScreen);
 161
 162       brw_add_texrect_params(prog);
 163
 164       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 165
 166       brw_fs_precompile(ctx, NULL, prog);
 167       break;
 168    }
 169    case GL_VERTEX_PROGRAM_ARB: {
 170       struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
 171       struct brw_vertex_program *newVP = brw_vertex_program(vprog);
 172       const struct brw_vertex_program *curVP =
 173          brw_vertex_program_const(brw->vertex_program);
 174
 175       if (newVP == curVP)
 176          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
 177       if (newVP->program.IsPositionInvariant) {
 178          _mesa_insert_mvp_code(ctx, &newVP->program);
 179       }
 180       newVP->id = get_new_program_id(brw->intelScreen);
 181
 182       /* Also tell tnl about it:
 183        */
 184       _tnl_program_string(ctx, target, prog);
 185
 186       brw_add_texrect_params(prog);
 187
 188       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
 189                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
 190
 191       brw_vs_precompile(ctx, NULL, prog);
 192       break;
 193    }
 194    default:
 195       /*
 196        * driver->ProgramStringNotify is only called for ARB programs, fixed
 197        * function vertex programs, and ir_to_mesa (which isn't used by the
 198        * i965 back-end).  Therefore, even after geometry shaders are added,
 199        * this function should only ever be called with a target of
 200        * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
 201        */
 202       unreachable("Unexpected target in brwProgramStringNotify");
 203    }
 204
 205    return true;
 206 }
 207
 208 static void
 209 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 210 {
 211    struct brw_context *brw = brw_context(ctx);
 212    unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
 213                     PIPE_CONTROL_NO_WRITE |
 214                     PIPE_CONTROL_CS_STALL);
 215    assert(brw->gen >= 7 && brw->gen <= 9);
 216
 217    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
 218                    GL_ELEMENT_ARRAY_BARRIER_BIT |
 219                    GL_COMMAND_BARRIER_BIT))
 220       bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 221
 222    if (barriers & GL_UNIFORM_BARRIER_BIT)
 223       bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 224                PIPE_CONTROL_CONST_CACHE_INVALIDATE);
 225
 226    if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
 227       bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 228
 229    if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
 230       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 231
 232    if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
 233       bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 234                PIPE_CONTROL_RENDER_TARGET_FLUSH);
 235
 236    /* Typed surface messages are handled by the render cache on IVB, so we
 237     * need to flush it too.
 238     */
 239    if (brw->gen == 7 && !brw->is_haswell)
 240       bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
 241
 242    brw_emit_pipe_control_flush(brw, bits);
 243 }
 244
 245 void
 246 brw_add_texrect_params(struct gl_program *prog)
 247 {
 248    for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
 249       if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
 250          continue;
 251
 252       int tokens[STATE_LENGTH] = {
 253          STATE_INTERNAL,
 254          STATE_TEXRECT_SCALE,
 255          texunit,
 256          0,
 257          0
 258       };
 259
 260       _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
 261    }
 262 }
 263
 264 void
 265 brw_get_scratch_bo(struct brw_context *brw,
 266                    drm_intel_bo **scratch_bo, int size)
 267 {
 268    drm_intel_bo *old_bo = *scratch_bo;
 269
 270    if (old_bo && old_bo->size < size) {
 271       drm_intel_bo_unreference(old_bo);
 272       old_bo = NULL;
 273    }
 274
 275    if (!old_bo) {
 276       *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
 277    }
 278 }
 279
 280 void brwInitFragProgFuncs( struct dd_function_table *functions )
 281 {
 282    /* assert(functions->ProgramStringNotify == _tnl_program_string); */
 283
 284    functions->NewProgram = brwNewProgram;
 285    functions->DeleteProgram = brwDeleteProgram;
 286    functions->ProgramStringNotify = brwProgramStringNotify;
 287
 288    functions->NewShader = brw_new_shader;
 289    functions->LinkShader = brw_link_shader;
 290
 291    functions->MemoryBarrier = brw_memory_barrier;
 292 }
 293
/**
 * Running totals for one shader-time entry.
 *
 * brw_collect_shader_time() adds the three 32-bit counters read back from the
 * entry's slots in the shader-time BO into these fields, and
 * brw_report_shader_time() combines them as
 * time / written * (written + reset) when written is non-zero.
 */
struct shader_times {
   uint64_t time;    /* accumulated value of the entry's first BO slot */
   uint64_t written; /* accumulated value of the second BO slot; presumably a
                      * count of timings that were recorded - TODO confirm */
   uint64_t reset;   /* accumulated value of the third BO slot; presumably a
                      * count of timings dropped by a timer reset - TODO confirm */
};
 299
 300 void
 301 brw_init_shader_time(struct brw_context *brw)
 302 {
 303    const int max_entries = 2048;
 304    brw->shader_time.bo =
 305       drm_intel_bo_alloc(brw->bufmgr, "shader time",
 306                          max_entries * SHADER_TIME_STRIDE * 3, 4096);
 307    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
 308    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
 309    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
 310                                           max_entries);
 311    brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
 312                                                max_entries);
 313    brw->shader_time.max_entries = max_entries;
 314 }
 315
 316 static int
 317 compare_time(const void *a, const void *b)
 318 {
 319    uint64_t * const *a_val = a;
 320    uint64_t * const *b_val = b;
 321
 322    /* We don't just subtract because we're turning the value to an int. */
 323    if (**a_val < **b_val)
 324       return -1;
 325    else if (**a_val == **b_val)
 326       return 0;
 327    else
 328       return 1;
 329 }
 330
 331 static void
 332 print_shader_time_line(const char *stage, const char *name,
 333                        int shader_num, uint64_t time, uint64_t total)
 334 {
 335    fprintf(stderr, "%-6s%-18s", stage, name);
 336
 337    if (shader_num != 0)
 338       fprintf(stderr, "%4d: ", shader_num);
 339    else
 340       fprintf(stderr, "    : ");
 341
 342    fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
 343            (long long)time,
 344            (double)time / 1000000000.0,
 345            (double)time / total * 100.0);
 346 }
 347
 348 static void
 349 brw_report_shader_time(struct brw_context *brw)
 350 {
 351    if (!brw->shader_time.bo || !brw->shader_time.num_entries)
 352       return;
 353
 354    uint64_t scaled[brw->shader_time.num_entries];
 355    uint64_t *sorted[brw->shader_time.num_entries];
 356    uint64_t total_by_type[ST_CS + 1];
 357    memset(total_by_type, 0, sizeof(total_by_type));
 358    double total = 0;
 359    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 360       uint64_t written = 0, reset = 0;
 361       enum shader_time_shader_type type = brw->shader_time.types[i];
 362
 363       sorted[i] = &scaled[i];
 364
 365       switch (type) {
 366       case ST_VS:
 367       case ST_TCS:
 368       case ST_TES:
 369       case ST_GS:
 370       case ST_FS8:
 371       case ST_FS16:
 372       case ST_CS:
 373          written = brw->shader_time.cumulative[i].written;
 374          reset = brw->shader_time.cumulative[i].reset;
 375          break;
 376
 377       default:
 378          /* I sometimes want to print things that aren't the 3 shader times.
 379           * Just print the sum in that case.
 380           */
 381          written = 1;
 382          reset = 0;
 383          break;
 384       }
 385
 386       uint64_t time = brw->shader_time.cumulative[i].time;
 387       if (written) {
 388          scaled[i] = time / written * (written + reset);
 389       } else {
 390          scaled[i] = time;
 391       }
 392
 393       switch (type) {
 394       case ST_VS:
 395       case ST_TCS:
 396       case ST_TES:
 397       case ST_GS:
 398       case ST_FS8:
 399       case ST_FS16:
 400       case ST_CS:
 401          total_by_type[type] += scaled[i];
 402          break;
 403       default:
 404          break;
 405       }
 406
 407       total += scaled[i];
 408    }
 409
 410    if (total == 0) {
 411       fprintf(stderr, "No shader time collected yet\n");
 412       return;
 413    }
 414
 415    qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 416
 417    fprintf(stderr, "\n");
 418    fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
 419    for (int s = 0; s < brw->shader_time.num_entries; s++) {
 420       const char *stage;
 421       /* Work back from the sorted pointers times to a time to print. */
 422       int i = sorted[s] - scaled;
 423
 424       if (scaled[i] == 0)
 425          continue;
 426
 427       int shader_num = brw->shader_time.ids[i];
 428       const char *shader_name = brw->shader_time.names[i];
 429
 430       switch (brw->shader_time.types[i]) {
 431       case ST_VS:
 432          stage = "vs";
 433          break;
 434       case ST_TCS:
 435          stage = "tcs";
 436          break;
 437       case ST_TES:
 438          stage = "tes";
 439          break;
 440       case ST_GS:
 441          stage = "gs";
 442          break;
 443       case ST_FS8:
 444          stage = "fs8";
 445          break;
 446       case ST_FS16:
 447          stage = "fs16";
 448          break;
 449       case ST_CS:
 450          stage = "cs";
 451          break;
 452       default:
 453          stage = "other";
 454          break;
 455       }
 456
 457       print_shader_time_line(stage, shader_name, shader_num,
 458                              scaled[i], total);
 459    }
 460
 461    fprintf(stderr, "\n");
 462    print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
 463    print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
 464    print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
 465    print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
 466    print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
 467    print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
 468    print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
 469 }
 470
 471 static void
 472 brw_collect_shader_time(struct brw_context *brw)
 473 {
 474    if (!brw->shader_time.bo)
 475       return;
 476
 477    /* This probably stalls on the last rendering.  We could fix that by
 478     * delaying reading the reports, but it doesn't look like it's a big
 479     * overhead compared to the cost of tracking the time in the first place.
 480     */
 481    drm_intel_bo_map(brw->shader_time.bo, true);
 482    void *bo_map = brw->shader_time.bo->virtual;
 483
 484    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 485       uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
 486
 487       brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
 488       brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
 489       brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
 490    }
 491
 492    /* Zero the BO out to clear it out for our next collection.
 493     */
 494    memset(bo_map, 0, brw->shader_time.bo->size);
 495    drm_intel_bo_unmap(brw->shader_time.bo);
 496 }
 497
 498 void
 499 brw_collect_and_report_shader_time(struct brw_context *brw)
 500 {
 501    brw_collect_shader_time(brw);
 502
 503    if (brw->shader_time.report_time == 0 ||
 504        get_time() - brw->shader_time.report_time >= 1.0) {
 505       brw_report_shader_time(brw);
 506       brw->shader_time.report_time = get_time();
 507    }
 508 }
 509
 510 /**
 511  * Chooses an index in the shader_time buffer and sets up tracking information
 512  * for our printouts.
 513  *
 514  * Note that this holds on to references to the underlying programs, which may
 515  * change their lifetimes compared to normal operation.
 516  */
 517 int
 518 brw_get_shader_time_index(struct brw_context *brw,
 519                           struct gl_shader_program *shader_prog,
 520                           struct gl_program *prog,
 521                           enum shader_time_shader_type type)
 522 {
 523    int shader_time_index = brw->shader_time.num_entries++;
 524    assert(shader_time_index < brw->shader_time.max_entries);
 525    brw->shader_time.types[shader_time_index] = type;
 526
 527    int id = shader_prog ? shader_prog->Name : prog->Id;
 528    const char *name;
 529    if (id == 0) {
 530       name = "ff";
 531    } else if (!shader_prog) {
 532       name = "prog";
 533    } else if (shader_prog->Label) {
 534       name = ralloc_strdup(brw->shader_time.names, shader_prog->Label);
 535    } else {
 536       name = "glsl";
 537    }
 538
 539    brw->shader_time.names[shader_time_index] = name;
 540    brw->shader_time.ids[shader_time_index] = id;
 541
 542    return shader_time_index;
 543 }
 544
 545 void
 546 brw_destroy_shader_time(struct brw_context *brw)
 547 {
 548    drm_intel_bo_unreference(brw->shader_time.bo);
 549    brw->shader_time.bo = NULL;
 550 }
 551
 552 void
 553 brw_stage_prog_data_free(const void *p)
 554 {
 555    struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
 556
 557    ralloc_free(prog_data->param);
 558    ralloc_free(prog_data->pull_param);
 559    ralloc_free(prog_data->image_param);
 560 }
 561
 562 void
 563 brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
 564             struct gl_shader *shader, struct gl_program *prog)
 565 {
 566    if (shader_prog) {
 567       if (shader->ir) {
 568          fprintf(stderr,
 569                  "GLSL IR for native %s shader %d:\n",
 570                  stage, shader_prog->Name);
 571          _mesa_print_ir(stderr, shader->ir, NULL);
 572          fprintf(stderr, "\n\n");
 573       }
 574    } else {
 575       fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
 576               stage, prog->Id, stage);
 577       _mesa_print_program(prog);
 578    }
 579 }
 580
 581 void
 582 brw_setup_tex_for_precompile(struct brw_context *brw,
 583                              struct brw_sampler_prog_key_data *tex,
 584                              struct gl_program *prog)
 585 {
 586    const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
 587    unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
 588    for (unsigned i = 0; i < sampler_count; i++) {
 589       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
 590          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
 591          tex->swizzles[i] =
 592             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
 593       } else {
 594          /* Color sampler: assume no swizzling. */
 595          tex->swizzles[i] = SWIZZLE_XYZW;
 596       }
 597    }
 598 }