src/mesa/drivers/dri/i965/brw_program.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32 #include <pthread.h>
  33 #include "main/imports.h"
  34 #include "main/enums.h"
  35 #include "main/shaderobj.h"
  36 #include "program/prog_parameter.h"
  37 #include "program/program.h"
  38 #include "program/programopt.h"
  39 #include "tnl/tnl.h"
  40 #include "glsl/ralloc.h"
  41
  42 #include "brw_context.h"
  43 #include "brw_wm.h"
  44
  45 static unsigned
  46 get_new_program_id(struct intel_screen *screen)
  47 {
  48    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  49    pthread_mutex_lock(&m);
  50    unsigned id = screen->program_id++;
  51    pthread_mutex_unlock(&m);
  52    return id;
  53 }
  54
  55 static void brwBindProgram( struct gl_context *ctx,
  56                             GLenum target,
  57                             struct gl_program *prog )
  58 {
  59    struct brw_context *brw = brw_context(ctx);
  60
  61    switch (target) {
  62    case GL_VERTEX_PROGRAM_ARB:
  63       brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
  64       break;
  65    case GL_FRAGMENT_PROGRAM_ARB:
  66       brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
  67       break;
  68    }
  69 }
  70
  71 static struct gl_program *brwNewProgram( struct gl_context *ctx,
  72                                       GLenum target,
  73                                       GLuint id )
  74 {
  75    struct brw_context *brw = brw_context(ctx);
  76
  77    switch (target) {
  78    case GL_VERTEX_PROGRAM_ARB: {
  79       struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
  80       if (prog) {
  81          prog->id = get_new_program_id(brw->intel.intelScreen);
  82
  83          return _mesa_init_vertex_program( ctx, &prog->program,
  84                                              target, id );
  85       }
  86       else
  87          return NULL;
  88    }
  89
  90    case GL_FRAGMENT_PROGRAM_ARB: {
  91       struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
  92       if (prog) {
  93          prog->id = get_new_program_id(brw->intel.intelScreen);
  94
  95          return _mesa_init_fragment_program( ctx, &prog->program,
  96                                              target, id );
  97       }
  98       else
  99          return NULL;
 100    }
 101
 102    default:
 103       return _mesa_new_program(ctx, target, id);
 104    }
 105 }
 106
 107 static void brwDeleteProgram( struct gl_context *ctx,
 108                               struct gl_program *prog )
 109 {
 110    _mesa_delete_program( ctx, prog );
 111 }
 112
 113
 114 static GLboolean
 115 brwIsProgramNative(struct gl_context *ctx,
 116                    GLenum target,
 117                    struct gl_program *prog)
 118 {
 119    return true;
 120 }
 121
 122 static GLboolean
 123 brwProgramStringNotify(struct gl_context *ctx,
 124                        GLenum target,
 125                        struct gl_program *prog)
 126 {
 127    struct brw_context *brw = brw_context(ctx);
 128
 129    if (target == GL_FRAGMENT_PROGRAM_ARB) {
 130       struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
 131       struct brw_fragment_program *newFP = brw_fragment_program(fprog);
 132       const struct brw_fragment_program *curFP =
 133          brw_fragment_program_const(brw->fragment_program);
 134
 135       if (newFP == curFP)
 136          brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
 137       newFP->id = get_new_program_id(brw->intel.intelScreen);
 138    }
 139    else if (target == GL_VERTEX_PROGRAM_ARB) {
 140       struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
 141       struct brw_vertex_program *newVP = brw_vertex_program(vprog);
 142       const struct brw_vertex_program *curVP =
 143          brw_vertex_program_const(brw->vertex_program);
 144
 145       if (newVP == curVP)
 146          brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
 147       if (newVP->program.IsPositionInvariant) {
 148          _mesa_insert_mvp_code(ctx, &newVP->program);
 149       }
 150       newVP->id = get_new_program_id(brw->intel.intelScreen);
 151
 152       /* Also tell tnl about it:
 153        */
 154       _tnl_program_string(ctx, target, prog);
 155    }
 156
 157    brw_add_texrect_params(prog);
 158
 159    return true;
 160 }
 161
 162 void
 163 brw_add_texrect_params(struct gl_program *prog)
 164 {
 165    for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
 166       if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
 167          continue;
 168
 169       int tokens[STATE_LENGTH] = {
 170          STATE_INTERNAL,
 171          STATE_TEXRECT_SCALE,
 172          texunit,
 173          0,
 174          0
 175       };
 176
 177       _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
 178    }
 179 }
 180
 181 /* Per-thread scratch space is a power-of-two multiple of 1KB. */
 182 int
 183 brw_get_scratch_size(int size)
 184 {
 185    int i;
 186
 187    for (i = 1024; i < size; i *= 2)
 188       ;
 189
 190    return i;
 191 }
 192
 193 void
 194 brw_get_scratch_bo(struct intel_context *intel,
 195                    drm_intel_bo **scratch_bo, int size)
 196 {
 197    drm_intel_bo *old_bo = *scratch_bo;
 198
 199    if (old_bo && old_bo->size < size) {
 200       drm_intel_bo_unreference(old_bo);
 201       old_bo = NULL;
 202    }
 203
 204    if (!old_bo) {
 205       *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096);
 206    }
 207 }
 208
 209 void brwInitFragProgFuncs( struct dd_function_table *functions )
 210 {
 211    assert(functions->ProgramStringNotify == _tnl_program_string);
 212
 213    functions->BindProgram = brwBindProgram;
 214    functions->NewProgram = brwNewProgram;
 215    functions->DeleteProgram = brwDeleteProgram;
 216    functions->IsProgramNative = brwIsProgramNative;
 217    functions->ProgramStringNotify = brwProgramStringNotify;
 218
 219    functions->NewShader = brw_new_shader;
 220    functions->NewShaderProgram = brw_new_shader_program;
 221    functions->LinkShader = brw_link_shader;
 222 }
 223
 224 void
 225 brw_init_shader_time(struct brw_context *brw)
 226 {
 227    struct intel_context *intel = &brw->intel;
 228
 229    const int max_entries = 4096;
 230    brw->shader_time.bo = drm_intel_bo_alloc(intel->bufmgr, "shader time",
 231                                             max_entries * 4, 4096);
 232    brw->shader_time.programs = rzalloc_array(brw, struct gl_shader_program *,
 233                                              max_entries);
 234    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
 235                                           max_entries);
 236    brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
 237                                                max_entries);
 238    brw->shader_time.max_entries = max_entries;
 239 }
 240
 241 static int
 242 compare_time(const void *a, const void *b)
 243 {
 244    uint64_t * const *a_val = a;
 245    uint64_t * const *b_val = b;
 246
 247    /* We don't just subtract because we're turning the value to an int. */
 248    if (**a_val < **b_val)
 249       return -1;
 250    else if (**a_val == **b_val)
 251       return 0;
 252    else
 253       return 1;
 254 }
 255
 256 static void
 257 get_written_and_reset(struct brw_context *brw, int i,
 258                       uint64_t *written, uint64_t *reset)
 259 {
 260    enum shader_time_shader_type type = brw->shader_time.types[i];
 261    assert(type == ST_VS || type == ST_FS8 || type == ST_FS16);
 262
 263    /* Find where we recorded written and reset. */
 264    int wi, ri;
 265
 266    for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
 267       ;
 268
 269    for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
 270       ;
 271
 272    *written = brw->shader_time.cumulative[wi];
 273    *reset = brw->shader_time.cumulative[ri];
 274 }
 275
 276 static void
 277 print_shader_time_line(const char *name, int shader_num,
 278                        uint64_t time, uint64_t total)
 279 {
 280    printf("%s", name);
 281    for (int i = strlen(name); i < 10; i++)
 282       printf(" ");
 283    printf("%4d: ", shader_num);
 284
 285    printf("%16lld (%7.2f Gcycles)      %4.1f%%\n",
 286           (long long)time,
 287           (double)time / 1000000000.0,
 288           (double)time / total * 100.0);
 289 }
 290
 291 static void
 292 brw_report_shader_time(struct brw_context *brw)
 293 {
 294    if (!brw->shader_time.bo || !brw->shader_time.num_entries)
 295       return;
 296
 297    uint64_t scaled[brw->shader_time.num_entries];
 298    uint64_t *sorted[brw->shader_time.num_entries];
 299    uint64_t total_by_type[ST_FS16 + 1];
 300    memset(total_by_type, 0, sizeof(total_by_type));
 301    double total = 0;
 302    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 303       uint64_t written = 0, reset = 0;
 304       enum shader_time_shader_type type = brw->shader_time.types[i];
 305
 306       sorted[i] = &scaled[i];
 307
 308       switch (type) {
 309       case ST_VS_WRITTEN:
 310       case ST_VS_RESET:
 311       case ST_FS8_WRITTEN:
 312       case ST_FS8_RESET:
 313       case ST_FS16_WRITTEN:
 314       case ST_FS16_RESET:
 315          /* We'll handle these when along with the time. */
 316          scaled[i] = 0;
 317          continue;
 318
 319       case ST_VS:
 320       case ST_FS8:
 321       case ST_FS16:
 322          get_written_and_reset(brw, i, &written, &reset);
 323          break;
 324
 325       default:
 326          /* I sometimes want to print things that aren't the 3 shader times.
 327           * Just print the sum in that case.
 328           */
 329          written = 1;
 330          reset = 0;
 331          break;
 332       }
 333
 334       uint64_t time = brw->shader_time.cumulative[i];
 335       if (written) {
 336          scaled[i] = time / written * (written + reset);
 337       } else {
 338          scaled[i] = time;
 339       }
 340
 341       switch (type) {
 342       case ST_VS:
 343       case ST_FS8:
 344       case ST_FS16:
 345          total_by_type[type] += scaled[i];
 346          break;
 347       default:
 348          break;
 349       }
 350
 351       total += scaled[i];
 352    }
 353
 354    if (total == 0) {
 355       printf("No shader time collected yet\n");
 356       return;
 357    }
 358
 359    qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 360
 361    printf("\n");
 362    printf("type   ID      cycles spent                   %% of total\n");
 363    for (int s = 0; s < brw->shader_time.num_entries; s++) {
 364       /* Work back from the sorted pointers times to a time to print. */
 365       int i = sorted[s] - scaled;
 366
 367       if (scaled[i] == 0)
 368          continue;
 369
 370       int shader_num = -1;
 371       if (brw->shader_time.programs[i]) {
 372          shader_num = brw->shader_time.programs[i]->Name;
 373       }
 374
 375       switch (brw->shader_time.types[i]) {
 376       case ST_VS:
 377          print_shader_time_line("vs", shader_num, scaled[i], total);
 378          break;
 379       case ST_FS8:
 380          print_shader_time_line("fs8", shader_num, scaled[i], total);
 381          break;
 382       case ST_FS16:
 383          print_shader_time_line("fs16", shader_num, scaled[i], total);
 384          break;
 385       default:
 386          print_shader_time_line("other", shader_num, scaled[i], total);
 387          break;
 388       }
 389    }
 390
 391    printf("\n");
 392    print_shader_time_line("total vs", -1, total_by_type[ST_VS], total);
 393    print_shader_time_line("total fs8", -1, total_by_type[ST_FS8], total);
 394    print_shader_time_line("total fs16", -1, total_by_type[ST_FS16], total);
 395 }
 396
 397 static void
 398 brw_collect_shader_time(struct brw_context *brw)
 399 {
 400    if (!brw->shader_time.bo)
 401       return;
 402
 403    /* This probably stalls on the last rendering.  We could fix that by
 404     * delaying reading the reports, but it doesn't look like it's a big
 405     * overhead compared to the cost of tracking the time in the first place.
 406     */
 407    drm_intel_bo_map(brw->shader_time.bo, true);
 408
 409    uint32_t *times = brw->shader_time.bo->virtual;
 410
 411    for (int i = 0; i < brw->shader_time.num_entries; i++) {
 412       brw->shader_time.cumulative[i] += times[i];
 413    }
 414
 415    /* Zero the BO out to clear it out for our next collection.
 416     */
 417    memset(times, 0, brw->shader_time.bo->size);
 418    drm_intel_bo_unmap(brw->shader_time.bo);
 419 }
 420
 421 void
 422 brw_collect_and_report_shader_time(struct brw_context *brw)
 423 {
 424    brw_collect_shader_time(brw);
 425
 426    if (brw->shader_time.report_time == 0 ||
 427        get_time() - brw->shader_time.report_time >= 1.0) {
 428       brw_report_shader_time(brw);
 429       brw->shader_time.report_time = get_time();
 430    }
 431 }
 432
 433 void
 434 brw_destroy_shader_time(struct brw_context *brw)
 435 {
 436    drm_intel_bo_unreference(brw->shader_time.bo);
 437    brw->shader_time.bo = NULL;
 438 }