Merge remote branch 'vdpau/pipe-video' into pipe-video
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include "brw_context.h"
33 #include "brw_wm.h"
34 #include "brw_state.h"
35 #include "main/formats.h"
36
37 /** Return number of src args for given instruction */
38 GLuint brw_wm_nr_args( GLuint opcode )
39 {
40 switch (opcode) {
41 case WM_FRONTFACING:
42 case WM_PIXELXY:
43 return 0;
44 case WM_CINTERP:
45 case WM_WPOSXY:
46 case WM_DELTAXY:
47 return 1;
48 case WM_LINTERP:
49 case WM_PIXELW:
50 return 2;
51 case WM_FB_WRITE:
52 case WM_PINTERP:
53 return 3;
54 default:
55 assert(opcode < MAX_OPCODE);
56 return _mesa_num_inst_src_regs(opcode);
57 }
58 }
59
60
61 GLuint brw_wm_is_scalar_result( GLuint opcode )
62 {
63 switch (opcode) {
64 case OPCODE_COS:
65 case OPCODE_EX2:
66 case OPCODE_LG2:
67 case OPCODE_POW:
68 case OPCODE_RCP:
69 case OPCODE_RSQ:
70 case OPCODE_SIN:
71 case OPCODE_DP2:
72 case OPCODE_DP3:
73 case OPCODE_DP4:
74 case OPCODE_DPH:
75 case OPCODE_DST:
76 return 1;
77
78 default:
79 return 0;
80 }
81 }
82
83
84 /**
85 * Do GPU code generation for non-GLSL shader. non-GLSL shaders have
86 * no flow control instructions so we can more readily do SSA-style
87 * optimizations.
88 */
/**
 * Do GPU code generation for non-GLSL shader. non-GLSL shaders have
 * no flow control instructions so we can more readily do SSA-style
 * optimizations.
 *
 * The passes below run in a fixed pipeline order; each consumes the
 * data structures produced by the previous one, so they must not be
 * reordered.
 */
static void
brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   /* Augment fragment program.  Add instructions for pre- and
    * post-fragment-program tasks such as interpolation and fogging.
    */
   brw_wm_pass_fp(c);

   /* Translate to intermediate representation.  Build register usage
    * chains.
    */
   brw_wm_pass0(c);

   /* Dead code removal.
    */
   brw_wm_pass1(c);

   /* Register allocation.
    * Divide by two because we operate on 16 pixels at a time and require
    * two GRF entries for each logical shader register.
    */
   c->grf_limit = BRW_WM_MAX_GRF / 2;

   brw_wm_pass2(c);

   /* how many general-purpose registers are used (recorded by pass2) */
   c->prog_data.total_grf = c->max_wm_grf;

   /* Emit GEN4 code.
    */
   brw_wm_emit(c);
}
121
/**
 * Record the layout of the fragment shader thread payload in @c.
 *
 * On gen6+ the hardware delivers a fixed-format payload and we just
 * count which optional registers are present, leaving @c->nr_payload_regs
 * pointing one past the last payload GRF.  On earlier gens the layout
 * comes from the interpolation/Z setup lookup table instead.
 */
static void
brw_wm_payload_setup(struct brw_context *brw,
		     struct brw_wm_compile *c)
{
   struct intel_context *intel = &brw->intel;
   /* WPOS read implies we need both interpolated depth and W below. */
   bool uses_depth = (c->fp->program.Base.InputsRead &
		      (1 << FRAG_ATTRIB_WPOS)) != 0;

   if (intel->gen >= 6) {
      /* R0-1: masks, pixel X/Y coordinates. */
      c->nr_payload_regs = 2;
      /* R2: only for 32-pixel dispatch.*/
      /* R3-4: perspective pixel location barycentric */
      c->nr_payload_regs += 2;
      /* R5-6: perspective pixel location bary for dispatch width != 8 */
      if (c->dispatch_width == 16) {
	 c->nr_payload_regs += 2;
      }
      /* R7-10: perspective centroid barycentric */
      /* R11-14: perspective sample barycentric */
      /* R15-18: linear pixel location barycentric */
      /* R19-22: linear centroid barycentric */
      /* R23-26: linear sample barycentric */

      /* R27: interpolated depth if uses source depth */
      if (uses_depth) {
	 c->source_depth_reg = c->nr_payload_regs;
	 c->nr_payload_regs++;
	 if (c->dispatch_width == 16) {
	    /* R28: interpolated depth if not 8-wide. */
	    c->nr_payload_regs++;
	 }
      }
      /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
       * NOTE(review): this reuses uses_depth, i.e. source W is assumed
       * to be present exactly when source depth is (both derive from
       * the WPOS input) -- confirm against the state emission that sets
       * GEN6_WM_USES_SOURCE_W.
       */
      if (uses_depth) {
	 c->source_w_reg = c->nr_payload_regs;
	 c->nr_payload_regs++;
	 if (c->dispatch_width == 16) {
	    /* R30: interpolated W if not 8-wide. */
	    c->nr_payload_regs++;
	 }
      }
      /* R31: MSAA position offsets. */
      /* R32-: bary for 32-pixel. */
      /* R58-59: interp W for 32-pixel. */

      if (c->fp->program.Base.OutputsWritten &
	  BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
	 c->source_depth_to_render_target = GL_TRUE;
	 c->computes_depth = GL_TRUE;
      }
   } else {
      /* Pre-gen6: payload layout depends on the interpolation/Z setup
       * (iz) table lookup.
       */
      brw_wm_lookup_iz(intel, c);
   }
}
178
179 /**
180 * All Mesa program -> GPU code generation goes through this function.
181 * Depending on the instructions used (i.e. flow control instructions)
182 * we'll use one of two code generators.
183 */
184 static void do_wm_prog( struct brw_context *brw,
185 struct brw_fragment_program *fp,
186 struct brw_wm_prog_key *key)
187 {
188 struct brw_wm_compile *c;
189 const GLuint *program;
190 GLuint program_size;
191
192 c = brw->wm.compile_data;
193 if (c == NULL) {
194 brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
195 c = brw->wm.compile_data;
196 if (c == NULL) {
197 /* Ouch - big out of memory problem. Can't continue
198 * without triggering a segfault, no way to signal,
199 * so just return.
200 */
201 return;
202 }
203 c->instruction = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction));
204 c->prog_instructions = calloc(1, BRW_WM_MAX_INSN *
205 sizeof(*c->prog_instructions));
206 c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg));
207 c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs));
208 } else {
209 void *instruction = c->instruction;
210 void *prog_instructions = c->prog_instructions;
211 void *vreg = c->vreg;
212 void *refs = c->refs;
213 memset(c, 0, sizeof(*brw->wm.compile_data));
214 c->instruction = instruction;
215 c->prog_instructions = prog_instructions;
216 c->vreg = vreg;
217 c->refs = refs;
218 }
219 memcpy(&c->key, key, sizeof(*key));
220
221 c->fp = fp;
222 c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
223
224 brw_init_compile(brw, &c->func);
225
226 brw_wm_payload_setup(brw, c);
227
228 if (!brw_wm_fs_emit(brw, c)) {
229 /*
230 * Shader which use GLSL features such as flow control are handled
231 * differently from "simple" shaders.
232 */
233 c->dispatch_width = 16;
234 brw_wm_payload_setup(brw, c);
235 brw_wm_non_glsl_emit(brw, c);
236 }
237 c->prog_data.dispatch_width = c->dispatch_width;
238
239 /* Scratch space is used for register spilling */
240 if (c->last_scratch) {
241 /* Per-thread scratch space is power-of-two sized. */
242 for (c->prog_data.total_scratch = 1024;
243 c->prog_data.total_scratch <= c->last_scratch;
244 c->prog_data.total_scratch *= 2) {
245 /* empty */
246 }
247 }
248 else {
249 c->prog_data.total_scratch = 0;
250 }
251
252 if (unlikely(INTEL_DEBUG & DEBUG_WM))
253 fprintf(stderr, "\n");
254
255 /* get the program
256 */
257 program = brw_get_program(&c->func, &program_size);
258
259 drm_intel_bo_unreference(brw->wm.prog_bo);
260 brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG,
261 &c->key, sizeof(c->key),
262 NULL, 0,
263 program, program_size,
264 &c->prog_data,
265 sizeof(c->prog_data),
266 &brw->wm.prog_data);
267 }
268
269
270
271 static void brw_wm_populate_key( struct brw_context *brw,
272 struct brw_wm_prog_key *key )
273 {
274 struct gl_context *ctx = &brw->intel.ctx;
275 /* BRW_NEW_FRAGMENT_PROGRAM */
276 const struct brw_fragment_program *fp =
277 (struct brw_fragment_program *)brw->fragment_program;
278 GLuint lookup = 0;
279 GLuint line_aa;
280 GLuint i;
281
282 memset(key, 0, sizeof(*key));
283
284 /* Build the index for table lookup
285 */
286 /* _NEW_COLOR */
287 if (fp->program.UsesKill ||
288 ctx->Color.AlphaEnabled)
289 lookup |= IZ_PS_KILL_ALPHATEST_BIT;
290
291 if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
292 lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
293
294 /* _NEW_DEPTH */
295 if (ctx->Depth.Test)
296 lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
297
298 if (ctx->Depth.Test &&
299 ctx->Depth.Mask) /* ?? */
300 lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
301
302 /* _NEW_STENCIL */
303 if (ctx->Stencil._Enabled) {
304 lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
305
306 if (ctx->Stencil.WriteMask[0] ||
307 ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
308 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
309 }
310
311 line_aa = AA_NEVER;
312
313 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
314 if (ctx->Line.SmoothFlag) {
315 if (brw->intel.reduced_primitive == GL_LINES) {
316 line_aa = AA_ALWAYS;
317 }
318 else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
319 if (ctx->Polygon.FrontMode == GL_LINE) {
320 line_aa = AA_SOMETIMES;
321
322 if (ctx->Polygon.BackMode == GL_LINE ||
323 (ctx->Polygon.CullFlag &&
324 ctx->Polygon.CullFaceMode == GL_BACK))
325 line_aa = AA_ALWAYS;
326 }
327 else if (ctx->Polygon.BackMode == GL_LINE) {
328 line_aa = AA_SOMETIMES;
329
330 if ((ctx->Polygon.CullFlag &&
331 ctx->Polygon.CullFaceMode == GL_FRONT))
332 line_aa = AA_ALWAYS;
333 }
334 }
335 }
336
337 key->iz_lookup = lookup;
338 key->line_aa = line_aa;
339 key->stats_wm = brw->intel.stats_wm;
340
341 /* BRW_NEW_WM_INPUT_DIMENSIONS */
342 key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
343
344 /* _NEW_LIGHT */
345 key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
346
347 /* _NEW_HINT */
348 key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
349
350 /* _NEW_TEXTURE */
351 for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
352 const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
353
354 if (unit->_ReallyEnabled) {
355 const struct gl_texture_object *t = unit->_Current;
356 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
357 int swizzles[SWIZZLE_NIL + 1] = {
358 SWIZZLE_X,
359 SWIZZLE_Y,
360 SWIZZLE_Z,
361 SWIZZLE_W,
362 SWIZZLE_ZERO,
363 SWIZZLE_ONE,
364 SWIZZLE_NIL
365 };
366
367 key->tex_swizzles[i] = SWIZZLE_NOOP;
368
369 /* GL_DEPTH_TEXTURE_MODE is normally handled through
370 * brw_wm_surface_state, but it applies to shadow compares as
371 * well and our shadow compares always return the result in
372 * all 4 channels.
373 */
374 if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
375 if (t->DepthMode == GL_ALPHA) {
376 swizzles[0] = SWIZZLE_ZERO;
377 swizzles[1] = SWIZZLE_ZERO;
378 swizzles[2] = SWIZZLE_ZERO;
379 } else if (t->DepthMode == GL_LUMINANCE) {
380 swizzles[3] = SWIZZLE_ONE;
381 } else if (t->DepthMode == GL_RED) {
382 swizzles[1] = SWIZZLE_ZERO;
383 swizzles[2] = SWIZZLE_ZERO;
384 swizzles[3] = SWIZZLE_ZERO;
385 }
386 }
387
388 if (img->InternalFormat == GL_YCBCR_MESA) {
389 key->yuvtex_mask |= 1 << i;
390 if (img->TexFormat == MESA_FORMAT_YCBCR)
391 key->yuvtex_swap_mask |= 1 << i;
392 }
393
394 key->tex_swizzles[i] =
395 MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
396 swizzles[GET_SWZ(t->_Swizzle, 1)],
397 swizzles[GET_SWZ(t->_Swizzle, 2)],
398 swizzles[GET_SWZ(t->_Swizzle, 3)]);
399 }
400 else {
401 key->tex_swizzles[i] = SWIZZLE_NOOP;
402 }
403 }
404
405 /* Shadow */
406 key->shadowtex_mask = fp->program.Base.ShadowSamplers;
407
408 /* _NEW_BUFFERS */
409 /*
410 * Include the draw buffer origin and height so that we can calculate
411 * fragment position values relative to the bottom left of the drawable,
412 * from the incoming screen origin relative position we get as part of our
413 * payload.
414 *
415 * This is only needed for the WM_WPOSXY opcode when the fragment program
416 * uses the gl_FragCoord input.
417 *
418 * We could avoid recompiling by including this as a constant referenced by
419 * our program, but if we were to do that it would also be nice to handle
420 * getting that constant updated at batchbuffer submit time (when we
421 * hold the lock and know where the buffer really is) rather than at emit
422 * time when we don't hold the lock and are just guessing. We could also
423 * just avoid using this as key data if the program doesn't use
424 * fragment.position.
425 *
426 * For DRI2 the origin_x/y will always be (0,0) but we still need the
427 * drawable height in order to invert the Y axis.
428 */
429 if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
430 key->drawable_height = ctx->DrawBuffer->Height;
431 key->render_to_fbo = ctx->DrawBuffer->Name != 0;
432 }
433
434 key->nr_color_regions = brw->state.nr_color_regions;
435
436 /* CACHE_NEW_VS_PROG */
437 key->vp_outputs_written = brw->vs.prog_data->outputs_written;
438
439 /* The unique fragment program ID */
440 key->program_string_id = fp->id;
441 }
442
443
444 static void brw_prepare_wm_prog(struct brw_context *brw)
445 {
446 struct brw_wm_prog_key key;
447 struct brw_fragment_program *fp = (struct brw_fragment_program *)
448 brw->fragment_program;
449
450 brw_wm_populate_key(brw, &key);
451
452 /* Make an early check for the key.
453 */
454 drm_intel_bo_unreference(brw->wm.prog_bo);
455 brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
456 &key, sizeof(key),
457 NULL, 0,
458 &brw->wm.prog_data);
459 if (brw->wm.prog_bo == NULL)
460 do_wm_prog(brw, fp, &key);
461 }
462
463
/**
 * State atom for the fragment program compile.  The dirty flags list
 * the Mesa/brw/cache state read by brw_wm_populate_key(), so any
 * change to a key input re-runs brw_prepare_wm_prog().
 */
const struct brw_tracked_state brw_wm_prog = {
   .dirty = {
      .mesa = (_NEW_COLOR |
	       _NEW_DEPTH |
	       _NEW_HINT |
	       _NEW_STENCIL |
	       _NEW_POLYGON |
	       _NEW_LINE |
	       _NEW_LIGHT |
	       _NEW_BUFFERS |
	       _NEW_TEXTURE),
      .brw = (BRW_NEW_FRAGMENT_PROGRAM |
	      BRW_NEW_WM_INPUT_DIMENSIONS |
	      BRW_NEW_REDUCED_PRIMITIVE),
      .cache = CACHE_NEW_VS_PROG,
   },
   .prepare = brw_prepare_wm_prog
};
482