src/mesa/drivers/dri/i965/brw_wm.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32 #include "brw_context.h"
  33 #include "brw_util.h"
  34 #include "brw_wm.h"
  35 #include "brw_state.h"
  36
  37
  38 /** Return number of src args for given instruction */
  39 GLuint brw_wm_nr_args( GLuint opcode )
  40 {
  41    switch (opcode) {
  42    case WM_FRONTFACING:
  43    case WM_PIXELXY:
  44       return 0;
  45    case WM_CINTERP:
  46    case WM_WPOSXY:
  47    case WM_DELTAXY:
  48       return 1;
  49    case WM_LINTERP:
  50    case WM_PIXELW:
  51       return 2;
  52    case WM_FB_WRITE:
  53    case WM_PINTERP:
  54       return 3;
  55    default:
  56       assert(opcode < MAX_OPCODE);
  57       return _mesa_num_inst_src_regs(opcode);
  58    }
  59 }
  60
  61
  62 GLuint brw_wm_is_scalar_result( GLuint opcode )
  63 {
  64    switch (opcode) {
  65    case OPCODE_COS:
  66    case OPCODE_EX2:
  67    case OPCODE_LG2:
  68    case OPCODE_POW:
  69    case OPCODE_RCP:
  70    case OPCODE_RSQ:
  71    case OPCODE_SIN:
  72    case OPCODE_DP3:
  73    case OPCODE_DP4:
  74    case OPCODE_DPH:
  75    case OPCODE_DST:
  76       return 1;
  77
  78    default:
  79       return 0;
  80    }
  81 }
  82
  83
  84 /**
  85  * Do GPU code generation for non-GLSL shader.  non-GLSL shaders have
  86  * no flow control instructions so we can more readily do SSA-style
  87  * optimizations.
  88  */
  89 static void
  90 brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
  91 {
  92    /* Augment fragment program.  Add instructions for pre- and
  93     * post-fragment-program tasks such as interpolation and fogging.
  94     */
  95    brw_wm_pass_fp(c);
  96
  97    /* Translate to intermediate representation.  Build register usage
  98     * chains.
  99     */
 100    brw_wm_pass0(c);
 101
 102    /* Dead code removal.
 103     */
 104    brw_wm_pass1(c);
 105
 106    /* Register allocation.
 107     * Divide by two because we operate on 16 pixels at a time and require
 108     * two GRF entries for each logical shader register.
 109     */
 110    c->grf_limit = BRW_WM_MAX_GRF / 2;
 111
 112    brw_wm_pass2(c);
 113
 114    /* how many general-purpose registers are used */
 115    c->prog_data.total_grf = c->max_wm_grf;
 116
 117    /* Scratch space is used for register spilling */
 118    if (c->last_scratch) {
 119       c->prog_data.total_scratch = c->last_scratch + 0x40;
 120    }
 121    else {
 122       c->prog_data.total_scratch = 0;
 123    }
 124
 125    /* Emit GEN4 code.
 126     */
 127    brw_wm_emit(c);
 128 }
 129
 130
 131 /**
 132  * All Mesa program -> GPU code generation goes through this function.
 133  * Depending on the instructions used (i.e. flow control instructions)
 134  * we'll use one of two code generators.
 135  */
 136 static void do_wm_prog( struct brw_context *brw,
 137                         struct brw_fragment_program *fp,
 138                         struct brw_wm_prog_key *key)
 139 {
 140    struct brw_wm_compile *c;
 141    const GLuint *program;
 142    GLuint program_size;
 143
 144    c = brw->wm.compile_data;
 145    if (c == NULL) {
 146       brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
 147       c = brw->wm.compile_data;
 148       if (c == NULL) {
 149          /* Ouch - big out of memory problem.  Can't continue
 150           * without triggering a segfault, no way to signal,
 151           * so just return.
 152           */
 153          return;
 154       }
 155       c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction));
 156       c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN *
 157                                           sizeof(*c->prog_instructions));
 158       c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg));
 159       c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs));
 160    } else {
 161       void *instruction = c->instruction;
 162       void *prog_instructions = c->prog_instructions;
 163       void *vreg = c->vreg;
 164       void *refs = c->refs;
 165       memset(c, 0, sizeof(*brw->wm.compile_data));
 166       c->instruction = instruction;
 167       c->prog_instructions = prog_instructions;
 168       c->vreg = vreg;
 169       c->refs = refs;
 170    }
 171    memcpy(&c->key, key, sizeof(*key));
 172
 173    c->fp = fp;
 174    c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
 175
 176    brw_init_compile(brw, &c->func);
 177
 178    /* temporary sanity check assertion */
 179    ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
 180
 181    /*
 182     * Shader which use GLSL features such as flow control are handled
 183     * differently from "simple" shaders.
 184     */
 185    if (fp->isGLSL) {
 186       c->dispatch_width = 8;
 187       brw_wm_glsl_emit(brw, c);
 188    }
 189    else {
 190       c->dispatch_width = 16;
 191       brw_wm_non_glsl_emit(brw, c);
 192    }
 193
 194    if (INTEL_DEBUG & DEBUG_WM)
 195       fprintf(stderr, "\n");
 196
 197    /* get the program
 198     */
 199    program = brw_get_program(&c->func, &program_size);
 200
 201    dri_bo_unreference(brw->wm.prog_bo);
 202    brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG,
 203                                                    &c->key, sizeof(c->key),
 204                                                    NULL, 0,
 205                                                    program, program_size,
 206                                                    &c->prog_data,
 207                                                    sizeof(c->prog_data),
 208                                                    &brw->wm.prog_data);
 209 }
 210
 211
 212
 213 static void brw_wm_populate_key( struct brw_context *brw,
 214                                  struct brw_wm_prog_key *key )
 215 {
 216    GLcontext *ctx = &brw->intel.ctx;
 217    /* BRW_NEW_FRAGMENT_PROGRAM */
 218    const struct brw_fragment_program *fp =
 219       (struct brw_fragment_program *)brw->fragment_program;
 220    GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
 221    GLuint lookup = 0;
 222    GLuint line_aa;
 223    GLuint i;
 224
 225    memset(key, 0, sizeof(*key));
 226
 227    /* Build the index for table lookup
 228     */
 229    /* _NEW_COLOR */
 230    if (fp->program.UsesKill ||
 231        ctx->Color.AlphaEnabled)
 232       lookup |= IZ_PS_KILL_ALPHATEST_BIT;
 233
 234    if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
 235       lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
 236
 237    /* _NEW_DEPTH */
 238    if (ctx->Depth.Test)
 239       lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
 240
 241    if (ctx->Depth.Test &&
 242        ctx->Depth.Mask) /* ?? */
 243       lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
 244
 245    /* _NEW_STENCIL */
 246    if (ctx->Stencil._Enabled) {
 247       lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 248
 249       if (ctx->Stencil.WriteMask[0] ||
 250           ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
 251          lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
 252    }
 253
 254    line_aa = AA_NEVER;
 255
 256    /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
 257    if (ctx->Line.SmoothFlag) {
 258       if (brw->intel.reduced_primitive == GL_LINES) {
 259          line_aa = AA_ALWAYS;
 260       }
 261       else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
 262          if (ctx->Polygon.FrontMode == GL_LINE) {
 263             line_aa = AA_SOMETIMES;
 264
 265             if (ctx->Polygon.BackMode == GL_LINE ||
 266                 (ctx->Polygon.CullFlag &&
 267                  ctx->Polygon.CullFaceMode == GL_BACK))
 268                line_aa = AA_ALWAYS;
 269          }
 270          else if (ctx->Polygon.BackMode == GL_LINE) {
 271             line_aa = AA_SOMETIMES;
 272
 273             if ((ctx->Polygon.CullFlag &&
 274                  ctx->Polygon.CullFaceMode == GL_FRONT))
 275                line_aa = AA_ALWAYS;
 276          }
 277       }
 278    }
 279
 280    brw_wm_lookup_iz(line_aa,
 281                     lookup,
 282                     uses_depth,
 283                     key);
 284
 285
 286    /* BRW_NEW_WM_INPUT_DIMENSIONS */
 287    key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
 288
 289    /* _NEW_LIGHT */
 290    key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
 291
 292    /* _NEW_HINT */
 293    key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
 294
 295    /* _NEW_TEXTURE */
 296    for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
 297       const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
 298
 299       if (unit->_ReallyEnabled) {
 300          const struct gl_texture_object *t = unit->_Current;
 301          const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 302          if (img->InternalFormat == GL_YCBCR_MESA) {
 303             key->yuvtex_mask |= 1 << i;
 304             if (img->TexFormat == MESA_FORMAT_YCBCR)
 305                 key->yuvtex_swap_mask |= 1 << i;
 306          }
 307
 308          key->tex_swizzles[i] = t->_Swizzle;
 309       }
 310       else {
 311          key->tex_swizzles[i] = SWIZZLE_NOOP;
 312       }
 313    }
 314
 315    /* Shadow */
 316    key->shadowtex_mask = fp->program.Base.ShadowSamplers;
 317
 318    /* _NEW_BUFFERS */
 319    /*
 320     * Include the draw buffer origin and height so that we can calculate
 321     * fragment position values relative to the bottom left of the drawable,
 322     * from the incoming screen origin relative position we get as part of our
 323     * payload.
 324     *
 325     * This is only needed for the WM_WPOSXY opcode when the fragment program
 326     * uses the gl_FragCoord input.
 327     *
 328     * We could avoid recompiling by including this as a constant referenced by
 329     * our program, but if we were to do that it would also be nice to handle
 330     * getting that constant updated at batchbuffer submit time (when we
 331     * hold the lock and know where the buffer really is) rather than at emit
 332     * time when we don't hold the lock and are just guessing.  We could also
 333     * just avoid using this as key data if the program doesn't use
 334     * fragment.position.
 335     *
 336     * For DRI2 the origin_x/y will always be (0,0) but we still need the
 337     * drawable height in order to invert the Y axis.
 338     */
 339    if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
 340       if (brw->intel.driDrawable != NULL) {
 341          key->origin_x = brw->intel.driDrawable->x;
 342          key->origin_y = brw->intel.driDrawable->y;
 343          key->drawable_height = brw->intel.driDrawable->h;
 344       }
 345    }
 346
 347    key->nr_color_regions = brw->state.nr_color_regions;
 348
 349    /* CACHE_NEW_VS_PROG */
 350    key->vp_outputs_written = brw->vs.prog_data->outputs_written;
 351
 352    /* The unique fragment program ID */
 353    key->program_string_id = fp->id;
 354 }
 355
 356
 357 static void brw_prepare_wm_prog(struct brw_context *brw)
 358 {
 359    struct brw_wm_prog_key key;
 360    struct brw_fragment_program *fp = (struct brw_fragment_program *)
 361       brw->fragment_program;
 362
 363    brw_wm_populate_key(brw, &key);
 364
 365    /* Make an early check for the key.
 366     */
 367    dri_bo_unreference(brw->wm.prog_bo);
 368    brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
 369                                       &key, sizeof(key),
 370                                       NULL, 0,
 371                                       &brw->wm.prog_data);
 372    if (brw->wm.prog_bo == NULL)
 373       do_wm_prog(brw, fp, &key);
 374 }
 375
 376
 377 const struct brw_tracked_state brw_wm_prog = {
 378    .dirty = {
 379       .mesa  = (_NEW_COLOR |
 380                 _NEW_DEPTH |
 381                 _NEW_HINT |
 382                 _NEW_STENCIL |
 383                 _NEW_POLYGON |
 384                 _NEW_LINE |
 385                 _NEW_LIGHT |
 386                 _NEW_BUFFERS |
 387                 _NEW_TEXTURE),
 388       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
 389                 BRW_NEW_WM_INPUT_DIMENSIONS |
 390                 BRW_NEW_REDUCED_PRIMITIVE),
 391       .cache = CACHE_NEW_VS_PROG,
 392    },
 393    .prepare = brw_prepare_wm_prog
 394 };
 395