src/mesa/drivers/dri/i965/brw_wm.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32 #include "brw_context.h"
  33 #include "brw_wm.h"
  34 #include "brw_state.h"
  35
  36
  37 /** Return number of src args for given instruction */
  38 GLuint brw_wm_nr_args( GLuint opcode )
  39 {
  40    switch (opcode) {
  41    case WM_FRONTFACING:
  42    case WM_PIXELXY:
  43       return 0;
  44    case WM_CINTERP:
  45    case WM_WPOSXY:
  46    case WM_DELTAXY:
  47       return 1;
  48    case WM_LINTERP:
  49    case WM_PIXELW:
  50       return 2;
  51    case WM_FB_WRITE:
  52    case WM_PINTERP:
  53       return 3;
  54    default:
  55       assert(opcode < MAX_OPCODE);
  56       return _mesa_num_inst_src_regs(opcode);
  57    }
  58 }
  59
  60
  61 GLuint brw_wm_is_scalar_result( GLuint opcode )
  62 {
  63    switch (opcode) {
  64    case OPCODE_COS:
  65    case OPCODE_EX2:
  66    case OPCODE_LG2:
  67    case OPCODE_POW:
  68    case OPCODE_RCP:
  69    case OPCODE_RSQ:
  70    case OPCODE_SIN:
  71    case OPCODE_DP3:
  72    case OPCODE_DP4:
  73    case OPCODE_DPH:
  74    case OPCODE_DST:
  75       return 1;
  76
  77    default:
  78       return 0;
  79    }
  80 }
  81
  82
  83 /**
  84  * Do GPU code generation for non-GLSL shader.  non-GLSL shaders have
  85  * no flow control instructions so we can more readily do SSA-style
  86  * optimizations.
  87  */
  88 static void
  89 brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
  90 {
  91    /* Augment fragment program.  Add instructions for pre- and
  92     * post-fragment-program tasks such as interpolation and fogging.
  93     */
  94    brw_wm_pass_fp(c);
  95
  96    /* Translate to intermediate representation.  Build register usage
  97     * chains.
  98     */
  99    brw_wm_pass0(c);
 100
 101    /* Dead code removal.
 102     */
 103    brw_wm_pass1(c);
 104
 105    /* Register allocation.
 106     * Divide by two because we operate on 16 pixels at a time and require
 107     * two GRF entries for each logical shader register.
 108     */
 109    c->grf_limit = BRW_WM_MAX_GRF / 2;
 110
 111    brw_wm_pass2(c);
 112
 113    /* how many general-purpose registers are used */
 114    c->prog_data.total_grf = c->max_wm_grf;
 115
 116    /* Scratch space is used for register spilling */
 117    if (c->last_scratch) {
 118       c->prog_data.total_scratch = c->last_scratch + 0x40;
 119    }
 120    else {
 121       c->prog_data.total_scratch = 0;
 122    }
 123
 124    /* Emit GEN4 code.
 125     */
 126    brw_wm_emit(c);
 127 }
 128
 129
 130 /**
 131  * All Mesa program -> GPU code generation goes through this function.
 132  * Depending on the instructions used (i.e. flow control instructions)
 133  * we'll use one of two code generators.
 134  */
 135 static void do_wm_prog( struct brw_context *brw,
 136                         struct brw_fragment_program *fp,
 137                         struct brw_wm_prog_key *key)
 138 {
 139    struct brw_wm_compile *c;
 140    const GLuint *program;
 141    GLuint program_size;
 142
 143    c = brw->wm.compile_data;
 144    if (c == NULL) {
 145       brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
 146       c = brw->wm.compile_data;
 147       if (c == NULL) {
 148          /* Ouch - big out of memory problem.  Can't continue
 149           * without triggering a segfault, no way to signal,
 150           * so just return.
 151           */
 152          return;
 153       }
 154       c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction));
 155       c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN *
 156                                           sizeof(*c->prog_instructions));
 157       c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg));
 158       c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs));
 159    } else {
 160       void *instruction = c->instruction;
 161       void *prog_instructions = c->prog_instructions;
 162       void *vreg = c->vreg;
 163       void *refs = c->refs;
 164       memset(c, 0, sizeof(*brw->wm.compile_data));
 165       c->instruction = instruction;
 166       c->prog_instructions = prog_instructions;
 167       c->vreg = vreg;
 168       c->refs = refs;
 169    }
 170    memcpy(&c->key, key, sizeof(*key));
 171
 172    c->fp = fp;
 173    c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
 174
 175    brw_init_compile(brw, &c->func);
 176
 177    /* temporary sanity check assertion */
 178    ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
 179
 180    /*
 181     * Shader which use GLSL features such as flow control are handled
 182     * differently from "simple" shaders.
 183     */
 184    if (fp->isGLSL) {
 185       c->dispatch_width = 8;
 186       brw_wm_glsl_emit(brw, c);
 187    }
 188    else {
 189       c->dispatch_width = 16;
 190       brw_wm_non_glsl_emit(brw, c);
 191    }
 192
 193    if (INTEL_DEBUG & DEBUG_WM)
 194       fprintf(stderr, "\n");
 195
 196    /* get the program
 197     */
 198    program = brw_get_program(&c->func, &program_size);
 199
 200    dri_bo_unreference(brw->wm.prog_bo);
 201    brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG,
 202                                                    &c->key, sizeof(c->key),
 203                                                    NULL, 0,
 204                                                    program, program_size,
 205                                                    &c->prog_data,
 206                                                    sizeof(c->prog_data),
 207                                                    &brw->wm.prog_data);
 208 }
 209
 210
 211
 212 static void brw_wm_populate_key( struct brw_context *brw,
 213                                  struct brw_wm_prog_key *key )
 214 {
 215    GLcontext *ctx = &brw->intel.ctx;
 216    /* BRW_NEW_FRAGMENT_PROGRAM */
 217    const struct brw_fragment_program *fp =
 218       (struct brw_fragment_program *)brw->fragment_program;
 219    GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
 220    GLuint lookup = 0;
 221    GLuint line_aa;
 222    GLuint i;
 223
 224    memset(key, 0, sizeof(*key));
 225
 226    /* Build the index for table lookup
 227     */
 228    /* _NEW_COLOR */
 229    if (fp->program.UsesKill ||
 230        ctx->Color.AlphaEnabled)
 231       lookup |= IZ_PS_KILL_ALPHATEST_BIT;
 232
 233    if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
 234       lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
 235
 236    /* _NEW_DEPTH */
 237    if (ctx->Depth.Test)
 238       lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
 239
 240    if (ctx->Depth.Test &&
 241        ctx->Depth.Mask) /* ?? */
 242       lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
 243
 244    /* _NEW_STENCIL */
 245    if (ctx->Stencil._Enabled) {
 246       lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 247
 248       if (ctx->Stencil.WriteMask[0] ||
 249           ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
 250          lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
 251    }
 252
 253    line_aa = AA_NEVER;
 254
 255    /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
 256    if (ctx->Line.SmoothFlag) {
 257       if (brw->intel.reduced_primitive == GL_LINES) {
 258          line_aa = AA_ALWAYS;
 259       }
 260       else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
 261          if (ctx->Polygon.FrontMode == GL_LINE) {
 262             line_aa = AA_SOMETIMES;
 263
 264             if (ctx->Polygon.BackMode == GL_LINE ||
 265                 (ctx->Polygon.CullFlag &&
 266                  ctx->Polygon.CullFaceMode == GL_BACK))
 267                line_aa = AA_ALWAYS;
 268          }
 269          else if (ctx->Polygon.BackMode == GL_LINE) {
 270             line_aa = AA_SOMETIMES;
 271
 272             if ((ctx->Polygon.CullFlag &&
 273                  ctx->Polygon.CullFaceMode == GL_FRONT))
 274                line_aa = AA_ALWAYS;
 275          }
 276       }
 277    }
 278
 279    brw_wm_lookup_iz(line_aa,
 280                     lookup,
 281                     uses_depth,
 282                     key);
 283
 284
 285    /* BRW_NEW_WM_INPUT_DIMENSIONS */
 286    key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
 287
 288    /* _NEW_LIGHT */
 289    key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
 290
 291    /* _NEW_HINT */
 292    key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
 293
 294    /* _NEW_TEXTURE */
 295    for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
 296       const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
 297
 298       if (unit->_ReallyEnabled) {
 299          const struct gl_texture_object *t = unit->_Current;
 300          const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 301          if (img->InternalFormat == GL_YCBCR_MESA) {
 302             key->yuvtex_mask |= 1 << i;
 303             if (img->TexFormat == MESA_FORMAT_YCBCR)
 304                 key->yuvtex_swap_mask |= 1 << i;
 305          }
 306
 307          key->tex_swizzles[i] = t->_Swizzle;
 308       }
 309       else {
 310          key->tex_swizzles[i] = SWIZZLE_NOOP;
 311       }
 312    }
 313
 314    /* Shadow */
 315    key->shadowtex_mask = fp->program.Base.ShadowSamplers;
 316
 317    /* _NEW_BUFFERS */
 318    /*
 319     * Include the draw buffer origin and height so that we can calculate
 320     * fragment position values relative to the bottom left of the drawable,
 321     * from the incoming screen origin relative position we get as part of our
 322     * payload.
 323     *
 324     * This is only needed for the WM_WPOSXY opcode when the fragment program
 325     * uses the gl_FragCoord input.
 326     *
 327     * We could avoid recompiling by including this as a constant referenced by
 328     * our program, but if we were to do that it would also be nice to handle
 329     * getting that constant updated at batchbuffer submit time (when we
 330     * hold the lock and know where the buffer really is) rather than at emit
 331     * time when we don't hold the lock and are just guessing.  We could also
 332     * just avoid using this as key data if the program doesn't use
 333     * fragment.position.
 334     *
 335     * For DRI2 the origin_x/y will always be (0,0) but we still need the
 336     * drawable height in order to invert the Y axis.
 337     */
 338    if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
 339       if (brw->intel.driDrawable != NULL) {
 340          key->origin_x = brw->intel.driDrawable->x;
 341          key->origin_y = brw->intel.driDrawable->y;
 342          key->drawable_height = brw->intel.driDrawable->h;
 343       }
 344    }
 345
 346    key->nr_color_regions = brw->state.nr_color_regions;
 347
 348    /* CACHE_NEW_VS_PROG */
 349    key->vp_outputs_written = brw->vs.prog_data->outputs_written;
 350
 351    /* The unique fragment program ID */
 352    key->program_string_id = fp->id;
 353 }
 354
 355
 356 static void brw_prepare_wm_prog(struct brw_context *brw)
 357 {
 358    struct brw_wm_prog_key key;
 359    struct brw_fragment_program *fp = (struct brw_fragment_program *)
 360       brw->fragment_program;
 361
 362    brw_wm_populate_key(brw, &key);
 363
 364    /* Make an early check for the key.
 365     */
 366    dri_bo_unreference(brw->wm.prog_bo);
 367    brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
 368                                       &key, sizeof(key),
 369                                       NULL, 0,
 370                                       &brw->wm.prog_data);
 371    if (brw->wm.prog_bo == NULL)
 372       do_wm_prog(brw, fp, &key);
 373 }
 374
 375
 376 const struct brw_tracked_state brw_wm_prog = {
 377    .dirty = {
 378       .mesa  = (_NEW_COLOR |
 379                 _NEW_DEPTH |
 380                 _NEW_HINT |
 381                 _NEW_STENCIL |
 382                 _NEW_POLYGON |
 383                 _NEW_LINE |
 384                 _NEW_LIGHT |
 385                 _NEW_BUFFERS |
 386                 _NEW_TEXTURE),
 387       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
 388                 BRW_NEW_WM_INPUT_DIMENSIONS |
 389                 BRW_NEW_REDUCED_PRIMITIVE),
 390       .cache = CACHE_NEW_VS_PROG,
 391    },
 392    .prepare = brw_prepare_wm_prog
 393 };
 394