i965/fs: Move the computation of register block count from unit to compile.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33
34 #include "intel_fbo.h"
35 #include "brw_context.h"
36 #include "brw_state.h"
37 #include "brw_defines.h"
38 #include "brw_wm.h"
39
40 /***********************************************************************
41 * WM unit - fragment programs and rasterization
42 */
43
44 bool
45 brw_color_buffer_write_enabled(struct brw_context *brw)
46 {
47 struct gl_context *ctx = &brw->intel.ctx;
48 const struct gl_fragment_program *fp = brw->fragment_program;
49 int i;
50
51 /* _NEW_BUFFERS */
52 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
53 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
54
55 /* _NEW_COLOR */
56 if (rb &&
57 (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
58 fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
59 (ctx->Color.ColorMask[i][0] ||
60 ctx->Color.ColorMask[i][1] ||
61 ctx->Color.ColorMask[i][2] ||
62 ctx->Color.ColorMask[i][3])) {
63 return true;
64 }
65 }
66
67 return false;
68 }
69
/**
 * Setup wm hardware state. See page 225 of Volume 2
 *
 * Fills out a WM_STATE structure in the batchbuffer's state space and emits
 * the relocations (program kernel, scratch space, sampler state) that the
 * kernel needs to patch the embedded graphics addresses at execution time.
 */
static void
brw_prepare_wm_unit(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   const struct gl_fragment_program *fp = brw->fragment_program;
   struct brw_wm_unit_state *wm;

   /* Allocate (32-byte aligned, per hardware requirement) and zero the
    * WM_STATE record inside the batchbuffer state area.
    */
   wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.state_offset);
   memset(wm, 0, sizeof(*wm));

   if (brw->wm.prog_data->prog_offset_16) {
      /* These two fields should be the same pre-gen6, which is why we
       * only have one hardware field to program for both dispatch
       * widths.
       */
      assert(brw->wm.prog_data->first_curbe_grf ==
	     brw->wm.prog_data->first_curbe_grf_16);
   }

   /* CACHE_NEW_WM_PROG */
   wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
   wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
   /* Kernel start pointers are in 64-byte units; the low bits carried by
    * grf_reg_count are folded into the relocation delta below.
    */
   wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
   /* reloc */
   wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
				     brw->wm.prog_data->prog_offset_16) >> 6;
   wm->thread1.depth_coef_urb_read_offset = 1;
   wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;

   if (intel->gen == 5)
      wm->thread1.binding_table_entry_count = 0; /* hardware requirement */
   else {
      /* BRW_NEW_NR_SURFACES */
      wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces;
   }

   if (brw->wm.prog_data->total_scratch != 0) {
      /* Base pointer is in 1KB units; per-thread size is encoded as a
       * power-of-two exponent relative to 1KB (hence the "- 11").
       */
      wm->thread2.scratch_space_base_pointer =
	 brw->wm.scratch_bo->offset >> 10; /* reloc */
      wm->thread2.per_thread_scratch_space =
	 ffs(brw->wm.prog_data->total_scratch) - 11;
   } else {
      wm->thread2.scratch_space_base_pointer = 0;
      wm->thread2.per_thread_scratch_space = 0;
   }

   /* URB/CURBE layout for the thread payload. */
   wm->thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
   wm->thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
   wm->thread3.urb_entry_read_offset = 0;
   wm->thread3.const_urb_entry_read_length =
      brw->wm.prog_data->curb_read_length;
   /* BRW_NEW_CURBE_OFFSETS */
   wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;

   if (intel->gen == 5)
      wm->wm4.sampler_count = 0; /* hardware requirement */
   else {
      /* CACHE_NEW_SAMPLER: field is in units of 4 samplers, rounded up. */
      wm->wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
   }

   if (brw->wm.sampler_count) {
      /* reloc: sampler state pointer is in 32-byte units. */
      wm->wm4.sampler_state_pointer = (intel->batch.bo->offset +
				       brw->wm.sampler_offset) >> 5;
   } else {
      wm->wm4.sampler_state_pointer = 0;
   }

   /* BRW_NEW_FRAGMENT_PROGRAM */
   wm->wm5.program_uses_depth = (fp->Base.InputsRead &
				 (1 << FRAG_ATTRIB_WPOS)) != 0;
   wm->wm5.program_computes_depth = (fp->Base.OutputsWritten &
				     BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
   /* _NEW_BUFFERS
    * Override for NULL depthbuffer case, required by the Pixel Shader Computed
    * Depth field.
    */
   if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH))
      wm->wm5.program_computes_depth = 0;

   /* _NEW_COLOR: alpha test also kills pixels, not just discard in the FS. */
   wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled;


   /* BRW_NEW_FRAGMENT_PROGRAM
    *
    * If using the fragment shader backend, the program is always
    * 8-wide. If not, it's always 16.
    */
   if (ctx->Shader.CurrentFragmentProgram) {
      struct brw_shader *shader = (struct brw_shader *)
	 ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];

      if (shader != NULL && shader->ir != NULL) {
	 wm->wm5.enable_8_pix = 1;
	 if (brw->wm.prog_data->prog_offset_16)
	    wm->wm5.enable_16_pix = 1;
      }
   }
   /* Fixed-function fragment program path: SIMD16 only. */
   if (!wm->wm5.enable_8_pix)
      wm->wm5.enable_16_pix = 1;

   wm->wm5.max_threads = brw->wm_max_threads - 1;

   /* _NEW_BUFFERS | _NEW_COLOR
    * Thread dispatch can be skipped entirely when the shader has no
    * observable effect (no color write, no discard, no depth output).
    */
   if (brw_color_buffer_write_enabled(brw) ||
       wm->wm5.program_uses_killpixel ||
       wm->wm5.program_computes_depth) {
      wm->wm5.thread_dispatch_enable = 1;
   }

   wm->wm5.legacy_line_rast = 0;
   wm->wm5.legacy_global_depth_bias = 0;
   wm->wm5.early_depth_test = 1;	/* never need to disable */
   wm->wm5.line_aa_region_width = 0;
   wm->wm5.line_endcap_aa_region_width = 1;

   /* _NEW_POLYGONSTIPPLE */
   wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;

   /* _NEW_POLYGON */
   if (ctx->Polygon.OffsetFill) {
      wm->wm5.depth_offset = 1;
      /* Something weird going on with legacy_global_depth_bias,
       * offset_constant, scaling and MRD.  This value passes glean
       * but gives some odd results elsewhere (eg. the
       * quad-offset-units test).
       */
      wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2;

      /* This is the only value that passes glean:
       */
      wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor;
   }

   /* _NEW_LINE */
   wm->wm5.line_stipple = ctx->Line.StippleFlag;

   /* _NEW_DEPTH */
   if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm)
      wm->wm4.stats_enable = 1;

   /* Emit WM program relocation.  The delta re-adds the low bits
    * (grf_reg_count) that share the DWord with the kernel pointer.
    */
   drm_intel_bo_emit_reloc(intel->batch.bo,
			   brw->wm.state_offset +
			   offsetof(struct brw_wm_unit_state, thread0),
			   brw->wm.prog_bo, wm->thread0.grf_reg_count << 1,
			   I915_GEM_DOMAIN_INSTRUCTION, 0);

   if (brw->wm.prog_data->prog_offset_16) {
      /* Second (SIMD16) kernel pointer relocation. */
      drm_intel_bo_emit_reloc(intel->batch.bo,
			      brw->wm.state_offset +
			      offsetof(struct brw_wm_unit_state, wm9),
			      brw->wm.prog_bo,
			      ((wm->wm9.grf_reg_count_2 << 1) +
			       brw->wm.prog_data->prog_offset_16),
			      I915_GEM_DOMAIN_INSTRUCTION, 0);
   }

   /* Emit scratch space relocation */
   if (brw->wm.prog_data->total_scratch != 0) {
      drm_intel_bo_emit_reloc(intel->batch.bo,
			      brw->wm.state_offset +
			      offsetof(struct brw_wm_unit_state, thread2),
			      brw->wm.scratch_bo,
			      wm->thread2.per_thread_scratch_space,
			      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }

   /* Emit sampler state relocation */
   if (brw->wm.sampler_count != 0) {
      drm_intel_bo_emit_reloc(intel->batch.bo,
			      brw->wm.state_offset +
			      offsetof(struct brw_wm_unit_state, wm4),
			      intel->batch.bo, (brw->wm.sampler_offset |
						wm->wm4.stats_enable |
						(wm->wm4.sampler_count << 2)),
			      I915_GEM_DOMAIN_INSTRUCTION, 0);
   }

   brw->state.dirty.cache |= CACHE_NEW_WM_UNIT;
}
257
/* State atom descriptor: brw_prepare_wm_unit must be re-run whenever any of
 * the listed Mesa, brw, or cache dirty bits is flagged.  Each bit corresponds
 * to a dirty-flag comment inside brw_prepare_wm_unit above.
 */
const struct brw_tracked_state brw_wm_unit = {
   .dirty = {
      .mesa = (_NEW_POLYGON |
	       _NEW_POLYGONSTIPPLE |
	       _NEW_LINE |
	       _NEW_COLOR |
	       _NEW_DEPTH |
	       _NEW_BUFFERS),

      .brw = (BRW_NEW_BATCH |
	      BRW_NEW_FRAGMENT_PROGRAM |
	      BRW_NEW_CURBE_OFFSETS |
	      BRW_NEW_NR_WM_SURFACES),

      .cache = (CACHE_NEW_WM_PROG |
		CACHE_NEW_SAMPLER)
   },
   .prepare = brw_prepare_wm_unit,
};
277