src/mesa/drivers/dri/i965/gen6_wm_state.c

   1 /*
   2  * Copyright © 2009 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include "brw_context.h"
  29 #include "brw_state.h"
  30 #include "brw_defines.h"
  31 #include "brw_util.h"
  32 #include "brw_wm.h"
  33 #include "program/program.h"
  34 #include "program/prog_parameter.h"
  35 #include "program/prog_statevars.h"
  36 #include "intel_batchbuffer.h"
  37
  38 static void
  39 gen6_upload_wm_push_constants(struct brw_context *brw)
  40 {
  41    struct brw_stage_state *stage_state = &brw->wm.base;
  42    /* BRW_NEW_FRAGMENT_PROGRAM */
  43    const struct brw_fragment_program *fp =
  44       brw_fragment_program_const(brw->fragment_program);
  45    /* CACHE_NEW_WM_PROG */
  46    const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
  47
  48    gen6_upload_push_constants(brw, &fp->program.Base, &prog_data->base,
  49                               stage_state, AUB_TRACE_WM_CONSTANTS);
  50
  51    if (brw->gen >= 7) {
  52       gen7_upload_constant_state(brw, &brw->wm.base, true,
  53                                  _3DSTATE_CONSTANT_PS);
  54    }
  55 }
  56
  57 const struct brw_tracked_state gen6_wm_push_constants = {
  58    .dirty = {
  59       .mesa  = _NEW_PROGRAM_CONSTANTS,
  60       .brw   = BRW_NEW_BATCH |
  61                BRW_NEW_FRAGMENT_PROGRAM |
  62                BRW_NEW_PUSH_CONSTANT_ALLOCATION,
  63       .cache = CACHE_NEW_WM_PROG,
  64    },
  65    .emit = gen6_upload_wm_push_constants,
  66 };
  67
  68 static void
  69 upload_wm_state(struct brw_context *brw)
  70 {
  71    struct gl_context *ctx = &brw->ctx;
  72    /* BRW_NEW_FRAGMENT_PROGRAM */
  73    const struct brw_fragment_program *fp =
  74       brw_fragment_program_const(brw->fragment_program);
  75    /* CACHE_NEW_WM_PROG */
  76    const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
  77    uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
  78
  79    /* _NEW_BUFFERS */
  80    bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
  81
  82    /* We can't fold this into gen6_upload_wm_push_constants(), because
  83     * according to the SNB PRM, vol 2 part 1 section 7.2.2
  84     * (3DSTATE_CONSTANT_PS [DevSNB]):
  85     *
  86     *     "[DevSNB]: This packet must be followed by WM_STATE."
  87     */
  88    if (prog_data->base.nr_params == 0) {
  89       /* Disable the push constant buffers. */
  90       BEGIN_BATCH(5);
  91       OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
  92       OUT_BATCH(0);
  93       OUT_BATCH(0);
  94       OUT_BATCH(0);
  95       OUT_BATCH(0);
  96       ADVANCE_BATCH();
  97    } else {
  98       BEGIN_BATCH(5);
  99       OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
 100                 GEN6_CONSTANT_BUFFER_0_ENABLE |
 101                 (5 - 2));
 102       /* Pointer to the WM constant buffer.  Covered by the set of
 103        * state flags from gen6_upload_wm_push_constants.
 104        */
 105       OUT_BATCH(brw->wm.base.push_const_offset +
 106                 brw->wm.base.push_const_size - 1);
 107       OUT_BATCH(0);
 108       OUT_BATCH(0);
 109       OUT_BATCH(0);
 110       ADVANCE_BATCH();
 111    }
 112
 113    dw2 = dw4 = dw5 = dw6 = ksp2 = 0;
 114    dw4 |= GEN6_WM_STATISTICS_ENABLE;
 115    dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
 116    dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
 117
 118    /* Use ALT floating point mode for ARB fragment programs, because they
 119     * require 0^0 == 1.  Even though _CurrentFragmentProgram is used for
 120     * rendering, CurrentProgram[MESA_SHADER_FRAGMENT] is used for this check
 121     * to differentiate between the GLSL and non-GLSL cases.
 122     */
 123    if (ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] == NULL)
 124       dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
 125
 126    dw2 |= (ALIGN(brw->wm.base.sampler_count, 4) / 4) <<
 127            GEN6_WM_SAMPLER_COUNT_SHIFT;
 128
 129    dw2 |= ((prog_data->base.binding_table.size_bytes / 4) <<
 130            GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
 131
 132    dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
 133
 134    /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
 135     * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
 136     * is successfully compiled. In majority of the cases that bring us
 137     * better performance than 'SIMD8 only' dispatch.
 138     */
 139    int min_inv_per_frag =
 140       _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
 141    assert(min_inv_per_frag >= 1);
 142
 143    if (prog_data->prog_offset_16) {
 144       dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
 145
 146       if (min_inv_per_frag == 1) {
 147          dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
 148          dw4 |= (prog_data->base.dispatch_grf_start_reg <<
 149                  GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
 150          dw4 |= (prog_data->dispatch_grf_start_reg_16 <<
 151                  GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
 152          ksp0 = brw->wm.base.prog_offset;
 153          ksp2 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
 154       } else {
 155          dw4 |= (prog_data->dispatch_grf_start_reg_16 <<
 156                 GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
 157          ksp0 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
 158       }
 159    }
 160    else {
 161       dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
 162       dw4 |= (prog_data->base.dispatch_grf_start_reg <<
 163               GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
 164       ksp0 = brw->wm.base.prog_offset;
 165    }
 166
 167    /* CACHE_NEW_WM_PROG | _NEW_COLOR */
 168    if (prog_data->dual_src_blend &&
 169        (ctx->Color.BlendEnabled & 1) &&
 170        ctx->Color.Blend[0]._UsesDualSrc) {
 171       dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
 172    }
 173
 174    /* _NEW_LINE */
 175    if (ctx->Line.StippleFlag)
 176       dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
 177
 178    /* _NEW_POLYGON */
 179    if (ctx->Polygon.StippleFlag)
 180       dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
 181
 182    /* BRW_NEW_FRAGMENT_PROGRAM */
 183    if (fp->program.Base.InputsRead & VARYING_BIT_POS)
 184       dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
 185    if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
 186       dw5 |= GEN6_WM_COMPUTED_DEPTH;
 187    dw6 |= prog_data->barycentric_interp_modes <<
 188       GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 189
 190    /* _NEW_COLOR, _NEW_MULTISAMPLE */
 191    if (prog_data->uses_kill || ctx->Color.AlphaEnabled ||
 192        ctx->Multisample.SampleAlphaToCoverage ||
 193        prog_data->uses_omask)
 194       dw5 |= GEN6_WM_KILL_ENABLE;
 195
 196    /* _NEW_BUFFERS | _NEW_COLOR */
 197    if (brw_color_buffer_write_enabled(brw) ||
 198        dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) {
 199       dw5 |= GEN6_WM_DISPATCH_ENABLE;
 200    }
 201
 202    /* From the SNB PRM, volume 2 part 1, page 278:
 203     * "This bit is inserted in the PS payload header and made available to
 204     * the DataPort (either via the message header or via header bypass) to
 205     * indicate that oMask data (one or two phases) is included in Render
 206     * Target Write messages. If present, the oMask data is used to mask off
 207     * samples."
 208     */
 209     if (prog_data->uses_omask)
 210       dw5 |= GEN6_WM_OMASK_TO_RENDER_TARGET;
 211
 212    dw6 |= prog_data->num_varying_inputs <<
 213       GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
 214    if (multisampled_fbo) {
 215       /* _NEW_MULTISAMPLE */
 216       if (ctx->Multisample.Enabled)
 217          dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
 218       else
 219          dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
 220
 221       if (min_inv_per_frag > 1)
 222          dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
 223       else {
 224          dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
 225
 226          /* From the Sandy Bridge PRM, Vol 2 part 1, 7.7.1 ("Pixel Grouping
 227           * (Dispatch Size) Control"), p.334:
 228           *
 229           *     Note: in the table below, the Valid column indicates which
 230           *     products that combination is supported on. Combinations of
 231           *     dispatch enables not listed in the table are not available on
 232           *     any product.
 233           *
 234           *     A: Valid on all products
 235           *
 236           *     B: Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
 237           *     computed depth.
 238           *
 239           *     D: Valid on all products, except when in non-1x PERSAMPLE mode
 240           *     (applies to [DevSNB+] only). Not valid on [DevSNB] if 4x
 241           *     PERPIXEL mode with pixel shader computed depth.
 242           *
 243           *     E: Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
 244           *     computed depth.
 245           *
 246           *     F: Valid on all products, except not valid on [DevSNB] if 4x
 247           *     PERPIXEL mode with pixel shader computed depth.
 248           *
 249           * In the table that follows, the only entry with "A" in the Valid
 250           * column is the entry where only 8 pixel dispatch is enabled.
 251           * Therefore, when we are in PERPIXEL mode with pixel shader computed
 252           * depth, we need to disable SIMD16 dispatch.
 253           */
 254          if (dw5 & GEN6_WM_COMPUTED_DEPTH)
 255             dw5 &= ~GEN6_WM_16_DISPATCH_ENABLE;
 256       }
 257    } else {
 258       dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
 259       dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
 260    }
 261
 262    /* From the SNB PRM, volume 2 part 1, page 281:
 263     * "If the PS kernel does not need the Position XY Offsets
 264     * to compute a Position XY value, then this field should be
 265     * programmed to POSOFFSET_NONE."
 266     *
 267     * "SW Recommendation: If the PS kernel needs the Position Offsets
 268     * to compute a Position XY value, this field should match Position
 269     * ZW Interpolation Mode to ensure a consistent position.xyzw
 270     * computation."
 271     * We only require XY sample offsets. So, this recommendation doesn't
 272     * look useful at the moment. We might need this in future.
 273     */
 274    if (prog_data->uses_pos_offset)
 275       dw6 |= GEN6_WM_POSOFFSET_SAMPLE;
 276    else
 277       dw6 |= GEN6_WM_POSOFFSET_NONE;
 278
 279    BEGIN_BATCH(9);
 280    OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
 281    OUT_BATCH(ksp0);
 282    OUT_BATCH(dw2);
 283    if (prog_data->base.total_scratch) {
 284       OUT_RELOC(brw->wm.base.scratch_bo,
 285                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 286                 ffs(prog_data->base.total_scratch) - 11);
 287    } else {
 288       OUT_BATCH(0);
 289    }
 290    OUT_BATCH(dw4);
 291    OUT_BATCH(dw5);
 292    OUT_BATCH(dw6);
 293    OUT_BATCH(0); /* kernel 1 pointer */
 294    OUT_BATCH(ksp2);
 295    ADVANCE_BATCH();
 296 }
 297
 298 const struct brw_tracked_state gen6_wm_state = {
 299    .dirty = {
 300       .mesa  = _NEW_BUFFERS |
 301                _NEW_COLOR |
 302                _NEW_LINE |
 303                _NEW_MULTISAMPLE |
 304                _NEW_POLYGON |
 305                _NEW_PROGRAM_CONSTANTS,
 306       .brw   = BRW_NEW_BATCH |
 307                BRW_NEW_FRAGMENT_PROGRAM |
 308                BRW_NEW_PUSH_CONSTANT_ALLOCATION,
 309       .cache = CACHE_NEW_WM_PROG
 310    },
 311    .emit = upload_wm_state,
 312 };