src/mesa/drivers/dri/i965/gen8_ps_state.c

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <stdbool.h>
  25 #include "program/program.h"
  26 #include "brw_state.h"
  27 #include "brw_defines.h"
  28 #include "intel_batchbuffer.h"
  29
  30 static void
  31 upload_ps_extra(struct brw_context *brw)
  32 {
  33    struct gl_context *ctx = &brw->ctx;
  34    /* BRW_NEW_FRAGMENT_PROGRAM */
  35    const struct brw_fragment_program *fp =
  36       brw_fragment_program_const(brw->fragment_program);
  37    /* BRW_NEW_FS_PROG_DATA */
  38    const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
  39    uint32_t dw1 = 0;
  40
  41    dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
  42    dw1 |= prog_data->computed_depth_mode << GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT;
  43
  44    if (prog_data->uses_kill)
  45       dw1 |= GEN8_PSX_KILL_ENABLE;
  46
  47    if (prog_data->num_varying_inputs != 0)
  48       dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
  49
  50    if (fp->program.Base.InputsRead & VARYING_BIT_POS)
  51       dw1 |= GEN8_PSX_USES_SOURCE_DEPTH | GEN8_PSX_USES_SOURCE_W;
  52
  53    /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */
  54    bool multisampled_fbo = brw->num_samples > 1;
  55    if (multisampled_fbo &&
  56        _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1)
  57       dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
  58
  59    if (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
  60       dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
  61
  62    if (prog_data->uses_omask)
  63       dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
  64
  65    BEGIN_BATCH(2);
  66    OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
  67    OUT_BATCH(dw1);
  68    ADVANCE_BATCH();
  69 }
  70
  71 const struct brw_tracked_state gen8_ps_extra = {
  72    .dirty = {
  73       .mesa  = _NEW_MULTISAMPLE,
  74       .brw   = BRW_NEW_CONTEXT |
  75                BRW_NEW_FRAGMENT_PROGRAM |
  76                BRW_NEW_FS_PROG_DATA |
  77                BRW_NEW_NUM_SAMPLES,
  78    },
  79    .emit = upload_ps_extra,
  80 };
  81
  82 static void
  83 upload_wm_state(struct brw_context *brw)
  84 {
  85    struct gl_context *ctx = &brw->ctx;
  86    uint32_t dw1 = 0;
  87
  88    dw1 |= GEN7_WM_STATISTICS_ENABLE;
  89    dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
  90    dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
  91    dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
  92
  93    /* _NEW_LINE */
  94    if (ctx->Line.StippleFlag)
  95       dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
  96
  97    /* _NEW_POLYGON */
  98    if (ctx->Polygon.StippleFlag)
  99       dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
 100
 101    /* BRW_NEW_FS_PROG_DATA */
 102    dw1 |= brw->wm.prog_data->barycentric_interp_modes <<
 103       GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 104
 105    BEGIN_BATCH(2);
 106    OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2));
 107    OUT_BATCH(dw1);
 108    ADVANCE_BATCH();
 109 }
 110
 111 const struct brw_tracked_state gen8_wm_state = {
 112    .dirty = {
 113       .mesa  = _NEW_LINE |
 114                _NEW_POLYGON,
 115       .brw   = BRW_NEW_CONTEXT |
 116                BRW_NEW_FS_PROG_DATA,
 117    },
 118    .emit = upload_wm_state,
 119 };
 120
 121 static void
 122 upload_ps_state(struct brw_context *brw)
 123 {
 124    struct gl_context *ctx = &brw->ctx;
 125    uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
 126
 127    /* BRW_NEW_FS_PROG_DATA */
 128    const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
 129
 130    /* Initialize the execution mask with VMask.  Otherwise, derivatives are
 131     * incorrect for subspans where some of the pixels are unlit.  We believe
 132     * the bit just didn't take effect in previous generations.
 133     */
 134    dw3 |= GEN7_PS_VECTOR_MASK_ENABLE;
 135
 136    dw3 |=
 137       (ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
 138
 139    /* BRW_NEW_FS_PROG_DATA */
 140    dw3 |=
 141       ((prog_data->base.binding_table.size_bytes / 4) <<
 142        GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
 143
 144    if (prog_data->base.use_alt_mode)
 145       dw3 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
 146
 147    /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
 148     * it implicitly scales for different GT levels (which have some # of PSDs).
 149     *
 150     * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
 151     */
 152    if (brw->gen >= 9)
 153       dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT;
 154    else
 155       dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT;
 156
 157    if (prog_data->base.nr_params > 0)
 158       dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
 159
 160    /* From the documentation for this packet:
 161     * "If the PS kernel does not need the Position XY Offsets to
 162     *  compute a Position Value, then this field should be programmed
 163     *  to POSOFFSET_NONE."
 164     *
 165     * "SW Recommendation: If the PS kernel needs the Position Offsets
 166     *  to compute a Position XY value, this field should match Position
 167     *  ZW Interpolation Mode to ensure a consistent position.xyzw
 168     *  computation."
 169     *
 170     * We only require XY sample offsets. So, this recommendation doesn't
 171     * look useful at the moment. We might need this in future.
 172     */
 173    if (brw->wm.prog_data->uses_pos_offset)
 174       dw6 |= GEN7_PS_POSOFFSET_SAMPLE;
 175    else
 176       dw6 |= GEN7_PS_POSOFFSET_NONE;
 177
 178    dw6 |= brw->wm.fast_clear_op;
 179
 180    /* _NEW_MULTISAMPLE
 181     * In case of non 1x per sample shading, only one of SIMD8 and SIMD16
 182     * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
 183     * is successfully compiled. In majority of the cases that bring us
 184     * better performance than 'SIMD8 only' dispatch.
 185     */
 186    int min_invocations_per_fragment =
 187       _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
 188    assert(min_invocations_per_fragment >= 1);
 189
 190    if (prog_data->prog_offset_16 || prog_data->no_8) {
 191       dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
 192       if (!prog_data->no_8 && min_invocations_per_fragment == 1) {
 193          dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
 194          dw7 |= (prog_data->base.dispatch_grf_start_reg <<
 195                  GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
 196          dw7 |= (prog_data->dispatch_grf_start_reg_16 <<
 197                  GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
 198          ksp0 = brw->wm.base.prog_offset;
 199          ksp2 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
 200       } else {
 201          dw7 |= (prog_data->dispatch_grf_start_reg_16 <<
 202                  GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
 203
 204          ksp0 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
 205       }
 206    } else {
 207       dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
 208       dw7 |= (prog_data->base.dispatch_grf_start_reg <<
 209               GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
 210       ksp0 = brw->wm.base.prog_offset;
 211    }
 212
 213    BEGIN_BATCH(12);
 214    OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
 215    OUT_BATCH(ksp0);
 216    OUT_BATCH(0);
 217    OUT_BATCH(dw3);
 218    if (prog_data->base.total_scratch) {
 219       OUT_RELOC64(brw->wm.base.scratch_bo,
 220                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 221                   ffs(prog_data->base.total_scratch) - 11);
 222    } else {
 223       OUT_BATCH(0);
 224       OUT_BATCH(0);
 225    }
 226    OUT_BATCH(dw6);
 227    OUT_BATCH(dw7);
 228    OUT_BATCH(0); /* kernel 1 pointer */
 229    OUT_BATCH(0);
 230    OUT_BATCH(ksp2);
 231    OUT_BATCH(0);
 232    ADVANCE_BATCH();
 233 }
 234
 235 const struct brw_tracked_state gen8_ps_state = {
 236    .dirty = {
 237       .mesa  = _NEW_MULTISAMPLE,
 238       .brw   = BRW_NEW_BATCH |
 239                BRW_NEW_FRAGMENT_PROGRAM |
 240                BRW_NEW_FS_PROG_DATA,
 241    },
 242    .emit = upload_ps_state,
 243 };