src/gallium/drivers/ilo/ilo_render_media.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 2014 LunarG, Inc.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22  * DEALINGS IN THE SOFTWARE.
  23  *
  24  * Authors:
  25  *    Chia-I Wu <olv@lunarg.com>
  26  */
  27
  28 #include "genhw/genhw.h"
  29 #include "core/ilo_builder_media.h"
  30 #include "core/ilo_builder_mi.h"
  31 #include "core/ilo_builder_render.h"
  32
  33 #include "ilo_state.h"
  34 #include "ilo_render_gen.h"
  35
  36 struct gen7_l3_config {
  37    int slm;
  38    int urb;
  39    int rest;
  40    int dc;
  41    int ro;
  42    int is;
  43    int c;
  44    int t;
  45 };
  46
  47 /*
  48  * From the Ivy Bridge PRM, volume 1 part 7, page 10:
  49  *
  50  *     "Normal L3/URB mode (non-SLM mode), uses all 4 banks of L3 equally to
  51  *      distribute cycles. The following allocation is a suggested programming
  52  *      model. Note all numbers below are given in KBytes."
  53  *
  54  * From the Haswell PRM, volume 7, page 662:
  55  *
  56  *     "The configuration for {SLM = 0,URB = 224,DC = 32,RO = 256,IS = 0,C =
  57  *      0,T =0, SUM 512} was validated as a later supported configuration and
  58  *      can be utilized if desired."
  59  */
  60 static const struct gen7_l3_config gen7_l3_non_slm_configs[] = {
  61    /*       SLM   URB  Rest    DC    RO   I/S     C     T */
  62    [0] = {    0,  256,    0,    0,  256,    0,    0,    0, },
  63    [1] = {    0,  256,    0,  128,  128,    0,    0,    0, },
  64    [2] = {    0,  256,    0,   32,    0,   64,   32,  128, },
  65    [3] = {    0,  224,    0,   64,    0,   64,   32,  128, },
  66    [4] = {    0,  224,    0,  128,    0,   64,   32,   64, },
  67    [5] = {    0,  224,    0,   64,    0,  128,   32,   64, },
  68    [6] = {    0,  224,    0,    0,    0,  128,   32,  128, },
  69    [7] = {    0,  256,    0,    0,    0,  128,    0,  128, },
  70
  71    [8] = {    0,  224,    0,   32,  256,    0,    0,    0, },
  72 };
  73
  74 /*
  75  * From the Ivy Bridge PRM, volume 1 part 7, page 11:
  76  *
  77  *     "With the existence of Shared Local Memory, a 64KB chunk from each of
  78  *      the 2 L3 banks will be reserved for SLM usage. The remaining cache
  79  *      space is divided between the remaining clients. SLM allocation is done
  80  *      via reducing the number of ways on the two banks from 64 to 32."
  81  *
  82  * From the Haswell PRM, volume 7, page 662:
  83  *
  84  *     "The configuration for {SLM = 128,URB = 128,DC = 0,RO = 256,IS = 0,C =
  85  *      0,T =0, SUM 512} was validated as a later supported configuration and
  86  *      can be utilized if desired. For this configuration, global atomics
  87  *      must be programmed to be in GTI."
  88  */
  89 static const struct gen7_l3_config gen7_l3_slm_configs[] = {
  90    /*       SLM   URB  Rest    DC    RO   I/S     C     T */
  91    [0] = {  128,  128,    0,  128,  128,    0,    0,    0, },
  92    [1] = {  128,  128,    0,   64,    0,   64,   64,   64, },
  93    [2] = {  128,  128,    0,   32,    0,   64,   32,  128, },
  94    [3] = {  128,  128,    0,   32,    0,  128,   32,   64, },
  95
  96    [4] = {  128,  128,    0,    0,  256,    0,    0,    0, },
  97 };
  98
  99 static void
 100 gen7_launch_grid_l3(struct ilo_render *r, bool use_slm)
 101 {
 102    uint32_t l3sqcreg1, l3cntlreg2, l3cntlreg3;
 103    const struct gen7_l3_config *conf;
 104
 105    /*
 106     * This function mostly follows what beignet does.  I do not know why, for
 107     * example, CON4DCUNC should be reset.  I do not know if it should be set
 108     * again after launch_grid().
 109     */
 110
 111    ILO_DEV_ASSERT(r->dev, 7, 7.5);
 112
 113    if (use_slm)
 114       conf = &gen7_l3_slm_configs[1];
 115    else
 116       conf = &gen7_l3_non_slm_configs[4];
 117
 118    /* unset GEN7_REG_L3SQCREG1_CON4DCUNC (without readback first) */
 119    if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) {
 120       l3sqcreg1 = GEN75_REG_L3SQCREG1_SQGPCI_24 |
 121                   GEN75_REG_L3SQCREG1_SQHPCI_8;
 122    } else {
 123       l3sqcreg1 = GEN7_REG_L3SQCREG1_SQGHPCI_18_6;
 124    }
 125
 126    l3cntlreg2 = (conf->dc / 8) << GEN7_REG_L3CNTLREG2_DCWASS__SHIFT |
 127                 (conf->ro / 8) << GEN7_REG_L3CNTLREG2_RDOCPL__SHIFT |
 128                 (conf->urb / 8) << GEN7_REG_L3CNTLREG2_URBALL__SHIFT;
 129
 130    l3cntlreg3 = (conf->t / 8) << GEN7_REG_L3CNTLREG3_TXWYALL__SHIFT |
 131                 (conf->c / 8) << GEN7_REG_L3CNTLREG3_CTWYALL__SHIFT |
 132                 (conf->is / 8) << GEN7_REG_L3CNTLREG3_ISWYALL__SHIFT;
 133
 134    if (conf->slm) {
 135       /*
 136        * From the Ivy Bridge PRM, volume 1 part 7, page 11:
 137        *
 138        *     "Note that URB needs to be set as low b/w client in SLM mode,
 139        *      else the hash will fail. This is a required s/w model."
 140        */
 141       l3cntlreg2 |= GEN7_REG_L3CNTLREG2_URBSLMB |
 142                     GEN7_REG_L3CNTLREG2_SLMMENB;
 143    }
 144
 145    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3SQCREG1, l3sqcreg1);
 146    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG2, l3cntlreg2);
 147    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG3, l3cntlreg3);
 148 }
 149
 150 int
 151 ilo_render_get_launch_grid_commands_len(const struct ilo_render *render,
 152                                         const struct ilo_state_vector *vec)
 153 {
 154    static int len;
 155
 156    ILO_DEV_ASSERT(render->dev, 7, 7.5);
 157
 158    if (!len) {
 159       len +=
 160          GEN6_PIPELINE_SELECT__SIZE +
 161          GEN6_STATE_BASE_ADDRESS__SIZE +
 162          GEN6_MEDIA_VFE_STATE__SIZE +
 163          GEN6_MEDIA_CURBE_LOAD__SIZE +
 164          GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD__SIZE +
 165          GEN6_MEDIA_STATE_FLUSH__SIZE;
 166
 167       len += ilo_render_get_flush_len(render) * 3;
 168
 169       if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
 170          len += GEN6_MI_LOAD_REGISTER_IMM__SIZE * 3 * 2;
 171          len += GEN7_GPGPU_WALKER__SIZE;
 172       }
 173    }
 174
 175    return len;
 176 }
 177
 178 void
 179 ilo_render_emit_launch_grid_commands(struct ilo_render *render,
 180                                      const struct ilo_state_vector *vec,
 181                                      const struct ilo_render_launch_grid_session *session)
 182 {
 183    const unsigned batch_used = ilo_builder_batch_used(render->builder);
 184    const uint32_t pcb = render->state.cs.PUSH_CONSTANT_BUFFER;
 185    const int pcb_size = render->state.cs.PUSH_CONSTANT_BUFFER_size;
 186    int simd_size;
 187    bool use_slm;
 188
 189    ILO_DEV_ASSERT(render->dev, 7, 7.5);
 190
 191    simd_size = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_SIMD_SIZE);
 192    use_slm = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_LOCAL_SIZE);
 193
 194    ilo_render_emit_flush(render);
 195
 196    if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
 197       gen7_launch_grid_l3(render, use_slm);
 198       ilo_render_emit_flush(render);
 199
 200       gen6_PIPELINE_SELECT(render->builder,
 201             GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU);
 202    } else {
 203       gen6_PIPELINE_SELECT(render->builder,
 204             GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA);
 205    }
 206
 207    gen6_state_base_address(render->builder, true);
 208
 209    gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm);
 210
 211    if (pcb_size)
 212       gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size);
 213
 214    gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(render->builder,
 215          session->idrt, session->idrt_size);
 216
 217    gen7_GPGPU_WALKER(render->builder, session->thread_group_offset,
 218          session->thread_group_dim, session->thread_group_size, simd_size);
 219
 220    gen6_MEDIA_STATE_FLUSH(render->builder);
 221
 222    if (ilo_dev_gen(render->dev) >= ILO_GEN(7) && use_slm) {
 223       ilo_render_emit_flush(render);
 224       gen7_launch_grid_l3(render, false);
 225    }
 226
 227    assert(ilo_builder_batch_used(render->builder) <= batch_used +
 228          ilo_render_get_launch_grid_commands_len(render, vec));
 229 }