src/mesa/drivers/dri/i965/brw_vs.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/compiler.h"
  34 #include "brw_context.h"
  35 #include "brw_vs.h"
  36 #include "brw_util.h"
  37 #include "brw_state.h"
  38 #include "program/prog_print.h"
  39 #include "program/prog_parameter.h"
  40
  41 #include "glsl/ralloc.h"
  42
  43 static inline void assign_vue_slot(struct brw_vue_map *vue_map,
  44                                    int vert_result)
  45 {
  46    /* Make sure this vert_result hasn't been assigned a slot already */
  47    assert (vue_map->vert_result_to_slot[vert_result] == -1);
  48
  49    vue_map->vert_result_to_slot[vert_result] = vue_map->num_slots;
  50    vue_map->slot_to_vert_result[vue_map->num_slots++] = vert_result;
  51 }
  52
  53 /**
  54  * Compute the VUE map for vertex shader program.
  55  */
  56 void
  57 brw_compute_vue_map(struct brw_vue_map *vue_map,
  58                     const struct intel_context *intel, int nr_userclip,
  59                     bool two_side_color, GLbitfield64 outputs_written)
  60 {
  61    int i;
  62
  63    vue_map->num_slots = 0;
  64    for (i = 0; i < BRW_VERT_RESULT_MAX; ++i) {
  65       vue_map->vert_result_to_slot[i] = -1;
  66       vue_map->slot_to_vert_result[i] = BRW_VERT_RESULT_MAX;
  67    }
  68
  69    /* VUE header: format depends on chip generation and whether clipping is
  70     * enabled.
  71     */
  72    switch (intel->gen) {
  73    case 4:
  74       /* There are 8 dwords in VUE header pre-Ironlake:
  75        * dword 0-3 is indices, point width, clip flags.
  76        * dword 4-7 is ndc position
  77        * dword 8-11 is the first vertex data.
  78        */
  79       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
  80       assign_vue_slot(vue_map, BRW_VERT_RESULT_NDC);
  81       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
  82       break;
  83    case 5:
  84       /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
  85        * dword 0-3 of the header is indices, point width, clip flags.
  86        * dword 4-7 is the ndc position
  87        * dword 8-11 of the vertex header is the 4D space position
  88        * dword 12-19 of the vertex header is the user clip distance.
  89        * dword 20-23 is a pad so that the vertex element data is aligned
  90        * dword 24-27 is the first vertex data we fill.
  91        *
  92        * Note: future pipeline stages expect 4D space position to be
  93        * contiguous with the other vert_results, so we make dword 24-27 a
  94        * duplicate copy of the 4D space position.
  95        */
  96       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
  97       assign_vue_slot(vue_map, BRW_VERT_RESULT_NDC);
  98       assign_vue_slot(vue_map, BRW_VERT_RESULT_HPOS_DUPLICATE);
  99       assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP0);
 100       assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP1);
 101       assign_vue_slot(vue_map, BRW_VERT_RESULT_PAD);
 102       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
 103       break;
 104    case 6:
 105    case 7:
 106       /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
 107        * dword 0-3 of the header is indices, point width, clip flags.
 108        * dword 4-7 is the 4D space position
 109        * dword 8-15 of the vertex header is the user clip distance if
 110        * enabled.
 111        * dword 8-11 or 16-19 is the first vertex element data we fill.
 112        */
 113       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
 114       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
 115       if (nr_userclip) {
 116          assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP0);
 117          assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP1);
 118       }
 119       if (two_side_color) {
 120          /* front and back colors need to be consecutive */
 121          if ((outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
 122              (outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
 123             assert(outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
 124             assert(outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
 125             assign_vue_slot(vue_map, VERT_RESULT_COL0);
 126             assign_vue_slot(vue_map, VERT_RESULT_BFC0);
 127             assign_vue_slot(vue_map, VERT_RESULT_COL1);
 128             assign_vue_slot(vue_map, VERT_RESULT_BFC1);
 129          } else if ((outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
 130                     (outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) {
 131             assign_vue_slot(vue_map, VERT_RESULT_COL0);
 132             assign_vue_slot(vue_map, VERT_RESULT_BFC0);
 133          }
 134       }
 135       break;
 136    default:
 137       assert (!"VUE map not known for this chip generation");
 138       break;
 139    }
 140
 141    /* The hardware doesn't care about the rest of the vertex outputs, so just
 142     * assign them contiguously.  Don't reassign outputs that already have a
 143     * slot.
 144     */
 145    for (int i = 0; i < VERT_RESULT_MAX; ++i) {
 146       if ((outputs_written & BITFIELD64_BIT(i)) &&
 147           vue_map->vert_result_to_slot[i] == -1) {
 148          assign_vue_slot(vue_map, i);
 149       }
 150    }
 151 }
 152
 153 static bool
 154 do_vs_prog(struct brw_context *brw,
 155            struct gl_shader_program *prog,
 156            struct brw_vertex_program *vp,
 157            struct brw_vs_prog_key *key)
 158 {
 159    struct gl_context *ctx = &brw->intel.ctx;
 160    struct intel_context *intel = &brw->intel;
 161    GLuint program_size;
 162    const GLuint *program;
 163    struct brw_vs_compile c;
 164    void *mem_ctx;
 165    int aux_size;
 166    int i;
 167
 168    memset(&c, 0, sizeof(c));
 169    memcpy(&c.key, key, sizeof(*key));
 170
 171    mem_ctx = ralloc_context(NULL);
 172
 173    brw_init_compile(brw, &c.func, mem_ctx);
 174    c.vp = vp;
 175
 176    c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
 177    c.prog_data.inputs_read = vp->program.Base.InputsRead;
 178
 179    if (c.key.copy_edgeflag) {
 180       c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE);
 181       c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
 182    }
 183
 184    /* Put dummy slots into the VUE for the SF to put the replaced
 185     * point sprite coords in.  We shouldn't need these dummy slots,
 186     * which take up precious URB space, but it would mean that the SF
 187     * doesn't get nice aligned pairs of input coords into output
 188     * coords, which would be a pain to handle.
 189     */
 190    for (i = 0; i < 8; i++) {
 191       if (c.key.point_coord_replace & (1 << i))
 192          c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
 193    }
 194
 195    if (0) {
 196       _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG,
 197                                GL_TRUE);
 198    }
 199
 200    /* Emit GEN4 code.
 201     */
 202    if (brw->new_vs_backend && prog) {
 203       if (!brw_vs_emit(prog, &c)) {
 204          ralloc_free(mem_ctx);
 205          return false;
 206       }
 207    } else {
 208       brw_old_vs_emit(&c);
 209    }
 210
 211    /* Scratch space is used for register spilling */
 212    if (c.last_scratch) {
 213       c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
 214
 215       brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
 216                          c.prog_data.total_scratch * brw->vs_max_threads);
 217    }
 218
 219    /* get the program
 220     */
 221    program = brw_get_program(&c.func, &program_size);
 222
 223    /* We upload from &c.prog_data including the constant_map assuming
 224     * they're packed together.  It would be nice to have a
 225     * compile-time assert macro here.
 226     */
 227    assert(c.constant_map == (int8_t *)&c.prog_data +
 228           sizeof(c.prog_data));
 229    assert(ctx->Const.VertexProgram.MaxNativeParameters ==
 230           ARRAY_SIZE(c.constant_map));
 231    (void) ctx;
 232
 233    aux_size = sizeof(c.prog_data);
 234    /* constant_map */
 235    aux_size += c.vp->program.Base.Parameters->NumParameters;
 236
 237    brw_upload_cache(&brw->cache, BRW_VS_PROG,
 238                     &c.key, sizeof(c.key),
 239                     program, program_size,
 240                     &c.prog_data, aux_size,
 241                     &brw->vs.prog_offset, &brw->vs.prog_data);
 242    ralloc_free(mem_ctx);
 243
 244    return true;
 245 }
 246
 247
 248 static void brw_upload_vs_prog(struct brw_context *brw)
 249 {
 250    struct gl_context *ctx = &brw->intel.ctx;
 251    struct brw_vs_prog_key key;
 252    struct brw_vertex_program *vp =
 253       (struct brw_vertex_program *)brw->vertex_program;
 254    int i;
 255
 256    memset(&key, 0, sizeof(key));
 257
 258    /* Just upload the program verbatim for now.  Always send it all
 259     * the inputs it asks for, whether they are varying or not.
 260     */
 261    key.program_string_id = vp->id;
 262    key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
 263    key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
 264                         ctx->Polygon.BackMode != GL_FILL);
 265    key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
 266
 267    /* _NEW_LIGHT | _NEW_BUFFERS */
 268    key.clamp_vertex_color = ctx->Light._ClampVertexColor;
 269
 270    /* _NEW_POINT */
 271    if (ctx->Point.PointSprite) {
 272       for (i = 0; i < 8; i++) {
 273          if (ctx->Point.CoordReplace[i])
 274             key.point_coord_replace |= (1 << i);
 275       }
 276    }
 277
 278    /* BRW_NEW_VERTICES */
 279    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
 280       if (vp->program.Base.InputsRead & (1 << i) &&
 281           brw->vb.inputs[i].glarray->Type == GL_FIXED) {
 282          key.gl_fixed_input_size[i] = brw->vb.inputs[i].glarray->Size;
 283       }
 284    }
 285
 286    if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
 287                          &key, sizeof(key),
 288                          &brw->vs.prog_offset, &brw->vs.prog_data)) {
 289       bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
 290                                 vp, &key);
 291
 292       assert(success);
 293    }
 294    brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
 295                            sizeof(*brw->vs.prog_data));
 296 }
 297
 298 /* See brw_vs.c:
 299  */
 300 const struct brw_tracked_state brw_vs_prog = {
 301    .dirty = {
 302       .mesa  = (_NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT |
 303                 _NEW_BUFFERS),
 304       .brw   = (BRW_NEW_VERTEX_PROGRAM |
 305                 BRW_NEW_VERTICES),
 306       .cache = 0
 307    },
 308    .prepare = brw_upload_vs_prog
 309 };
 310
 311 bool
 312 brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
 313 {
 314    struct brw_context *brw = brw_context(ctx);
 315    struct brw_vs_prog_key key;
 316    struct gl_vertex_program *vp = prog->VertexProgram;
 317    struct brw_vertex_program *bvp = brw_vertex_program(vp);
 318    uint32_t old_prog_offset = brw->vs.prog_offset;
 319    struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
 320    bool success;
 321
 322    if (!vp)
 323       return true;
 324
 325    memset(&key, 0, sizeof(key));
 326
 327    key.program_string_id = bvp->id;
 328    key.clamp_vertex_color = true;
 329
 330    success = do_vs_prog(brw, prog, bvp, &key);
 331
 332    brw->vs.prog_offset = old_prog_offset;
 333    brw->vs.prog_data = old_prog_data;
 334
 335    return success;
 336 }