src/mesa/drivers/dri/i965/brw_vs.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/compiler.h"
  34 #include "brw_context.h"
  35 #include "brw_vs.h"
  36 #include "brw_util.h"
  37 #include "brw_state.h"
  38 #include "program/prog_print.h"
  39 #include "program/prog_parameter.h"
  40
  41 #include "glsl/ralloc.h"
  42
  43 static inline void assign_vue_slot(struct brw_vue_map *vue_map,
  44                                    int vert_result)
  45 {
  46    /* Make sure this vert_result hasn't been assigned a slot already */
  47    assert (vue_map->vert_result_to_slot[vert_result] == -1);
  48
  49    vue_map->vert_result_to_slot[vert_result] = vue_map->num_slots;
  50    vue_map->slot_to_vert_result[vue_map->num_slots++] = vert_result;
  51 }
  52
  53 /**
  54  * Compute the VUE map for vertex shader program.
  55  */
  56 void
  57 brw_compute_vue_map(struct brw_vue_map *vue_map,
  58                     const struct intel_context *intel, int nr_userclip,
  59                     GLbitfield64 outputs_written)
  60 {
  61    int i;
  62
  63    vue_map->num_slots = 0;
  64    for (i = 0; i < BRW_VERT_RESULT_MAX; ++i) {
  65       vue_map->vert_result_to_slot[i] = -1;
  66       vue_map->slot_to_vert_result[i] = BRW_VERT_RESULT_MAX;
  67    }
  68
  69    /* VUE header: format depends on chip generation and whether clipping is
  70     * enabled.
  71     */
  72    switch (intel->gen) {
  73    case 4:
  74       /* There are 8 dwords in VUE header pre-Ironlake:
  75        * dword 0-3 is indices, point width, clip flags.
  76        * dword 4-7 is ndc position
  77        * dword 8-11 is the first vertex data.
  78        */
  79       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
  80       assign_vue_slot(vue_map, BRW_VERT_RESULT_NDC);
  81       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
  82       break;
  83    case 5:
  84       /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
  85        * dword 0-3 of the header is indices, point width, clip flags.
  86        * dword 4-7 is the ndc position
  87        * dword 8-11 of the vertex header is the 4D space position
  88        * dword 12-19 of the vertex header is the user clip distance.
  89        * dword 20-23 is a pad so that the vertex element data is aligned
  90        * dword 24-27 is the first vertex data we fill.
  91        *
  92        * Note: future pipeline stages expect 4D space position to be
  93        * contiguous with the other vert_results, so we make dword 24-27 a
  94        * duplicate copy of the 4D space position.
  95        */
  96       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
  97       assign_vue_slot(vue_map, BRW_VERT_RESULT_NDC);
  98       assign_vue_slot(vue_map, BRW_VERT_RESULT_HPOS_DUPLICATE);
  99       assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST0);
 100       assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST1);
 101       assign_vue_slot(vue_map, BRW_VERT_RESULT_PAD);
 102       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
 103       break;
 104    case 6:
 105    case 7:
 106       /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
 107        * dword 0-3 of the header is indices, point width, clip flags.
 108        * dword 4-7 is the 4D space position
 109        * dword 8-15 of the vertex header is the user clip distance if
 110        * enabled.
 111        * dword 8-11 or 16-19 is the first vertex element data we fill.
 112        */
 113       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
 114       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
 115       if (nr_userclip) {
 116          assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST0);
 117          assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST1);
 118       }
 119       /* front and back colors need to be consecutive so that we can use
 120        * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
 121        * two-sided color.
 122        */
 123       if (outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0))
 124          assign_vue_slot(vue_map, VERT_RESULT_COL0);
 125       if (outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))
 126          assign_vue_slot(vue_map, VERT_RESULT_BFC0);
 127       if (outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1))
 128          assign_vue_slot(vue_map, VERT_RESULT_COL1);
 129       if (outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))
 130          assign_vue_slot(vue_map, VERT_RESULT_BFC1);
 131       break;
 132    default:
 133       assert (!"VUE map not known for this chip generation");
 134       break;
 135    }
 136
 137    /* The hardware doesn't care about the rest of the vertex outputs, so just
 138     * assign them contiguously.  Don't reassign outputs that already have a
 139     * slot.
 140     *
 141     * Also, don't assign a slot for VERT_RESULT_CLIP_VERTEX, since it is
 142     * unsupported in pre-GEN6, and in GEN6+ the vertex shader converts it into
 143     * clip distances.
 144     */
 145    for (int i = 0; i < VERT_RESULT_MAX; ++i) {
 146       if ((outputs_written & BITFIELD64_BIT(i)) &&
 147           vue_map->vert_result_to_slot[i] == -1 &&
 148           i != VERT_RESULT_CLIP_VERTEX) {
 149          assign_vue_slot(vue_map, i);
 150       }
 151    }
 152 }
 153
 154
 155 /**
 156  * Decide which set of clip planes should be used when clipping via
 157  * gl_Position or gl_ClipVertex.
 158  */
 159 gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
 160 {
 161    if (ctx->Shader.CurrentVertexProgram) {
 162       /* There is currently a GLSL vertex shader, so clip according to GLSL
 163        * rules, which means compare gl_ClipVertex (or gl_Position, if
 164        * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
 165        * that were stored in EyeUserPlane at the time the clip planes were
 166        * specified.
 167        */
 168       return ctx->Transform.EyeUserPlane;
 169    } else {
 170       /* Either we are using fixed function or an ARB vertex program.  In
 171        * either case the clip planes are going to be compared against
 172        * gl_Position (which is in clip coordinates) so we have to clip using
 173        * _ClipUserPlane, which was transformed into clip coordinates by Mesa
 174        * core.
 175        */
 176       return ctx->Transform._ClipUserPlane;
 177    }
 178 }
 179
 180
 181 static bool
 182 do_vs_prog(struct brw_context *brw,
 183            struct gl_shader_program *prog,
 184            struct brw_vertex_program *vp,
 185            struct brw_vs_prog_key *key)
 186 {
 187    struct gl_context *ctx = &brw->intel.ctx;
 188    struct intel_context *intel = &brw->intel;
 189    GLuint program_size;
 190    const GLuint *program;
 191    struct brw_vs_compile c;
 192    void *mem_ctx;
 193    int aux_size;
 194    int i;
 195
 196    memset(&c, 0, sizeof(c));
 197    memcpy(&c.key, key, sizeof(*key));
 198
 199    mem_ctx = ralloc_context(NULL);
 200
 201    brw_init_compile(brw, &c.func, mem_ctx);
 202    c.vp = vp;
 203
 204    c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
 205    c.prog_data.inputs_read = vp->program.Base.InputsRead;
 206
 207    if (c.key.copy_edgeflag) {
 208       c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE);
 209       c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
 210    }
 211
 212    /* Put dummy slots into the VUE for the SF to put the replaced
 213     * point sprite coords in.  We shouldn't need these dummy slots,
 214     * which take up precious URB space, but it would mean that the SF
 215     * doesn't get nice aligned pairs of input coords into output
 216     * coords, which would be a pain to handle.
 217     */
 218    for (i = 0; i < 8; i++) {
 219       if (c.key.point_coord_replace & (1 << i))
 220          c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
 221    }
 222
 223    if (0) {
 224       _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG,
 225                                GL_TRUE);
 226    }
 227
 228    /* Emit GEN4 code.
 229     */
 230    if (brw->new_vs_backend && prog) {
 231       if (!brw_vs_emit(prog, &c)) {
 232          ralloc_free(mem_ctx);
 233          return false;
 234       }
 235    } else {
 236       brw_old_vs_emit(&c);
 237    }
 238
 239    /* Scratch space is used for register spilling */
 240    if (c.last_scratch) {
 241       c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
 242
 243       brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
 244                          c.prog_data.total_scratch * brw->vs_max_threads);
 245    }
 246
 247    /* get the program
 248     */
 249    program = brw_get_program(&c.func, &program_size);
 250
 251    /* We upload from &c.prog_data including the constant_map assuming
 252     * they're packed together.  It would be nice to have a
 253     * compile-time assert macro here.
 254     */
 255    assert(c.constant_map == (int8_t *)&c.prog_data +
 256           sizeof(c.prog_data));
 257    assert(ctx->Const.VertexProgram.MaxNativeParameters ==
 258           ARRAY_SIZE(c.constant_map));
 259    (void) ctx;
 260
 261    aux_size = sizeof(c.prog_data);
 262    /* constant_map */
 263    aux_size += c.vp->program.Base.Parameters->NumParameters;
 264
 265    brw_upload_cache(&brw->cache, BRW_VS_PROG,
 266                     &c.key, sizeof(c.key),
 267                     program, program_size,
 268                     &c.prog_data, aux_size,
 269                     &brw->vs.prog_offset, &brw->vs.prog_data);
 270    ralloc_free(mem_ctx);
 271
 272    return true;
 273 }
 274
 275
 276 static void brw_upload_vs_prog(struct brw_context *brw)
 277 {
 278    struct gl_context *ctx = &brw->intel.ctx;
 279    struct brw_vs_prog_key key;
 280    struct brw_vertex_program *vp =
 281       (struct brw_vertex_program *)brw->vertex_program;
 282    int i;
 283
 284    memset(&key, 0, sizeof(key));
 285
 286    /* Just upload the program verbatim for now.  Always send it all
 287     * the inputs it asks for, whether they are varying or not.
 288     */
 289    key.program_string_id = vp->id;
 290    key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
 291    key.uses_clip_distance = vp->program.UsesClipDistance;
 292    key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
 293                         ctx->Polygon.BackMode != GL_FILL);
 294
 295    /* _NEW_LIGHT | _NEW_BUFFERS */
 296    key.clamp_vertex_color = ctx->Light._ClampVertexColor;
 297
 298    /* _NEW_POINT */
 299    if (ctx->Point.PointSprite) {
 300       for (i = 0; i < 8; i++) {
 301          if (ctx->Point.CoordReplace[i])
 302             key.point_coord_replace |= (1 << i);
 303       }
 304    }
 305
 306    /* BRW_NEW_VERTICES */
 307    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
 308       if (vp->program.Base.InputsRead & (1 << i) &&
 309           brw->vb.inputs[i].glarray->Type == GL_FIXED) {
 310          key.gl_fixed_input_size[i] = brw->vb.inputs[i].glarray->Size;
 311       }
 312    }
 313
 314    if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
 315                          &key, sizeof(key),
 316                          &brw->vs.prog_offset, &brw->vs.prog_data)) {
 317       bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
 318                                 vp, &key);
 319
 320       assert(success);
 321    }
 322    brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
 323                            sizeof(*brw->vs.prog_data));
 324 }
 325
 326 /* See brw_vs.c:
 327  */
 328 const struct brw_tracked_state brw_vs_prog = {
 329    .dirty = {
 330       .mesa  = (_NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT |
 331                 _NEW_BUFFERS),
 332       .brw   = (BRW_NEW_VERTEX_PROGRAM |
 333                 BRW_NEW_VERTICES),
 334       .cache = 0
 335    },
 336    .prepare = brw_upload_vs_prog
 337 };
 338
 339 bool
 340 brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
 341 {
 342    struct brw_context *brw = brw_context(ctx);
 343    struct brw_vs_prog_key key;
 344    struct gl_vertex_program *vp = prog->VertexProgram;
 345    struct brw_vertex_program *bvp = brw_vertex_program(vp);
 346    uint32_t old_prog_offset = brw->vs.prog_offset;
 347    struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
 348    bool success;
 349
 350    if (!vp)
 351       return true;
 352
 353    memset(&key, 0, sizeof(key));
 354
 355    key.program_string_id = bvp->id;
 356    key.clamp_vertex_color = true;
 357
 358    success = do_vs_prog(brw, prog, bvp, &key);
 359
 360    brw->vs.prog_offset = old_prog_offset;
 361    brw->vs.prog_data = old_prog_data;
 362
 363    return success;
 364 }