src/mesa/drivers/dri/i965/brw_vs.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/compiler.h"
  34 #include "brw_context.h"
  35 #include "brw_vs.h"
  36 #include "brw_util.h"
  37 #include "brw_state.h"
  38 #include "program/prog_print.h"
  39 #include "program/prog_parameter.h"
  40
  41 #include "glsl/ralloc.h"
  42
  43 static inline void assign_vue_slot(struct brw_vue_map *vue_map,
  44                                    int vert_result)
  45 {
  46    /* Make sure this vert_result hasn't been assigned a slot already */
  47    assert (vue_map->vert_result_to_slot[vert_result] == -1);
  48
  49    vue_map->vert_result_to_slot[vert_result] = vue_map->num_slots;
  50    vue_map->slot_to_vert_result[vue_map->num_slots++] = vert_result;
  51 }
  52
  53 /**
  54  * Compute the VUE map for vertex shader program.
  55  */
  56 void
  57 brw_compute_vue_map(struct brw_vue_map *vue_map,
  58                     const struct intel_context *intel, int nr_userclip,
  59                     bool two_side_color, GLbitfield64 outputs_written)
  60 {
  61    int i;
  62
  63    vue_map->num_slots = 0;
  64    for (i = 0; i < BRW_VERT_RESULT_MAX; ++i) {
  65       vue_map->vert_result_to_slot[i] = -1;
  66       vue_map->slot_to_vert_result[i] = BRW_VERT_RESULT_MAX;
  67    }
  68
  69    /* VUE header: format depends on chip generation and whether clipping is
  70     * enabled.
  71     */
  72    switch (intel->gen) {
  73    case 4:
  74       /* There are 8 dwords in VUE header pre-Ironlake:
  75        * dword 0-3 is indices, point width, clip flags.
  76        * dword 4-7 is ndc position
  77        * dword 8-11 is the first vertex data.
  78        */
  79       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
  80       assign_vue_slot(vue_map, BRW_VERT_RESULT_NDC);
  81       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
  82       break;
  83    case 5:
  84       /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
  85        * dword 0-3 of the header is indices, point width, clip flags.
  86        * dword 4-7 is the ndc position
  87        * dword 8-11 of the vertex header is the 4D space position
  88        * dword 12-19 of the vertex header is the user clip distance.
  89        * dword 20-23 is a pad so that the vertex element data is aligned
  90        * dword 24-27 is the first vertex data we fill.
  91        *
  92        * Note: future pipeline stages expect 4D space position to be
  93        * contiguous with the other vert_results, so we make dword 24-27 a
  94        * duplicate copy of the 4D space position.
  95        */
  96       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
  97       assign_vue_slot(vue_map, BRW_VERT_RESULT_NDC);
  98       assign_vue_slot(vue_map, BRW_VERT_RESULT_HPOS_DUPLICATE);
  99       assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP0);
 100       assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP1);
 101       assign_vue_slot(vue_map, BRW_VERT_RESULT_PAD);
 102       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
 103       break;
 104    case 6:
 105    case 7:
 106       /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
 107        * dword 0-3 of the header is indices, point width, clip flags.
 108        * dword 4-7 is the 4D space position
 109        * dword 8-15 of the vertex header is the user clip distance if
 110        * enabled.
 111        * dword 8-11 or 16-19 is the first vertex element data we fill.
 112        */
 113       assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
 114       assign_vue_slot(vue_map, VERT_RESULT_HPOS);
 115       if (nr_userclip) {
 116          assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP0);
 117          assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP1);
 118       }
 119       /* front and back colors need to be consecutive so that we can use
 120        * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
 121        * two-sided color.
 122        */
 123       if (outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0))
 124          assign_vue_slot(vue_map, VERT_RESULT_COL0);
 125       if (outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))
 126          assign_vue_slot(vue_map, VERT_RESULT_BFC0);
 127       if (outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1))
 128          assign_vue_slot(vue_map, VERT_RESULT_COL1);
 129       if (outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))
 130          assign_vue_slot(vue_map, VERT_RESULT_BFC1);
 131       break;
 132    default:
 133       assert (!"VUE map not known for this chip generation");
 134       break;
 135    }
 136
 137    /* The hardware doesn't care about the rest of the vertex outputs, so just
 138     * assign them contiguously.  Don't reassign outputs that already have a
 139     * slot.
 140     */
 141    for (int i = 0; i < VERT_RESULT_MAX; ++i) {
 142       if ((outputs_written & BITFIELD64_BIT(i)) &&
 143           vue_map->vert_result_to_slot[i] == -1) {
 144          assign_vue_slot(vue_map, i);
 145       }
 146    }
 147 }
 148
 149 static bool
 150 do_vs_prog(struct brw_context *brw,
 151            struct gl_shader_program *prog,
 152            struct brw_vertex_program *vp,
 153            struct brw_vs_prog_key *key)
 154 {
 155    struct gl_context *ctx = &brw->intel.ctx;
 156    struct intel_context *intel = &brw->intel;
 157    GLuint program_size;
 158    const GLuint *program;
 159    struct brw_vs_compile c;
 160    void *mem_ctx;
 161    int aux_size;
 162    int i;
 163
 164    memset(&c, 0, sizeof(c));
 165    memcpy(&c.key, key, sizeof(*key));
 166
 167    mem_ctx = ralloc_context(NULL);
 168
 169    brw_init_compile(brw, &c.func, mem_ctx);
 170    c.vp = vp;
 171
 172    c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
 173    c.prog_data.inputs_read = vp->program.Base.InputsRead;
 174
 175    if (c.key.copy_edgeflag) {
 176       c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE);
 177       c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
 178    }
 179
 180    /* Put dummy slots into the VUE for the SF to put the replaced
 181     * point sprite coords in.  We shouldn't need these dummy slots,
 182     * which take up precious URB space, but it would mean that the SF
 183     * doesn't get nice aligned pairs of input coords into output
 184     * coords, which would be a pain to handle.
 185     */
 186    for (i = 0; i < 8; i++) {
 187       if (c.key.point_coord_replace & (1 << i))
 188          c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
 189    }
 190
 191    if (0) {
 192       _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG,
 193                                GL_TRUE);
 194    }
 195
 196    /* Emit GEN4 code.
 197     */
 198    if (brw->new_vs_backend && prog) {
 199       if (!brw_vs_emit(prog, &c)) {
 200          ralloc_free(mem_ctx);
 201          return false;
 202       }
 203    } else {
 204       brw_old_vs_emit(&c);
 205    }
 206
 207    /* Scratch space is used for register spilling */
 208    if (c.last_scratch) {
 209       c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
 210
 211       brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
 212                          c.prog_data.total_scratch * brw->vs_max_threads);
 213    }
 214
 215    /* get the program
 216     */
 217    program = brw_get_program(&c.func, &program_size);
 218
 219    /* We upload from &c.prog_data including the constant_map assuming
 220     * they're packed together.  It would be nice to have a
 221     * compile-time assert macro here.
 222     */
 223    assert(c.constant_map == (int8_t *)&c.prog_data +
 224           sizeof(c.prog_data));
 225    assert(ctx->Const.VertexProgram.MaxNativeParameters ==
 226           ARRAY_SIZE(c.constant_map));
 227    (void) ctx;
 228
 229    aux_size = sizeof(c.prog_data);
 230    /* constant_map */
 231    aux_size += c.vp->program.Base.Parameters->NumParameters;
 232
 233    brw_upload_cache(&brw->cache, BRW_VS_PROG,
 234                     &c.key, sizeof(c.key),
 235                     program, program_size,
 236                     &c.prog_data, aux_size,
 237                     &brw->vs.prog_offset, &brw->vs.prog_data);
 238    ralloc_free(mem_ctx);
 239
 240    return true;
 241 }
 242
 243
 244 static void brw_upload_vs_prog(struct brw_context *brw)
 245 {
 246    struct gl_context *ctx = &brw->intel.ctx;
 247    struct brw_vs_prog_key key;
 248    struct brw_vertex_program *vp =
 249       (struct brw_vertex_program *)brw->vertex_program;
 250    int i;
 251
 252    memset(&key, 0, sizeof(key));
 253
 254    /* Just upload the program verbatim for now.  Always send it all
 255     * the inputs it asks for, whether they are varying or not.
 256     */
 257    key.program_string_id = vp->id;
 258    key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
 259    key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
 260                         ctx->Polygon.BackMode != GL_FILL);
 261    key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
 262
 263    /* _NEW_LIGHT | _NEW_BUFFERS */
 264    key.clamp_vertex_color = ctx->Light._ClampVertexColor;
 265
 266    /* _NEW_POINT */
 267    if (ctx->Point.PointSprite) {
 268       for (i = 0; i < 8; i++) {
 269          if (ctx->Point.CoordReplace[i])
 270             key.point_coord_replace |= (1 << i);
 271       }
 272    }
 273
 274    /* BRW_NEW_VERTICES */
 275    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
 276       if (vp->program.Base.InputsRead & (1 << i) &&
 277           brw->vb.inputs[i].glarray->Type == GL_FIXED) {
 278          key.gl_fixed_input_size[i] = brw->vb.inputs[i].glarray->Size;
 279       }
 280    }
 281
 282    if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
 283                          &key, sizeof(key),
 284                          &brw->vs.prog_offset, &brw->vs.prog_data)) {
 285       bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
 286                                 vp, &key);
 287
 288       assert(success);
 289    }
 290    brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
 291                            sizeof(*brw->vs.prog_data));
 292 }
 293
 294 /* See brw_vs.c:
 295  */
 296 const struct brw_tracked_state brw_vs_prog = {
 297    .dirty = {
 298       .mesa  = (_NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT |
 299                 _NEW_BUFFERS),
 300       .brw   = (BRW_NEW_VERTEX_PROGRAM |
 301                 BRW_NEW_VERTICES),
 302       .cache = 0
 303    },
 304    .prepare = brw_upload_vs_prog
 305 };
 306
 307 bool
 308 brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
 309 {
 310    struct brw_context *brw = brw_context(ctx);
 311    struct brw_vs_prog_key key;
 312    struct gl_vertex_program *vp = prog->VertexProgram;
 313    struct brw_vertex_program *bvp = brw_vertex_program(vp);
 314    uint32_t old_prog_offset = brw->vs.prog_offset;
 315    struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
 316    bool success;
 317
 318    if (!vp)
 319       return true;
 320
 321    memset(&key, 0, sizeof(key));
 322
 323    key.program_string_id = bvp->id;
 324    key.clamp_vertex_color = true;
 325
 326    success = do_vs_prog(brw, prog, bvp, &key);
 327
 328    brw->vs.prog_offset = old_prog_offset;
 329    brw->vs.prog_data = old_prog_data;
 330
 331    return success;
 332 }