src/mesa/drivers/dri/i965/brw_vs.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/compiler.h"
  34 #include "brw_context.h"
  35 #include "brw_vs.h"
  36 #include "brw_util.h"
  37 #include "brw_state.h"
  38 #include "program/prog_print.h"
  39 #include "program/prog_parameter.h"
  40
  41 #include "glsl/ralloc.h"
  42
  43 static inline void assign_vue_slot(struct brw_vue_map *vue_map,
  44                                    int varying)
  45 {
  46    /* Make sure this varying hasn't been assigned a slot already */
  47    assert (vue_map->varying_to_slot[varying] == -1);
  48
  49    vue_map->varying_to_slot[varying] = vue_map->num_slots;
  50    vue_map->slot_to_varying[vue_map->num_slots++] = varying;
  51 }
  52
  53 /**
  54  * Compute the VUE map for vertex shader program.
  55  *
  56  * Note that consumers of this map using cache keys must include
  57  * prog_data->userclip and prog_data->outputs_written in their key
  58  * (generated by CACHE_NEW_VS_PROG).
  59  */
  60 void
  61 brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
  62                     GLbitfield64 slots_valid, bool userclip_active)
  63 {
  64    const struct intel_context *intel = &brw->intel;
  65
  66    vue_map->slots_valid = slots_valid;
  67    int i;
  68
  69    /* Make sure that the values we store in vue_map->varying_to_slot and
  70     * vue_map->slot_to_varying won't overflow the signed chars that are used
  71     * to store them.  Note that since vue_map->slot_to_varying sometimes holds
  72     * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that
  73     * BRW_VARYING_SLOT_COUNT is <= 127, not 128.
  74     */
  75    STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
  76
  77    vue_map->num_slots = 0;
  78    for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
  79       vue_map->varying_to_slot[i] = -1;
  80       vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
  81    }
  82
  83    /* VUE header: format depends on chip generation and whether clipping is
  84     * enabled.
  85     */
  86    switch (intel->gen) {
  87    case 4:
  88       /* There are 8 dwords in VUE header pre-Ironlake:
  89        * dword 0-3 is indices, point width, clip flags.
  90        * dword 4-7 is ndc position
  91        * dword 8-11 is the first vertex data.
  92        */
  93       assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
  94       assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
  95       assign_vue_slot(vue_map, VARYING_SLOT_POS);
  96       break;
  97    case 5:
  98       /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
  99        * dword 0-3 of the header is indices, point width, clip flags.
 100        * dword 4-7 is the ndc position
 101        * dword 8-11 of the vertex header is the 4D space position
 102        * dword 12-19 of the vertex header is the user clip distance.
 103        * dword 20-23 is a pad so that the vertex element data is aligned
 104        * dword 24-27 is the first vertex data we fill.
 105        *
 106        * Note: future pipeline stages expect 4D space position to be
 107        * contiguous with the other varyings, so we make dword 24-27 a
 108        * duplicate copy of the 4D space position.
 109        */
 110       assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
 111       assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
 112       assign_vue_slot(vue_map, BRW_VARYING_SLOT_POS_DUPLICATE);
 113       assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
 114       assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
 115       assign_vue_slot(vue_map, BRW_VARYING_SLOT_PAD);
 116       assign_vue_slot(vue_map, VARYING_SLOT_POS);
 117       break;
 118    case 6:
 119    case 7:
 120       /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
 121        * dword 0-3 of the header is indices, point width, clip flags.
 122        * dword 4-7 is the 4D space position
 123        * dword 8-15 of the vertex header is the user clip distance if
 124        * enabled.
 125        * dword 8-11 or 16-19 is the first vertex element data we fill.
 126        */
 127       assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
 128       assign_vue_slot(vue_map, VARYING_SLOT_POS);
 129       if (userclip_active) {
 130          assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
 131          assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
 132       }
 133       /* front and back colors need to be consecutive so that we can use
 134        * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
 135        * two-sided color.
 136        */
 137       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
 138          assign_vue_slot(vue_map, VARYING_SLOT_COL0);
 139       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
 140          assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
 141       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
 142          assign_vue_slot(vue_map, VARYING_SLOT_COL1);
 143       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
 144          assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
 145       break;
 146    default:
 147       assert (!"VUE map not known for this chip generation");
 148       break;
 149    }
 150
 151    /* The hardware doesn't care about the rest of the vertex outputs, so just
 152     * assign them contiguously.  Don't reassign outputs that already have a
 153     * slot.
 154     *
 155     * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
 156     * since it's encoded as the clip distances by emit_clip_distances().
 157     * However, it may be output by transform feedback, and we'd rather not
 158     * recompute state when TF changes, so we just always include it.
 159     */
 160    for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
 161       if ((slots_valid & BITFIELD64_BIT(i)) &&
 162           vue_map->varying_to_slot[i] == -1) {
 163          assign_vue_slot(vue_map, i);
 164       }
 165    }
 166 }
 167
 168
 169 /**
 170  * Decide which set of clip planes should be used when clipping via
 171  * gl_Position or gl_ClipVertex.
 172  */
 173 gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
 174 {
 175    if (ctx->Shader.CurrentVertexProgram) {
 176       /* There is currently a GLSL vertex shader, so clip according to GLSL
 177        * rules, which means compare gl_ClipVertex (or gl_Position, if
 178        * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
 179        * that were stored in EyeUserPlane at the time the clip planes were
 180        * specified.
 181        */
 182       return ctx->Transform.EyeUserPlane;
 183    } else {
 184       /* Either we are using fixed function or an ARB vertex program.  In
 185        * either case the clip planes are going to be compared against
 186        * gl_Position (which is in clip coordinates) so we have to clip using
 187        * _ClipUserPlane, which was transformed into clip coordinates by Mesa
 188        * core.
 189        */
 190       return ctx->Transform._ClipUserPlane;
 191    }
 192 }
 193
 194
 195 bool
 196 brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a,
 197                            const struct brw_vec4_prog_data *b)
 198 {
 199    /* Compare all the struct up to the pointers. */
 200    if (memcmp(a, b, offsetof(struct brw_vec4_prog_data, param)))
 201       return false;
 202
 203    if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
 204       return false;
 205
 206    if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
 207       return false;
 208
 209    return true;
 210 }
 211
 212
 213 bool
 214 brw_vs_prog_data_compare(const void *in_a, const void *in_b,
 215                          int aux_size, const void *in_key)
 216 {
 217    const struct brw_vs_prog_data *a = in_a;
 218    const struct brw_vs_prog_data *b = in_b;
 219
 220    /* Compare the base vec4 structure. */
 221    if (!brw_vec4_prog_data_compare(&a->base, &b->base))
 222       return false;
 223
 224    /* Compare the rest of the struct. */
 225    const unsigned offset = sizeof(struct brw_vec4_prog_data);
 226    if (memcmp(((char *) &a) + offset, ((char *) &b) + offset,
 227               sizeof(struct brw_vs_prog_data) - offset)) {
 228       return false;
 229    }
 230
 231    return true;
 232 }
 233
 234 static bool
 235 do_vs_prog(struct brw_context *brw,
 236            struct gl_shader_program *prog,
 237            struct brw_vertex_program *vp,
 238            struct brw_vs_prog_key *key)
 239 {
 240    struct intel_context *intel = &brw->intel;
 241    GLuint program_size;
 242    const GLuint *program;
 243    struct brw_vs_compile c;
 244    struct brw_vs_prog_data prog_data;
 245    void *mem_ctx;
 246    int i;
 247    struct gl_shader *vs = NULL;
 248
 249    if (prog)
 250       vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
 251
 252    memset(&c, 0, sizeof(c));
 253    memcpy(&c.key, key, sizeof(*key));
 254    memset(&prog_data, 0, sizeof(prog_data));
 255
 256    mem_ctx = ralloc_context(NULL);
 257
 258    c.vp = vp;
 259
 260    /* Allocate the references to the uniforms that will end up in the
 261     * prog_data associated with the compiled program, and which will be freed
 262     * by the state cache.
 263     */
 264    int param_count;
 265    if (vs) {
 266       /* We add padding around uniform values below vec4 size, with the worst
 267        * case being a float value that gets blown up to a vec4, so be
 268        * conservative here.
 269        */
 270       param_count = vs->num_uniform_components * 4;
 271
 272    } else {
 273       param_count = vp->program.Base.Parameters->NumParameters * 4;
 274    }
 275    /* We also upload clip plane data as uniforms */
 276    param_count += MAX_CLIP_PLANES * 4;
 277
 278    prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
 279    prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
 280
 281    GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
 282    prog_data.inputs_read = vp->program.Base.InputsRead;
 283
 284    if (c.key.copy_edgeflag) {
 285       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
 286       prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
 287    }
 288
 289    if (intel->gen < 6) {
 290       /* Put dummy slots into the VUE for the SF to put the replaced
 291        * point sprite coords in.  We shouldn't need these dummy slots,
 292        * which take up precious URB space, but it would mean that the SF
 293        * doesn't get nice aligned pairs of input coords into output
 294        * coords, which would be a pain to handle.
 295        */
 296       for (i = 0; i < 8; i++) {
 297          if (c.key.point_coord_replace & (1 << i))
 298             outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
 299       }
 300    }
 301
 302    brw_compute_vue_map(brw, &prog_data.base.vue_map, outputs_written,
 303                        c.key.base.userclip_active);
 304
 305    if (0) {
 306       _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG,
 307                                true);
 308    }
 309
 310    /* Emit GEN4 code.
 311     */
 312    program = brw_vs_emit(brw, prog, &c, &prog_data, mem_ctx, &program_size);
 313    if (program == NULL) {
 314       ralloc_free(mem_ctx);
 315       return false;
 316    }
 317
 318    if (prog_data.base.nr_pull_params)
 319       prog_data.base.num_surfaces = 1;
 320    if (c.vp->program.Base.SamplersUsed)
 321       prog_data.base.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT);
 322    if (prog &&
 323        prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) {
 324       prog_data.base.num_surfaces =
 325          SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks);
 326    }
 327
 328    /* Scratch space is used for register spilling */
 329    if (c.base.last_scratch) {
 330       perf_debug("Vertex shader triggered register spilling.  "
 331                  "Try reducing the number of live vec4 values to "
 332                  "improve performance.\n");
 333
 334       prog_data.base.total_scratch
 335          = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
 336
 337       brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
 338                          prog_data.base.total_scratch * brw->max_vs_threads);
 339    }
 340
 341    brw_upload_cache(&brw->cache, BRW_VS_PROG,
 342                     &c.key, sizeof(c.key),
 343                     program, program_size,
 344                     &prog_data, sizeof(prog_data),
 345                     &brw->vs.prog_offset, &brw->vs.prog_data);
 346    ralloc_free(mem_ctx);
 347
 348    return true;
 349 }
 350
 351 static bool
 352 key_debug(struct intel_context *intel, const char *name, int a, int b)
 353 {
 354    if (a != b) {
 355       perf_debug("  %s %d->%d\n", name, a, b);
 356       return true;
 357    }
 358    return false;
 359 }
 360
 361 void
 362 brw_vs_debug_recompile(struct brw_context *brw,
 363                        struct gl_shader_program *prog,
 364                        const struct brw_vs_prog_key *key)
 365 {
 366    struct intel_context *intel = &brw->intel;
 367    struct brw_cache_item *c = NULL;
 368    const struct brw_vs_prog_key *old_key = NULL;
 369    bool found = false;
 370
 371    perf_debug("Recompiling vertex shader for program %d\n", prog->Name);
 372
 373    for (unsigned int i = 0; i < brw->cache.size; i++) {
 374       for (c = brw->cache.items[i]; c; c = c->next) {
 375          if (c->cache_id == BRW_VS_PROG) {
 376             old_key = c->key;
 377
 378             if (old_key->base.program_string_id == key->base.program_string_id)
 379                break;
 380          }
 381       }
 382       if (c)
 383          break;
 384    }
 385
 386    if (!c) {
 387       perf_debug("  Didn't find previous compile in the shader cache for "
 388                  "debug\n");
 389       return;
 390    }
 391
 392    for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
 393       found |= key_debug(intel, "Vertex attrib w/a flags",
 394                          old_key->gl_attrib_wa_flags[i],
 395                          key->gl_attrib_wa_flags[i]);
 396    }
 397
 398    found |= key_debug(intel, "user clip flags",
 399                       old_key->base.userclip_active, key->base.userclip_active);
 400
 401    found |= key_debug(intel, "user clipping planes as push constants",
 402                       old_key->base.nr_userclip_plane_consts,
 403                       key->base.nr_userclip_plane_consts);
 404
 405    found |= key_debug(intel, "clip distance enable",
 406                       old_key->base.uses_clip_distance, key->base.uses_clip_distance);
 407    found |= key_debug(intel, "clip plane enable bitfield",
 408                       old_key->base.userclip_planes_enabled_gen_4_5,
 409                       key->base.userclip_planes_enabled_gen_4_5);
 410    found |= key_debug(intel, "copy edgeflag",
 411                       old_key->copy_edgeflag, key->copy_edgeflag);
 412    found |= key_debug(intel, "PointCoord replace",
 413                       old_key->point_coord_replace, key->point_coord_replace);
 414    found |= key_debug(intel, "vertex color clamping",
 415                       old_key->base.clamp_vertex_color, key->base.clamp_vertex_color);
 416
 417    found |= brw_debug_recompile_sampler_key(intel, &old_key->base.tex,
 418                                             &key->base.tex);
 419
 420    if (!found) {
 421       perf_debug("  Something else\n");
 422    }
 423 }
 424
 425 static void brw_upload_vs_prog(struct brw_context *brw)
 426 {
 427    struct intel_context *intel = &brw->intel;
 428    struct gl_context *ctx = &intel->ctx;
 429    struct brw_vs_prog_key key;
 430    /* BRW_NEW_VERTEX_PROGRAM */
 431    struct brw_vertex_program *vp =
 432       (struct brw_vertex_program *)brw->vertex_program;
 433    struct gl_program *prog = (struct gl_program *) brw->vertex_program;
 434    int i;
 435
 436    memset(&key, 0, sizeof(key));
 437
 438    /* Just upload the program verbatim for now.  Always send it all
 439     * the inputs it asks for, whether they are varying or not.
 440     */
 441    key.base.program_string_id = vp->id;
 442    key.base.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
 443    key.base.uses_clip_distance = vp->program.UsesClipDistance;
 444    if (key.base.userclip_active && !key.base.uses_clip_distance) {
 445       if (intel->gen < 6) {
 446          key.base.nr_userclip_plane_consts
 447             = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
 448          key.base.userclip_planes_enabled_gen_4_5
 449             = ctx->Transform.ClipPlanesEnabled;
 450       } else {
 451          key.base.nr_userclip_plane_consts
 452             = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
 453       }
 454    }
 455
 456    /* _NEW_POLYGON */
 457    if (intel->gen < 6) {
 458       key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
 459                            ctx->Polygon.BackMode != GL_FILL);
 460    }
 461
 462    /* _NEW_LIGHT | _NEW_BUFFERS */
 463    key.base.clamp_vertex_color = ctx->Light._ClampVertexColor;
 464
 465    /* _NEW_POINT */
 466    if (intel->gen < 6 && ctx->Point.PointSprite) {
 467       for (i = 0; i < 8; i++) {
 468          if (ctx->Point.CoordReplace[i])
 469             key.point_coord_replace |= (1 << i);
 470       }
 471    }
 472
 473    /* _NEW_TEXTURE */
 474    brw_populate_sampler_prog_key_data(ctx, prog, &key.base.tex);
 475
 476    /* BRW_NEW_VERTICES */
 477    if (intel->gen < 8 && !intel->is_haswell) {
 478       /* Prior to Haswell, the hardware can't natively support GL_FIXED or
 479        * 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
 480        */
 481       for (i = 0; i < VERT_ATTRIB_MAX; i++) {
 482          if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i)))
 483             continue;
 484
 485          uint8_t wa_flags = 0;
 486
 487          switch (brw->vb.inputs[i].glarray->Type) {
 488
 489          case GL_FIXED:
 490             wa_flags = brw->vb.inputs[i].glarray->Size;
 491             break;
 492
 493          case GL_INT_2_10_10_10_REV:
 494             wa_flags |= BRW_ATTRIB_WA_SIGN;
 495             /* fallthough */
 496
 497          case GL_UNSIGNED_INT_2_10_10_10_REV:
 498             if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
 499                wa_flags |= BRW_ATTRIB_WA_BGRA;
 500
 501             if (brw->vb.inputs[i].glarray->Normalized)
 502                wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
 503             else if (!brw->vb.inputs[i].glarray->Integer)
 504                wa_flags |= BRW_ATTRIB_WA_SCALE;
 505
 506             break;
 507          }
 508
 509          key.gl_attrib_wa_flags[i] = wa_flags;
 510       }
 511    }
 512
 513    if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
 514                          &key, sizeof(key),
 515                          &brw->vs.prog_offset, &brw->vs.prog_data)) {
 516       bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
 517                                 vp, &key);
 518
 519       assert(success);
 520    }
 521    if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
 522               sizeof(brw->vue_map_geom_out)) != 0) {
 523       brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
 524       brw->state.dirty.brw |= BRW_NEW_VUE_MAP_GEOM_OUT;
 525    }
 526 }
 527
 528 /* See brw_vs.c:
 529  */
 530 const struct brw_tracked_state brw_vs_prog = {
 531    .dirty = {
 532       .mesa  = (_NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT |
 533                 _NEW_TEXTURE |
 534                 _NEW_BUFFERS),
 535       .brw   = (BRW_NEW_VERTEX_PROGRAM |
 536                 BRW_NEW_VERTICES),
 537       .cache = 0
 538    },
 539    .emit = brw_upload_vs_prog
 540 };
 541
 542 bool
 543 brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
 544 {
 545    struct brw_context *brw = brw_context(ctx);
 546    struct brw_vs_prog_key key;
 547    uint32_t old_prog_offset = brw->vs.prog_offset;
 548    struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
 549    bool success;
 550
 551    if (!prog->_LinkedShaders[MESA_SHADER_VERTEX])
 552       return true;
 553
 554    struct gl_vertex_program *vp = (struct gl_vertex_program *)
 555       prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
 556    struct brw_vertex_program *bvp = brw_vertex_program(vp);
 557
 558    memset(&key, 0, sizeof(key));
 559
 560    key.base.program_string_id = bvp->id;
 561    key.base.clamp_vertex_color = ctx->API == API_OPENGL_COMPAT;
 562
 563    for (int i = 0; i < MAX_SAMPLERS; i++) {
 564       if (vp->Base.ShadowSamplers & (1 << i)) {
 565          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
 566          key.base.tex.swizzles[i] =
 567             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
 568       } else {
 569          /* Color sampler: assume no swizzling. */
 570          key.base.tex.swizzles[i] = SWIZZLE_XYZW;
 571       }
 572    }
 573
 574    success = do_vs_prog(brw, prog, bvp, &key);
 575
 576    brw->vs.prog_offset = old_prog_offset;
 577    brw->vs.prog_data = old_prog_data;
 578
 579    return success;
 580 }
 581
 582
 583 void
 584 brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data)
 585 {
 586    ralloc_free((void *)prog_data->param);
 587    ralloc_free((void *)prog_data->pull_param);
 588 }
 589
 590
 591 void
 592 brw_vs_prog_data_free(const void *in_prog_data)
 593 {
 594    const struct brw_vs_prog_data *prog_data = in_prog_data;
 595
 596    brw_vec4_prog_data_free(&prog_data->base);
 597 }