src/mesa/main/ffvertex_prog.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \file ffvertex_prog.c
  30  *
  31  * Create a vertex program to execute the current fixed function T&L pipeline.
  32  * \author Keith Whitwell
  33  */
  34
  35
  36 #include "main/glheader.h"
  37 #include "main/mtypes.h"
  38 #include "main/macros.h"
  39 #include "main/mfeatures.h"
  40 #include "main/enums.h"
  41 #include "main/ffvertex_prog.h"
  42 #include "program/program.h"
  43 #include "program/prog_cache.h"
  44 #include "program/prog_instruction.h"
  45 #include "program/prog_parameter.h"
  46 #include "program/prog_print.h"
  47 #include "program/prog_statevars.h"
  48
  49
/** Max of number of lights and texture coord units */
#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS)

/**
 * Snapshot of the fixed-function state that make_state_key() extracts
 * from the GL context.  make_state_key() zeroes the whole struct first,
 * so any field it does not set explicitly is 0.
 */
struct state_key {
   unsigned light_color_material_mask:12; /* ColorMaterialBitmask, only when color material is enabled */
   unsigned light_global_enabled:1;       /* ctx->Light.Enabled */
   unsigned light_local_viewer:1;
   unsigned light_twoside:1;
   unsigned material_shininess_is_zero:1; /* no active shininess found by check_active_shininess() */
   unsigned need_eye_coords:1;
   unsigned normalize:1;
   unsigned rescale_normals:1;

   unsigned fog_source_is_depth:1;        /* fog coord source is GL_FRAGMENT_DEPTH_EXT */
   unsigned fog_distance_mode:2;          /* one of the FDM_* values */
   unsigned separate_specular:1;
   unsigned point_attenuated:1;
   unsigned point_array:1;
   unsigned texture_enabled_global:1;     /* any texgen, texture matrix or texture unit enabled */
   unsigned fragprog_inputs_read:12;      /* from the current fragment program's InputsRead */

   GLbitfield64 varying_vp_inputs;        /* copy of ctx->varying_vp_inputs */

   /* One array shared by two index spaces: entry i carries the light_*
    * flags of light i and the texture flags of texture coord unit i.
    */
   struct {
      unsigned light_enabled:1;
      unsigned light_eyepos3_is_zero:1;   /* EyePosition[3] == 0 */
      unsigned light_spotcutoff_is_180:1;
      unsigned light_attenuated:1;        /* attenuation factors differ from (1, 0, 0) */
      unsigned texunit_really_enabled:1;
      unsigned texmat_enabled:1;
      unsigned coord_replace:1;           /* point sprite coord replace for this unit */
      unsigned texgen_enabled:4;          /* set to 1 when any texgen component is enabled */
      unsigned texgen_mode0:4;            /* TXG_* mode for S */
      unsigned texgen_mode1:4;            /* TXG_* mode for T */
      unsigned texgen_mode2:4;            /* TXG_* mode for R */
      unsigned texgen_mode3:4;            /* TXG_* mode for Q */
   } unit[NUM_UNITS];
};
  88
  89
  90 #define TXG_NONE           0
  91 #define TXG_OBJ_LINEAR     1
  92 #define TXG_EYE_LINEAR     2
  93 #define TXG_SPHERE_MAP     3
  94 #define TXG_REFLECTION_MAP 4
  95 #define TXG_NORMAL_MAP     5
  96
  97 static GLuint translate_texgen( GLboolean enabled, GLenum mode )
  98 {
  99    if (!enabled)
 100       return TXG_NONE;
 101
 102    switch (mode) {
 103    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
 104    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
 105    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
 106    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
 107    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
 108    default: return TXG_NONE;
 109    }
 110 }
 111
 112 #define FDM_EYE_RADIAL    0
 113 #define FDM_EYE_PLANE     1
 114 #define FDM_EYE_PLANE_ABS 2
 115
 116 static GLuint translate_fog_distance_mode( GLenum mode )
 117 {
 118    switch (mode) {
 119    case GL_EYE_RADIAL_NV:
 120       return FDM_EYE_RADIAL;
 121    case GL_EYE_PLANE:
 122       return FDM_EYE_PLANE;
 123    default: /* shouldn't happen; fall through to a sensible default */
 124    case GL_EYE_PLANE_ABSOLUTE_NV:
 125       return FDM_EYE_PLANE_ABS;
 126    }
 127 }
 128
 129 static GLboolean check_active_shininess( struct gl_context *ctx,
 130                                          const struct state_key *key,
 131                                          GLuint side )
 132 {
 133    GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side;
 134
 135    if ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
 136        (key->light_color_material_mask & (1 << attr)))
 137       return GL_TRUE;
 138
 139    if (key->varying_vp_inputs & VERT_ATTRIB_GENERIC(attr))
 140       return GL_TRUE;
 141
 142    if (ctx->Light.Material.Attrib[attr][0] != 0.0F)
 143       return GL_TRUE;
 144
 145    return GL_FALSE;
 146 }
 147
 148
 149 static void make_state_key( struct gl_context *ctx, struct state_key *key )
 150 {
 151    const struct gl_fragment_program *fp;
 152    GLuint i;
 153
 154    memset(key, 0, sizeof(struct state_key));
 155    fp = ctx->FragmentProgram._Current;
 156
 157    /* This now relies on texenvprogram.c being active:
 158     */
 159    assert(fp);
 160
 161    key->need_eye_coords = ctx->_NeedEyeCoords;
 162
 163    key->fragprog_inputs_read = fp->Base.InputsRead;
 164    key->varying_vp_inputs = ctx->varying_vp_inputs;
 165
 166    if (ctx->RenderMode == GL_FEEDBACK) {
 167       /* make sure the vertprog emits color and tex0 */
 168       key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
 169    }
 170
 171    key->separate_specular = (ctx->Light.Model.ColorControl ==
 172                              GL_SEPARATE_SPECULAR_COLOR);
 173
 174    if (ctx->Light.Enabled) {
 175       key->light_global_enabled = 1;
 176
 177       if (ctx->Light.Model.LocalViewer)
 178          key->light_local_viewer = 1;
 179
 180       if (ctx->Light.Model.TwoSide)
 181          key->light_twoside = 1;
 182
 183       if (ctx->Light.ColorMaterialEnabled) {
 184          key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
 185       }
 186
 187       for (i = 0; i < MAX_LIGHTS; i++) {
 188          struct gl_light *light = &ctx->Light.Light[i];
 189
 190          if (light->Enabled) {
 191             key->unit[i].light_enabled = 1;
 192
 193             if (light->EyePosition[3] == 0.0)
 194                key->unit[i].light_eyepos3_is_zero = 1;
 195
 196             if (light->SpotCutoff == 180.0)
 197                key->unit[i].light_spotcutoff_is_180 = 1;
 198
 199             if (light->ConstantAttenuation != 1.0 ||
 200                 light->LinearAttenuation != 0.0 ||
 201                 light->QuadraticAttenuation != 0.0)
 202                key->unit[i].light_attenuated = 1;
 203          }
 204       }
 205
 206       if (check_active_shininess(ctx, key, 0)) {
 207          key->material_shininess_is_zero = 0;
 208       }
 209       else if (key->light_twoside &&
 210                check_active_shininess(ctx, key, 1)) {
 211          key->material_shininess_is_zero = 0;
 212       }
 213       else {
 214          key->material_shininess_is_zero = 1;
 215       }
 216    }
 217
 218    if (ctx->Transform.Normalize)
 219       key->normalize = 1;
 220
 221    if (ctx->Transform.RescaleNormals)
 222       key->rescale_normals = 1;
 223
 224    if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) {
 225       key->fog_source_is_depth = 1;
 226       key->fog_distance_mode = translate_fog_distance_mode(ctx->Fog.FogDistanceMode);
 227    }
 228
 229    if (ctx->Point._Attenuated)
 230       key->point_attenuated = 1;
 231
 232 #if FEATURE_point_size_array
 233    if (ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled)
 234       key->point_array = 1;
 235 #endif
 236
 237    if (ctx->Texture._TexGenEnabled ||
 238        ctx->Texture._TexMatEnabled ||
 239        ctx->Texture._EnabledUnits)
 240       key->texture_enabled_global = 1;
 241
 242    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
 243       struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
 244
 245       if (texUnit->_ReallyEnabled)
 246          key->unit[i].texunit_really_enabled = 1;
 247
 248       if (ctx->Point.PointSprite)
 249          if (ctx->Point.CoordReplace[i])
 250             key->unit[i].coord_replace = 1;
 251
 252       if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
 253          key->unit[i].texmat_enabled = 1;
 254
 255       if (texUnit->TexGenEnabled) {
 256          key->unit[i].texgen_enabled = 1;
 257
 258          key->unit[i].texgen_mode0 =
 259             translate_texgen( texUnit->TexGenEnabled & (1<<0),
 260                               texUnit->GenS.Mode );
 261          key->unit[i].texgen_mode1 =
 262             translate_texgen( texUnit->TexGenEnabled & (1<<1),
 263                               texUnit->GenT.Mode );
 264          key->unit[i].texgen_mode2 =
 265             translate_texgen( texUnit->TexGenEnabled & (1<<2),
 266                               texUnit->GenR.Mode );
 267          key->unit[i].texgen_mode3 =
 268             translate_texgen( texUnit->TexGenEnabled & (1<<3),
 269                               texUnit->GenQ.Mode );
 270       }
 271    }
 272 }
 273
 274
 275
/* Very useful debugging tool - produces annotated listing of
 * generated program with line/function references for each
 * instruction back into this file.
 *
 * Set to non-zero to enable; debug_insn() prints nothing while it is 0.
 */
#define DISASSEM 0
 281
 282
/* Use uregs to represent registers internally, translate to Mesa's
 * expected formats on emit (see emit_arg() and emit_dst()).
 *
 * NOTE: These are passed by value extensively in this file rather
 * than as usual by pointer reference.  If this disturbs you, try
 * remembering they are just 32bits in size.
 *
 * GCC is smart enough to deal with these dword-sized structures in
 * much the same way as if I had defined them as dwords and was using
 * macros to access and set the fields.  This is much nicer and easier
 * to evolve.
 */
struct ureg {
   GLuint file:4;    /* register file (PROGRAM_xxx) */
   GLint idx:9;      /* relative addressing may be negative */
                     /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
   GLuint negate:1;  /* emitted as NEGATE_XYZW when set */
   GLuint swz:12;    /* packed swizzle, SWIZZLE_NOOP by default */
   GLuint pad:6;     /* unused; brings the total to 32 bits */
};
 303
 304
/* Working state used while building one fixed-function vertex program. */
struct tnl_program {
   const struct state_key *state;       /* state the program is generated for */
   struct gl_vertex_program *program;   /* program being filled in */
   GLint max_inst;  /** number of instructions allocated for program */
   GLboolean mvp_with_dp4;              /* use DP4 rows instead of the transposed MUL/MAD form */

   GLuint temp_in_use;                  /* bitmask of temporaries currently handed out */
   GLuint temp_reserved;                /* bitmask of temporaries that release_temp(s) must keep */

   /* Values computed on first request by the get_*() helpers; `undef'
    * until then.
    */
   struct ureg eye_position;
   struct ureg eye_position_z;
   struct ureg eye_position_normalized;
   struct ureg transformed_normal;
   struct ureg identity;                /* the constant (0,0,0,1) */

   GLuint materials;                    /* set by set_material_flags() */
   GLuint color_materials;              /* set by set_material_flags() */
};
 323
 324
/* Placeholder register: its file is PROGRAM_UNDEFINED, which is what
 * is_undef() tests for.  Also passed for unused source operands.
 */
static const struct ureg undef = {
   PROGRAM_UNDEFINED,
   0,
   0,
   0,
   0
};
 332
/* Local shorthand for the swizzle components, as passed to swizzle()
 * and swizzle1():
 */
#define X    SWIZZLE_X
#define Y    SWIZZLE_Y
#define Z    SWIZZLE_Z
#define W    SWIZZLE_W
 339
 340
 341 /* Construct a ureg:
 342  */
 343 static struct ureg make_ureg(GLuint file, GLint idx)
 344 {
 345    struct ureg reg;
 346    reg.file = file;
 347    reg.idx = idx;
 348    reg.negate = 0;
 349    reg.swz = SWIZZLE_NOOP;
 350    reg.pad = 0;
 351    return reg;
 352 }
 353
 354
 355
 356 static struct ureg negate( struct ureg reg )
 357 {
 358    reg.negate ^= 1;
 359    return reg;
 360 }
 361
 362
 363 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
 364 {
 365    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
 366                            GET_SWZ(reg.swz, y),
 367                            GET_SWZ(reg.swz, z),
 368                            GET_SWZ(reg.swz, w));
 369    return reg;
 370 }
 371
 372
 373 static struct ureg swizzle1( struct ureg reg, int x )
 374 {
 375    return swizzle(reg, x, x, x, x);
 376 }
 377
 378
 379 static struct ureg get_temp( struct tnl_program *p )
 380 {
 381    int bit = ffs( ~p->temp_in_use );
 382    if (!bit) {
 383       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
 384       exit(1);
 385    }
 386
 387    if ((GLuint) bit > p->program->Base.NumTemporaries)
 388       p->program->Base.NumTemporaries = bit;
 389
 390    p->temp_in_use |= 1<<(bit-1);
 391    return make_ureg(PROGRAM_TEMPORARY, bit-1);
 392 }
 393
 394
 395 static struct ureg reserve_temp( struct tnl_program *p )
 396 {
 397    struct ureg temp = get_temp( p );
 398    p->temp_reserved |= 1<<temp.idx;
 399    return temp;
 400 }
 401
 402
 403 static void release_temp( struct tnl_program *p, struct ureg reg )
 404 {
 405    if (reg.file == PROGRAM_TEMPORARY) {
 406       p->temp_in_use &= ~(1<<reg.idx);
 407       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
 408    }
 409 }
 410
 411 static void release_temps( struct tnl_program *p )
 412 {
 413    p->temp_in_use = p->temp_reserved;
 414 }
 415
 416
 417 static struct ureg register_param5(struct tnl_program *p,
 418                                    GLint s0,
 419                                    GLint s1,
 420                                    GLint s2,
 421                                    GLint s3,
 422                                    GLint s4)
 423 {
 424    gl_state_index tokens[STATE_LENGTH];
 425    GLint idx;
 426    tokens[0] = s0;
 427    tokens[1] = s1;
 428    tokens[2] = s2;
 429    tokens[3] = s3;
 430    tokens[4] = s4;
 431    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
 432    return make_ureg(PROGRAM_STATE_VAR, idx);
 433 }
 434
 435
/* Shorthands for register_param5() that pass 0 for the unused trailing
 * state tokens.
 */
#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
 440
 441
 442
 443 /**
 444  * \param input  one of VERT_ATTRIB_x tokens.
 445  */
 446 static struct ureg register_input( struct tnl_program *p, GLuint input )
 447 {
 448    assert(input < VERT_ATTRIB_MAX);
 449
 450    if (p->state->varying_vp_inputs & VERT_BIT(input)) {
 451       p->program->Base.InputsRead |= VERT_BIT(input);
 452       return make_ureg(PROGRAM_INPUT, input);
 453    }
 454    else {
 455       return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
 456    }
 457 }
 458
 459
 460 /**
 461  * \param input  one of VERT_RESULT_x tokens.
 462  */
 463 static struct ureg register_output( struct tnl_program *p, GLuint output )
 464 {
 465    p->program->Base.OutputsWritten |= BITFIELD64_BIT(output);
 466    return make_ureg(PROGRAM_OUTPUT, output);
 467 }
 468
 469
 470 static struct ureg register_const4f( struct tnl_program *p,
 471                               GLfloat s0,
 472                               GLfloat s1,
 473                               GLfloat s2,
 474                               GLfloat s3)
 475 {
 476    gl_constant_value values[4];
 477    GLint idx;
 478    GLuint swizzle;
 479    values[0].f = s0;
 480    values[1].f = s1;
 481    values[2].f = s2;
 482    values[3].f = s3;
 483    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
 484                                      &swizzle );
 485    ASSERT(swizzle == SWIZZLE_NOOP);
 486    return make_ureg(PROGRAM_CONSTANT, idx);
 487 }
 488
/* Shorthands for register_const4f().  The 1f/2f/3f forms fill the
 * missing components with 0 and set w to 1; register_scalar_const()
 * replicates s0 into all four components.
 */
#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
 493
 494 static GLboolean is_undef( struct ureg reg )
 495 {
 496    return reg.file == PROGRAM_UNDEFINED;
 497 }
 498
 499
 500 static struct ureg get_identity_param( struct tnl_program *p )
 501 {
 502    if (is_undef(p->identity))
 503       p->identity = register_const4f(p, 0,0,0,1);
 504
 505    return p->identity;
 506 }
 507
 508 static void register_matrix_param5( struct tnl_program *p,
 509                                     GLint s0, /* modelview, projection, etc */
 510                                     GLint s1, /* texture matrix number */
 511                                     GLint s2, /* first row */
 512                                     GLint s3, /* last row */
 513                                     GLint s4, /* inverse, transpose, etc */
 514                                     struct ureg *matrix )
 515 {
 516    GLint i;
 517
 518    /* This is a bit sad as the support is there to pull the whole
 519     * matrix out in one go:
 520     */
 521    for (i = 0; i <= s3 - s2; i++)
 522       matrix[i] = register_param5( p, s0, s1, i, i, s4 );
 523 }
 524
 525
 526 static void emit_arg( struct prog_src_register *src,
 527                       struct ureg reg )
 528 {
 529    src->File = reg.file;
 530    src->Index = reg.idx;
 531    src->Swizzle = reg.swz;
 532    src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
 533    src->Abs = 0;
 534    src->RelAddr = 0;
 535    /* Check that bitfield sizes aren't exceeded */
 536    ASSERT(src->Index == reg.idx);
 537 }
 538
 539
 540 static void emit_dst( struct prog_dst_register *dst,
 541                       struct ureg reg, GLuint mask )
 542 {
 543    dst->File = reg.file;
 544    dst->Index = reg.idx;
 545    /* allow zero as a shorthand for xyzw */
 546    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
 547    dst->CondMask = COND_TR;  /* always pass cond test */
 548    dst->CondSwizzle = SWIZZLE_NOOP;
 549    dst->CondSrc = 0;
 550    /* Check that bitfield sizes aren't exceeded */
 551    ASSERT(dst->Index == reg.idx);
 552 }
 553
 554
 555 static void debug_insn( struct prog_instruction *inst, const char *fn,
 556                         GLuint line )
 557 {
 558    if (DISASSEM) {
 559       static const char *last_fn;
 560
 561       if (fn != last_fn) {
 562          last_fn = fn;
 563          printf("%s:\n", fn);
 564       }
 565
 566       printf("%d:\t", line);
 567       _mesa_print_instruction(inst);
 568    }
 569 }
 570
 571
 572 static void emit_op3fn(struct tnl_program *p,
 573                        enum prog_opcode op,
 574                        struct ureg dest,
 575                        GLuint mask,
 576                        struct ureg src0,
 577                        struct ureg src1,
 578                        struct ureg src2,
 579                        const char *fn,
 580                        GLuint line)
 581 {
 582    GLuint nr;
 583    struct prog_instruction *inst;
 584
 585    assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
 586
 587    if (p->program->Base.NumInstructions == p->max_inst) {
 588       /* need to extend the program's instruction array */
 589       struct prog_instruction *newInst;
 590
 591       /* double the size */
 592       p->max_inst *= 2;
 593
 594       newInst = _mesa_alloc_instructions(p->max_inst);
 595       if (!newInst) {
 596          _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
 597          return;
 598       }
 599
 600       _mesa_copy_instructions(newInst,
 601                               p->program->Base.Instructions,
 602                               p->program->Base.NumInstructions);
 603
 604       _mesa_free_instructions(p->program->Base.Instructions,
 605                               p->program->Base.NumInstructions);
 606
 607       p->program->Base.Instructions = newInst;
 608    }
 609
 610    nr = p->program->Base.NumInstructions++;
 611
 612    inst = &p->program->Base.Instructions[nr];
 613    inst->Opcode = (enum prog_opcode) op;
 614    inst->Data = 0;
 615
 616    emit_arg( &inst->SrcReg[0], src0 );
 617    emit_arg( &inst->SrcReg[1], src1 );
 618    emit_arg( &inst->SrcReg[2], src2 );
 619
 620    emit_dst( &inst->DstReg, dest, mask );
 621
 622    debug_insn(inst, fn, line);
 623 }
 624
 625
/* Wrappers around emit_op3fn() that supply the calling function and
 * line for the DISASSEM listing.  The 1- and 2-operand forms pass
 * `undef' for the unused source registers.
 */
#define emit_op3(p, op, dst, mask, src0, src1, src2) \
   emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)

#define emit_op2(p, op, dst, mask, src0, src1) \
    emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)

#define emit_op1(p, op, dst, mask, src0) \
    emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
 634
 635
 636 static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
 637 {
 638    if (reg.file == PROGRAM_TEMPORARY &&
 639        !(p->temp_reserved & (1<<reg.idx)))
 640       return reg;
 641    else {
 642       struct ureg temp = get_temp(p);
 643       emit_op1(p, OPCODE_MOV, temp, 0, reg);
 644       return temp;
 645    }
 646 }
 647
 648
 649 /* Currently no tracking performed of input/output/register size or
 650  * active elements.  Could be used to reduce these operations, as
 651  * could the matrix type.
 652  */
 653 static void emit_matrix_transform_vec4( struct tnl_program *p,
 654                                         struct ureg dest,
 655                                         const struct ureg *mat,
 656                                         struct ureg src)
 657 {
 658    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
 659    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
 660    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
 661    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
 662 }
 663
 664
 665 /* This version is much easier to implement if writemasks are not
 666  * supported natively on the target or (like SSE), the target doesn't
 667  * have a clean/obvious dotproduct implementation.
 668  */
 669 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
 670                                                   struct ureg dest,
 671                                                   const struct ureg *mat,
 672                                                   struct ureg src)
 673 {
 674    struct ureg tmp;
 675
 676    if (dest.file != PROGRAM_TEMPORARY)
 677       tmp = get_temp(p);
 678    else
 679       tmp = dest;
 680
 681    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
 682    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
 683    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
 684    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
 685
 686    if (dest.file != PROGRAM_TEMPORARY)
 687       release_temp(p, tmp);
 688 }
 689
 690
 691 static void emit_matrix_transform_vec3( struct tnl_program *p,
 692                                         struct ureg dest,
 693                                         const struct ureg *mat,
 694                                         struct ureg src)
 695 {
 696    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
 697    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
 698    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
 699 }
 700
 701
 702 static void emit_normalize_vec3( struct tnl_program *p,
 703                                  struct ureg dest,
 704                                  struct ureg src )
 705 {
 706 #if 0
 707    /* XXX use this when drivers are ready for NRM3 */
 708    emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src);
 709 #else
 710    struct ureg tmp = get_temp(p);
 711    emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
 712    emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
 713    emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
 714    release_temp(p, tmp);
 715 #endif
 716 }
 717
 718
 719 static void emit_passthrough( struct tnl_program *p,
 720                               GLuint input,
 721                               GLuint output )
 722 {
 723    struct ureg out = register_output(p, output);
 724    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
 725 }
 726
 727
 728 static struct ureg get_eye_position( struct tnl_program *p )
 729 {
 730    if (is_undef(p->eye_position)) {
 731       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 732       struct ureg modelview[4];
 733
 734       p->eye_position = reserve_temp(p);
 735
 736       if (p->mvp_with_dp4) {
 737          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 738                                  0, modelview );
 739
 740          emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 741       }
 742       else {
 743          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 744                                  STATE_MATRIX_TRANSPOSE, modelview );
 745
 746          emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 747       }
 748    }
 749
 750    return p->eye_position;
 751 }
 752
 753
 754 static struct ureg get_eye_position_z( struct tnl_program *p )
 755 {
 756    if (!is_undef(p->eye_position))
 757       return swizzle1(p->eye_position, Z);
 758
 759    if (is_undef(p->eye_position_z)) {
 760       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 761       struct ureg modelview[4];
 762
 763       p->eye_position_z = reserve_temp(p);
 764
 765       register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 766                               0, modelview );
 767
 768       emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
 769    }
 770
 771    return p->eye_position_z;
 772 }
 773
 774
 775 static struct ureg get_eye_position_normalized( struct tnl_program *p )
 776 {
 777    if (is_undef(p->eye_position_normalized)) {
 778       struct ureg eye = get_eye_position(p);
 779       p->eye_position_normalized = reserve_temp(p);
 780       emit_normalize_vec3(p, p->eye_position_normalized, eye);
 781    }
 782
 783    return p->eye_position_normalized;
 784 }
 785
 786
 787 static struct ureg get_transformed_normal( struct tnl_program *p )
 788 {
 789    if (is_undef(p->transformed_normal) &&
 790        !p->state->need_eye_coords &&
 791        !p->state->normalize &&
 792        !(p->state->need_eye_coords == p->state->rescale_normals))
 793    {
 794       p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
 795    }
 796    else if (is_undef(p->transformed_normal))
 797    {
 798       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
 799       struct ureg mvinv[3];
 800       struct ureg transformed_normal = reserve_temp(p);
 801
 802       if (p->state->need_eye_coords) {
 803          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
 804                                  STATE_MATRIX_INVTRANS, mvinv );
 805
 806          /* Transform to eye space:
 807           */
 808          emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
 809          normal = transformed_normal;
 810       }
 811
 812       /* Normalize/Rescale:
 813        */
 814       if (p->state->normalize) {
 815          emit_normalize_vec3( p, transformed_normal, normal );
 816          normal = transformed_normal;
 817       }
 818       else if (p->state->need_eye_coords == p->state->rescale_normals) {
 819          /* This is already adjusted for eye/non-eye rendering:
 820           */
 821          struct ureg rescale = register_param2(p, STATE_INTERNAL,
 822                                                STATE_NORMAL_SCALE);
 823
 824          emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
 825          normal = transformed_normal;
 826       }
 827
 828       assert(normal.file == PROGRAM_TEMPORARY);
 829       p->transformed_normal = normal;
 830    }
 831
 832    return p->transformed_normal;
 833 }
 834
 835
 836 static void build_hpos( struct tnl_program *p )
 837 {
 838    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 839    struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
 840    struct ureg mvp[4];
 841
 842    if (p->mvp_with_dp4) {
 843       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 844                               0, mvp );
 845       emit_matrix_transform_vec4( p, hpos, mvp, pos );
 846    }
 847    else {
 848       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 849                               STATE_MATRIX_TRANSPOSE, mvp );
 850       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
 851    }
 852 }
 853
 854
 855 static GLuint material_attrib( GLuint side, GLuint property )
 856 {
 857    return (property - STATE_AMBIENT) * 2 + side;
 858 }
 859
 860
 861 /**
 862  * Get a bitmask of which material values vary on a per-vertex basis.
 863  */
 864 static void set_material_flags( struct tnl_program *p )
 865 {
 866    p->color_materials = 0;
 867    p->materials = 0;
 868
 869    if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
 870       p->materials =
 871          p->color_materials = p->state->light_color_material_mask;
 872    }
 873
 874    p->materials |= (p->state->varying_vp_inputs >> VERT_ATTRIB_GENERIC0);
 875 }
 876
 877
 878 static struct ureg get_material( struct tnl_program *p, GLuint side,
 879                                  GLuint property )
 880 {
 881    GLuint attrib = material_attrib(side, property);
 882
 883    if (p->color_materials & (1<<attrib))
 884       return register_input(p, VERT_ATTRIB_COLOR0);
 885    else if (p->materials & (1<<attrib)) {
 886       /* Put material values in the GENERIC slots -- they are not used
 887        * for anything in fixed function mode.
 888        */
 889       return register_input( p, attrib + VERT_ATTRIB_GENERIC0 );
 890    }
 891    else
 892       return register_param3( p, STATE_MATERIAL, side, property );
 893 }
 894
/* Mask of the emission, ambient and diffuse material bits for `side'.
 * NOTE(review): shifting the FRONT bits by `side' presumably relies on
 * each BACK bit directly following its FRONT bit -- confirm against the
 * MAT_BIT_* definitions.
 */
#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
                                   MAT_BIT_FRONT_AMBIENT | \
                                   MAT_BIT_FRONT_DIFFUSE) << (side))
 898
 899
 900 /**
 901  * Either return a precalculated constant value or emit code to
 902  * calculate these values dynamically in the case where material calls
 903  * are present between begin/end pairs.
 904  *
 905  * Probably want to shift this to the program compilation phase - if
 906  * we always emitted the calculation here, a smart compiler could
 907  * detect that it was constant (given a certain set of inputs), and
 908  * lift it out of the main loop.  That way the programs created here
 909  * would be independent of the vertex_buffer details.
 910  */
 911 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
 912 {
 913    if (p->materials & SCENE_COLOR_BITS(side)) {
 914       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
 915       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
 916       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
 917       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
 918       struct ureg tmp = make_temp(p, material_diffuse);
 919       emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
 920                material_ambient, material_emission);
 921       return tmp;
 922    }
 923    else
 924       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
 925 }
 926
 927
 928 static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
 929                                   GLuint side, GLuint property )
 930 {
 931    GLuint attrib = material_attrib(side, property);
 932    if (p->materials & (1<<attrib)) {
 933       struct ureg light_value =
 934          register_param3(p, STATE_LIGHT, light, property);
 935       struct ureg material_value = get_material(p, side, property);
 936       struct ureg tmp = get_temp(p);
 937       emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
 938       return tmp;
 939    }
 940    else
 941       return register_param4(p, STATE_LIGHTPROD, light, side, property);
 942 }
 943
 944
 945 static struct ureg calculate_light_attenuation( struct tnl_program *p,
 946                                                 GLuint i,
 947                                                 struct ureg VPpli,
 948                                                 struct ureg dist )
 949 {
 950    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
 951                                              STATE_ATTENUATION);
 952    struct ureg att = undef;
 953
 954    /* Calculate spot attenuation:
 955     */
 956    if (!p->state->unit[i].light_spotcutoff_is_180) {
 957       struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
 958                                                   STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
 959       struct ureg spot = get_temp(p);
 960       struct ureg slt = get_temp(p);
 961
 962       att = get_temp(p);
 963
 964       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
 965       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
 966       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
 967       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
 968
 969       release_temp(p, spot);
 970       release_temp(p, slt);
 971    }
 972
 973    /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62):
 974     *
 975     * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero)
 976     */
 977    if (p->state->unit[i].light_attenuated && !is_undef(dist)) {
 978       if (is_undef(att))
 979          att = get_temp(p);
 980       /* 1/d,d,d,1/d */
 981       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
 982       /* 1,d,d*d,1/d */
 983       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
 984       /* 1/dist-atten */
 985       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
 986
 987       if (!p->state->unit[i].light_spotcutoff_is_180) {
 988          /* dist-atten */
 989          emit_op1(p, OPCODE_RCP, dist, 0, dist);
 990          /* spot-atten * dist-atten */
 991          emit_op2(p, OPCODE_MUL, att, 0, dist, att);
 992       }
 993       else {
 994          /* dist-atten */
 995          emit_op1(p, OPCODE_RCP, att, 0, dist);
 996       }
 997    }
 998
 999    return att;
1000 }
1001
1002
1003 /**
1004  * Compute:
1005  *   lit.y = MAX(0, dots.x)
1006  *   lit.z = SLT(0, dots.x)
1007  */
1008 static void emit_degenerate_lit( struct tnl_program *p,
1009                                  struct ureg lit,
1010                                  struct ureg dots )
1011 {
1012    struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
1013
1014    /* Note that lit.x & lit.w will not be examined.  Note also that
1015     * dots.xyzw == dots.xxxx.
1016     */
1017
1018    /* MAX lit, id, dots;
1019     */
1020    emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
1021
1022    /* result[2] = (in > 0 ? 1 : 0)
1023     * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
1024     */
1025    emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
1026 }
1027
1028
/**
 * Emit the fixed-function lighting code.
 *
 * The scene colour is written to VERT_RESULT_COL0 first (plus COL1 when
 * separate specular is enabled, and BFC0/BFC1 when two-sided lighting is
 * enabled) so the outputs are valid even with no enabled lights.  Each
 * enabled light then accumulates its ambient, diffuse and specular
 * contribution into the _col0/_col1 (front) and _bfc0/_bfc1 (back)
 * accumulators; the last enabled light writes directly to the output
 * registers.
 *
 * Need to add some additional parameters to allow lighting in object
 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
 * space lighting.
 */
static void build_lighting( struct tnl_program *p )
{
   const GLboolean twoside = p->state->light_twoside;
   const GLboolean separate = p->state->separate_specular;
   /* nr_lights: number of enabled lights; count: how many of them have
    * been processed so far in the main loop.
    */
   GLuint nr_lights = 0, count = 0;
   struct ureg normal = get_transformed_normal(p);
   struct ureg lit = get_temp(p);
   struct ureg dots = get_temp(p);
   /* Front-face (_col*) and back-face (_bfc*) colour accumulators. */
   struct ureg _col0 = undef, _col1 = undef;
   struct ureg _bfc0 = undef, _bfc1 = undef;
   GLuint i;

   /*
    * NOTE:
    * dots.x = dot(normal, VPpli)
    * dots.y = dot(normal, halfAngle)
    * dots.z = back.shininess
    * dots.w = front.shininess
    */

   /* Count the enabled lights so the last one can be recognised below. */
   for (i = 0; i < MAX_LIGHTS; i++)
      if (p->state->unit[i].light_enabled)
         nr_lights++;

   set_material_flags(p);

   /* Front face setup: shininess into dots.w, scene colour into _col0. */
   {
      if (!p->state->material_shininess_is_zero) {
         struct ureg shininess = get_material(p, 0, STATE_SHININESS);
         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
         release_temp(p, shininess);
      }

      _col0 = make_temp(p, get_scenecolor(p, 0));
      /* Without separate specular, specular accumulates into the same
       * register as the primary colour.
       */
      if (separate)
         _col1 = make_temp(p, get_identity_param(p));
      else
         _col1 = _col0;
   }

   /* Back face setup, mirroring the front face. */
   if (twoside) {
      if (!p->state->material_shininess_is_zero) {
         /* Note that we negate the back-face specular exponent here.
          * The negation will be un-done later in the back-face code below.
          */
         struct ureg shininess = get_material(p, 1, STATE_SHININESS);
         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
                  negate(swizzle1(shininess,X)));
         release_temp(p, shininess);
      }

      _bfc0 = make_temp(p, get_scenecolor(p, 1));
      if (separate)
         _bfc1 = make_temp(p, get_identity_param(p));
      else
         _bfc1 = _bfc0;
   }

   /* If no lights, still need to emit the scenecolor.
    */
   {
      struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
      emit_op1(p, OPCODE_MOV, res0, 0, _col0);
   }

   if (separate) {
      struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
      emit_op1(p, OPCODE_MOV, res1, 0, _col1);
   }

   if (twoside) {
      struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
      emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
   }

   if (twoside && separate) {
      struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
      emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
   }

   /* Nothing more to do when no light is enabled. */
   if (nr_lights == 0) {
      release_temps(p);
      return;
   }

   for (i = 0; i < MAX_LIGHTS; i++) {
      if (p->state->unit[i].light_enabled) {
         struct ureg half = undef;
         struct ureg att = undef, VPpli = undef;
         struct ureg dist = undef;

         count++;
         if (p->state->unit[i].light_eyepos3_is_zero) {
             /* Use the pre-normalized light position state parameter;
              * 'dist' stays undef on this path.
              */
             VPpli = register_param3(p, STATE_INTERNAL,
                                     STATE_LIGHT_POSITION_NORMALIZED, i);
         } else {
            struct ureg Ppli = register_param3(p, STATE_INTERNAL,
                                               STATE_LIGHT_POSITION, i);
            struct ureg V = get_eye_position(p);

            VPpli = get_temp(p);
            dist = get_temp(p);

            /* Calculate VPpli vector
             */
            emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);

            /* Normalize VPpli.  The dist value is also used in
             * attenuation below.
             */
            emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
            emit_op1(p, OPCODE_RSQ, dist, 0, dist);
            emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
         }

         /* Calculate attenuation (att stays undef if none applies):
          */
         att = calculate_light_attenuation(p, i, VPpli, dist);
         release_temp(p, dist);

         /* Calculate viewer direction, or use infinite viewer.  The half
          * vector is only needed when there is a specular term.
          */
         if (!p->state->material_shininess_is_zero) {
            if (p->state->light_local_viewer) {
               struct ureg eye_hat = get_eye_position_normalized(p);
               half = get_temp(p);
               emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
               emit_normalize_vec3(p, half, half);
            } else if (p->state->unit[i].light_eyepos3_is_zero) {
               half = register_param3(p, STATE_INTERNAL,
                                      STATE_LIGHT_HALF_VECTOR, i);
            } else {
               /* identity is {0,0,0,1}; swizzled to {0,0,1,0} */
               struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
               half = get_temp(p);
               emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
               emit_normalize_vec3(p, half, half);
            }
         }

         /* Calculate dot products:
          */
         if (p->state->material_shininess_is_zero) {
            emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
         }
         else {
            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
         }

         /* Front face lighting:
          */
         {
            struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
            struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
            struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
            struct ureg res0, res1;
            GLuint mask0, mask1;

            /* The last enabled light writes XYZ straight to the output
             * registers; earlier lights keep accumulating in temporaries.
             */
            if (count == nr_lights) {
               if (separate) {
                  mask0 = WRITEMASK_XYZ;
                  mask1 = WRITEMASK_XYZ;
                  res0 = register_output( p, VERT_RESULT_COL0 );
                  res1 = register_output( p, VERT_RESULT_COL1 );
               }
               else {
                  /* _col1 aliases _col0 here, so the diffuse MAD goes to
                   * the temporary and the specular MAD produces the
                   * final COL0 output.
                   */
                  mask0 = 0;
                  mask1 = WRITEMASK_XYZ;
                  res0 = _col0;
                  res1 = register_output( p, VERT_RESULT_COL0 );
               }
            }
            else {
               mask0 = 0;
               mask1 = 0;
               res0 = _col0;
               res1 = _col1;
            }

            if (!is_undef(att)) {
               /* light is attenuated by distance */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
               emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
            }
            else if (!p->state->material_shininess_is_zero) {
               /* there's a non-zero specular term */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
            }
            else {
               /* no attenuation, no specular */
               emit_degenerate_lit(p, lit, dots);
               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
            }

            /* lit.y scales the diffuse term, lit.z the specular term. */
            emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
            emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);

            release_temp(p, ambient);
            release_temp(p, diffuse);
            release_temp(p, specular);
         }

         /* Back face lighting:
          */
         if (twoside) {
            struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
            struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
            struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
            struct ureg res0, res1;
            GLuint mask0, mask1;

            /* Same last-light handling as for the front face. */
            if (count == nr_lights) {
               if (separate) {
                  mask0 = WRITEMASK_XYZ;
                  mask1 = WRITEMASK_XYZ;
                  res0 = register_output( p, VERT_RESULT_BFC0 );
                  res1 = register_output( p, VERT_RESULT_BFC1 );
               }
               else {
                  mask0 = 0;
                  mask1 = WRITEMASK_XYZ;
                  res0 = _bfc0;
                  res1 = register_output( p, VERT_RESULT_BFC0 );
               }
            }
            else {
               res0 = _bfc0;
               res1 = _bfc1;
               mask0 = 0;
               mask1 = 0;
            }

            /* For the back face we need to negate the X and Y component
             * dot products.  dots.Z has the negated back-face specular
             * exponent.  We swizzle that into the W position.  This
             * negation makes the back-face specular term positive again.
             */
            dots = negate(swizzle(dots,X,Y,W,Z));

            if (!is_undef(att)) {
               /* light is attenuated */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
               emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
            }
            else if (!p->state->material_shininess_is_zero) {
               /* there's a non-zero specular term */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
            }
            else {
               /* no attenuation, no specular */
               emit_degenerate_lit(p, lit, dots);
               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
            }

            emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
            emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
            /* restore dots to its original state for subsequent lights
             * by negating and swizzling again.
             */
            dots = negate(swizzle(dots,X,Y,W,Z));

            release_temp(p, ambient);
            release_temp(p, diffuse);
            release_temp(p, specular);
         }

         release_temp(p, half);
         release_temp(p, VPpli);
         release_temp(p, att);
      }
   }

   release_temps( p );
}
1308
1309
1310 static void build_fog( struct tnl_program *p )
1311 {
1312    struct ureg fog = register_output(p, VERT_RESULT_FOGC);
1313    struct ureg input;
1314
1315    if (p->state->fog_source_is_depth) {
1316
1317       switch (p->state->fog_distance_mode) {
1318       case FDM_EYE_RADIAL: /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */
1319         input = get_eye_position(p);
1320         emit_op2(p, OPCODE_DP3, fog, WRITEMASK_X, input, input);
1321         emit_op1(p, OPCODE_RSQ, fog, WRITEMASK_X, fog);
1322         emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, fog);
1323         break;
1324       case FDM_EYE_PLANE: /* Z = Ze */
1325         input = get_eye_position_z(p);
1326         emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input);
1327         break;
1328       case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */
1329         input = get_eye_position_z(p);
1330         emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
1331         break;
1332       default: assert(0); break; /* can't happen */
1333       }
1334
1335    }
1336    else {
1337       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1338       emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
1339    }
1340
1341    emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p));
1342 }
1343
1344
1345 static void build_reflect_texgen( struct tnl_program *p,
1346                                   struct ureg dest,
1347                                   GLuint writemask )
1348 {
1349    struct ureg normal = get_transformed_normal(p);
1350    struct ureg eye_hat = get_eye_position_normalized(p);
1351    struct ureg tmp = get_temp(p);
1352
1353    /* n.u */
1354    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1355    /* 2n.u */
1356    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1357    /* (-2n.u)n + u */
1358    emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
1359
1360    release_temp(p, tmp);
1361 }
1362
1363
1364 static void build_sphere_texgen( struct tnl_program *p,
1365                                  struct ureg dest,
1366                                  GLuint writemask )
1367 {
1368    struct ureg normal = get_transformed_normal(p);
1369    struct ureg eye_hat = get_eye_position_normalized(p);
1370    struct ureg tmp = get_temp(p);
1371    struct ureg half = register_scalar_const(p, .5);
1372    struct ureg r = get_temp(p);
1373    struct ureg inv_m = get_temp(p);
1374    struct ureg id = get_identity_param(p);
1375
1376    /* Could share the above calculations, but it would be
1377     * a fairly odd state for someone to set (both sphere and
1378     * reflection active for different texture coordinate
1379     * components.  Of course - if two texture units enable
1380     * reflect and/or sphere, things start to tilt in favour
1381     * of seperating this out:
1382     */
1383
1384    /* n.u */
1385    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1386    /* 2n.u */
1387    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1388    /* (-2n.u)n + u */
1389    emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
1390    /* r + 0,0,1 */
1391    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1392    /* rx^2 + ry^2 + (rz+1)^2 */
1393    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1394    /* 2/m */
1395    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1396    /* 1/m */
1397    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1398    /* r/m + 1/2 */
1399    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1400
1401    release_temp(p, tmp);
1402    release_temp(p, r);
1403    release_temp(p, inv_m);
1404 }
1405
1406
1407 static void build_texture_transform( struct tnl_program *p )
1408 {
1409    GLuint i, j;
1410
1411    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
1412
1413       if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i)))
1414          continue;
1415
1416       if (p->state->unit[i].coord_replace)
1417          continue;
1418
1419       if (p->state->unit[i].texgen_enabled ||
1420           p->state->unit[i].texmat_enabled) {
1421
1422          GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1423          struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
1424          struct ureg out_texgen = undef;
1425
1426          if (p->state->unit[i].texgen_enabled) {
1427             GLuint copy_mask = 0;
1428             GLuint sphere_mask = 0;
1429             GLuint reflect_mask = 0;
1430             GLuint normal_mask = 0;
1431             GLuint modes[4];
1432
1433             if (texmat_enabled)
1434                out_texgen = get_temp(p);
1435             else
1436                out_texgen = out;
1437
1438             modes[0] = p->state->unit[i].texgen_mode0;
1439             modes[1] = p->state->unit[i].texgen_mode1;
1440             modes[2] = p->state->unit[i].texgen_mode2;
1441             modes[3] = p->state->unit[i].texgen_mode3;
1442
1443             for (j = 0; j < 4; j++) {
1444                switch (modes[j]) {
1445                case TXG_OBJ_LINEAR: {
1446                   struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1447                   struct ureg plane =
1448                      register_param3(p, STATE_TEXGEN, i,
1449                                      STATE_TEXGEN_OBJECT_S + j);
1450
1451                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1452                            obj, plane );
1453                   break;
1454                }
1455                case TXG_EYE_LINEAR: {
1456                   struct ureg eye = get_eye_position(p);
1457                   struct ureg plane =
1458                      register_param3(p, STATE_TEXGEN, i,
1459                                      STATE_TEXGEN_EYE_S + j);
1460
1461                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1462                            eye, plane );
1463                   break;
1464                }
1465                case TXG_SPHERE_MAP:
1466                   sphere_mask |= WRITEMASK_X << j;
1467                   break;
1468                case TXG_REFLECTION_MAP:
1469                   reflect_mask |= WRITEMASK_X << j;
1470                   break;
1471                case TXG_NORMAL_MAP:
1472                   normal_mask |= WRITEMASK_X << j;
1473                   break;
1474                case TXG_NONE:
1475                   copy_mask |= WRITEMASK_X << j;
1476                }
1477             }
1478
1479             if (sphere_mask) {
1480                build_sphere_texgen(p, out_texgen, sphere_mask);
1481             }
1482
1483             if (reflect_mask) {
1484                build_reflect_texgen(p, out_texgen, reflect_mask);
1485             }
1486
1487             if (normal_mask) {
1488                struct ureg normal = get_transformed_normal(p);
1489                emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1490             }
1491
1492             if (copy_mask) {
1493                struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1494                emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1495             }
1496          }
1497
1498          if (texmat_enabled) {
1499             struct ureg texmat[4];
1500             struct ureg in = (!is_undef(out_texgen) ?
1501                               out_texgen :
1502                               register_input(p, VERT_ATTRIB_TEX0+i));
1503             if (p->mvp_with_dp4) {
1504                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1505                                        0, texmat );
1506                emit_matrix_transform_vec4( p, out, texmat, in );
1507             }
1508             else {
1509                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1510                                        STATE_MATRIX_TRANSPOSE, texmat );
1511                emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1512             }
1513          }
1514
1515          release_temps(p);
1516       }
1517       else {
1518          emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
1519       }
1520    }
1521 }
1522
1523
1524 /**
1525  * Point size attenuation computation.
1526  */
1527 static void build_atten_pointsize( struct tnl_program *p )
1528 {
1529    struct ureg eye = get_eye_position_z(p);
1530    struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
1531    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1532    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1533    struct ureg ut = get_temp(p);
1534
1535    /* dist = |eyez| */
1536    emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
1537    /* p1 + dist * (p2 + dist * p3); */
1538    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1539                 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
1540    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1541                 ut, swizzle1(state_attenuation, X));
1542
1543    /* 1 / sqrt(factor) */
1544    emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
1545
1546 #if 0
1547    /* out = pointSize / sqrt(factor) */
1548    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1549 #else
1550    /* this is a good place to clamp the point size since there's likely
1551     * no hardware registers to clamp point size at rasterization time.
1552     */
1553    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
1554    emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
1555    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
1556 #endif
1557
1558    release_temp(p, ut);
1559 }
1560
1561
1562 /**
1563  * Pass-though per-vertex point size, from user's point size array.
1564  */
1565 static void build_array_pointsize( struct tnl_program *p )
1566 {
1567    struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
1568    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1569    emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
1570 }
1571
1572
1573 static void build_tnl_program( struct tnl_program *p )
1574 {
1575    /* Emit the program, starting with modelviewproject:
1576     */
1577    build_hpos(p);
1578
1579    /* Lighting calculations:
1580     */
1581    if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
1582       if (p->state->light_global_enabled)
1583          build_lighting(p);
1584       else {
1585          if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1586             emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
1587
1588          if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1589             emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
1590       }
1591    }
1592
1593    if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC)
1594       build_fog(p);
1595
1596    if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
1597       build_texture_transform(p);
1598
1599    if (p->state->point_attenuated)
1600       build_atten_pointsize(p);
1601    else if (p->state->point_array)
1602       build_array_pointsize(p);
1603
1604    /* Finish up:
1605     */
1606    emit_op1(p, OPCODE_END, undef, 0, undef);
1607
1608    /* Disassemble:
1609     */
1610    if (DISASSEM) {
1611       printf ("\n");
1612    }
1613 }
1614
1615
1616 static void
1617 create_new_program( const struct state_key *key,
1618                     struct gl_vertex_program *program,
1619                     GLboolean mvp_with_dp4,
1620                     GLuint max_temps)
1621 {
1622    struct tnl_program p;
1623
1624    memset(&p, 0, sizeof(p));
1625    p.state = key;
1626    p.program = program;
1627    p.eye_position = undef;
1628    p.eye_position_z = undef;
1629    p.eye_position_normalized = undef;
1630    p.transformed_normal = undef;
1631    p.identity = undef;
1632    p.temp_in_use = 0;
1633    p.mvp_with_dp4 = mvp_with_dp4;
1634
1635    if (max_temps >= sizeof(int) * 8)
1636       p.temp_reserved = 0;
1637    else
1638       p.temp_reserved = ~((1<<max_temps)-1);
1639
1640    /* Start by allocating 32 instructions.
1641     * If we need more, we'll grow the instruction array as needed.
1642     */
1643    p.max_inst = 32;
1644    p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst);
1645    p.program->Base.String = NULL;
1646    p.program->Base.NumInstructions =
1647    p.program->Base.NumTemporaries =
1648    p.program->Base.NumParameters =
1649    p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
1650    p.program->Base.Parameters = _mesa_new_parameter_list();
1651    p.program->Base.InputsRead = 0;
1652    p.program->Base.OutputsWritten = 0;
1653
1654    build_tnl_program( &p );
1655 }
1656
1657
1658 /**
1659  * Return a vertex program which implements the current fixed-function
1660  * transform/lighting/texgen operations.
1661  * XXX move this into core mesa (main/)
1662  */
1663 struct gl_vertex_program *
1664 _mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
1665 {
1666    struct gl_vertex_program *prog;
1667    struct state_key key;
1668
1669    /* Grab all the relevent state and put it in a single structure:
1670     */
1671    make_state_key(ctx, &key);
1672
1673    /* Look for an already-prepared program for this state:
1674     */
1675    prog = (struct gl_vertex_program *)
1676       _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key));
1677
1678    if (!prog) {
1679       /* OK, we'll have to build a new one */
1680       if (0)
1681          printf("Build new TNL program\n");
1682
1683       prog = (struct gl_vertex_program *)
1684          ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
1685       if (!prog)
1686          return NULL;
1687
1688       create_new_program( &key, prog,
1689                           ctx->mvp_with_dp4,
1690                           ctx->Const.VertexProgram.MaxTemps );
1691
1692 #if 0
1693       if (ctx->Driver.ProgramStringNotify)
1694          ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
1695                                           &prog->Base );
1696 #endif
1697       _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
1698                                  &key, sizeof(key), &prog->Base);
1699    }
1700
1701    return prog;
1702 }