src/mesa/main/ffvertex_prog.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \file ffvertex_prog.c
  30  *
  31  * Create a vertex program to execute the current fixed function T&L pipeline.
  32  * \author Keith Whitwell
  33  */
  34
  35
  36 #include "main/glheader.h"
  37 #include "main/mtypes.h"
  38 #include "main/macros.h"
  39 #include "main/enums.h"
  40 #include "main/ffvertex_prog.h"
  41 #include "shader/program.h"
  42 #include "shader/prog_cache.h"
  43 #include "shader/prog_instruction.h"
  44 #include "shader/prog_parameter.h"
  45 #include "shader/prog_print.h"
  46 #include "shader/prog_statevars.h"
  47
  48
  49 struct state_key {
  50    unsigned light_color_material_mask:12;
  51    unsigned light_material_mask:12;
  52    unsigned light_global_enabled:1;
  53    unsigned light_local_viewer:1;
  54    unsigned light_twoside:1;
  55    unsigned light_color_material:1;
  56    unsigned material_shininess_is_zero:1;
  57    unsigned need_eye_coords:1;
  58    unsigned normalize:1;
  59    unsigned rescale_normals:1;
  60
  61    unsigned fog_source_is_depth:1;
  62    unsigned tnl_do_vertex_fog:1;
  63    unsigned separate_specular:1;
  64    unsigned fog_mode:2;
  65    unsigned point_attenuated:1;
  66    unsigned point_array:1;
  67    unsigned texture_enabled_global:1;
  68    unsigned fragprog_inputs_read:12;
  69
  70    unsigned varying_vp_inputs;
  71
  72    struct {
  73       unsigned light_enabled:1;
  74       unsigned light_eyepos3_is_zero:1;
  75       unsigned light_spotcutoff_is_180:1;
  76       unsigned light_attenuated:1;
  77       unsigned texunit_really_enabled:1;
  78       unsigned texmat_enabled:1;
  79       unsigned texgen_enabled:4;
  80       unsigned texgen_mode0:4;
  81       unsigned texgen_mode1:4;
  82       unsigned texgen_mode2:4;
  83       unsigned texgen_mode3:4;
  84    } unit[8];
  85 };
  86
  87
  88
  89 #define FOG_NONE   0
  90 #define FOG_LINEAR 1
  91 #define FOG_EXP    2
  92 #define FOG_EXP2   3
  93
  94 static GLuint translate_fog_mode( GLenum mode )
  95 {
  96    switch (mode) {
  97    case GL_LINEAR: return FOG_LINEAR;
  98    case GL_EXP: return FOG_EXP;
  99    case GL_EXP2: return FOG_EXP2;
 100    default: return FOG_NONE;
 101    }
 102 }
 103
 104
 105 #define TXG_NONE           0
 106 #define TXG_OBJ_LINEAR     1
 107 #define TXG_EYE_LINEAR     2
 108 #define TXG_SPHERE_MAP     3
 109 #define TXG_REFLECTION_MAP 4
 110 #define TXG_NORMAL_MAP     5
 111
 112 static GLuint translate_texgen( GLboolean enabled, GLenum mode )
 113 {
 114    if (!enabled)
 115       return TXG_NONE;
 116
 117    switch (mode) {
 118    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
 119    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
 120    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
 121    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
 122    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
 123    default: return TXG_NONE;
 124    }
 125 }
 126
 127
 128 /**
 129  * Returns bitmask of flags indicating which materials are set per-vertex
 130  * in the current VB.
 131  * XXX get these from the VBO...
 132  */
 133 static GLbitfield
 134 tnl_get_per_vertex_materials(GLcontext *ctx)
 135 {
 136    GLbitfield mask = 0x0;
 137 #if 0
 138    TNLcontext *tnl = TNL_CONTEXT(ctx);
 139    struct vertex_buffer *VB = &tnl->vb;
 140    GLuint i;
 141
 142    for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++)
 143       if (VB->AttribPtr[i] && VB->AttribPtr[i]->stride)
 144          mask |= 1 << (i - _TNL_FIRST_MAT);
 145 #endif
 146    return mask;
 147 }
 148
 149
 150 /**
 151  * Should fog be computed per-vertex?
 152  */
 153 static GLboolean
 154 tnl_get_per_vertex_fog(GLcontext *ctx)
 155 {
 156 #if 0
 157    TNLcontext *tnl = TNL_CONTEXT(ctx);
 158    return tnl->_DoVertexFog;
 159 #else
 160    return GL_FALSE;
 161 #endif
 162 }
 163
 164
 165 static GLboolean check_active_shininess( GLcontext *ctx,
 166                                          const struct state_key *key,
 167                                          GLuint side )
 168 {
 169    GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side);
 170
 171    if (key->light_color_material_mask & bit)
 172       return GL_TRUE;
 173
 174    if (key->light_material_mask & bit)
 175       return GL_TRUE;
 176
 177    if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F)
 178       return GL_TRUE;
 179
 180    return GL_FALSE;
 181 }
 182
 183
 184 static void make_state_key( GLcontext *ctx, struct state_key *key )
 185 {
 186    const struct gl_fragment_program *fp;
 187    GLuint i;
 188
 189    memset(key, 0, sizeof(struct state_key));
 190    fp = ctx->FragmentProgram._Current;
 191
 192    /* This now relies on texenvprogram.c being active:
 193     */
 194    assert(fp);
 195
 196    key->need_eye_coords = ctx->_NeedEyeCoords;
 197
 198    key->fragprog_inputs_read = fp->Base.InputsRead;
 199    key->varying_vp_inputs = ctx->varying_vp_inputs;
 200
 201    if (ctx->RenderMode == GL_FEEDBACK) {
 202       /* make sure the vertprog emits color and tex0 */
 203       key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
 204    }
 205
 206    key->separate_specular = (ctx->Light.Model.ColorControl ==
 207                              GL_SEPARATE_SPECULAR_COLOR);
 208
 209    if (ctx->Light.Enabled) {
 210       key->light_global_enabled = 1;
 211
 212       if (ctx->Light.Model.LocalViewer)
 213          key->light_local_viewer = 1;
 214
 215       if (ctx->Light.Model.TwoSide)
 216          key->light_twoside = 1;
 217
 218       if (ctx->Light.ColorMaterialEnabled) {
 219          key->light_color_material = 1;
 220          key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
 221       }
 222
 223       key->light_material_mask = tnl_get_per_vertex_materials(ctx);
 224
 225       for (i = 0; i < MAX_LIGHTS; i++) {
 226          struct gl_light *light = &ctx->Light.Light[i];
 227
 228          if (light->Enabled) {
 229             key->unit[i].light_enabled = 1;
 230
 231             if (light->EyePosition[3] == 0.0)
 232                key->unit[i].light_eyepos3_is_zero = 1;
 233
 234             if (light->SpotCutoff == 180.0)
 235                key->unit[i].light_spotcutoff_is_180 = 1;
 236
 237             if (light->ConstantAttenuation != 1.0 ||
 238                 light->LinearAttenuation != 0.0 ||
 239                 light->QuadraticAttenuation != 0.0)
 240                key->unit[i].light_attenuated = 1;
 241          }
 242       }
 243
 244       if (check_active_shininess(ctx, key, 0)) {
 245          key->material_shininess_is_zero = 0;
 246       }
 247       else if (key->light_twoside &&
 248                check_active_shininess(ctx, key, 1)) {
 249          key->material_shininess_is_zero = 0;
 250       }
 251       else {
 252          key->material_shininess_is_zero = 1;
 253       }
 254    }
 255
 256    if (ctx->Transform.Normalize)
 257       key->normalize = 1;
 258
 259    if (ctx->Transform.RescaleNormals)
 260       key->rescale_normals = 1;
 261
 262    key->fog_mode = translate_fog_mode(fp->FogOption);
 263
 264    if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
 265       key->fog_source_is_depth = 1;
 266
 267    key->tnl_do_vertex_fog = tnl_get_per_vertex_fog(ctx);
 268
 269    if (ctx->Point._Attenuated)
 270       key->point_attenuated = 1;
 271
 272 #if FEATURE_point_size_array
 273    if (ctx->Array.ArrayObj->PointSize.Enabled)
 274       key->point_array = 1;
 275 #endif
 276
 277    if (ctx->Texture._TexGenEnabled ||
 278        ctx->Texture._TexMatEnabled ||
 279        ctx->Texture._EnabledUnits)
 280       key->texture_enabled_global = 1;
 281
 282    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
 283       struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
 284
 285       if (texUnit->_ReallyEnabled)
 286          key->unit[i].texunit_really_enabled = 1;
 287
 288       if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
 289          key->unit[i].texmat_enabled = 1;
 290
 291       if (texUnit->TexGenEnabled) {
 292          key->unit[i].texgen_enabled = 1;
 293
 294          key->unit[i].texgen_mode0 =
 295             translate_texgen( texUnit->TexGenEnabled & (1<<0),
 296                               texUnit->GenModeS );
 297          key->unit[i].texgen_mode1 =
 298             translate_texgen( texUnit->TexGenEnabled & (1<<1),
 299                               texUnit->GenModeT );
 300          key->unit[i].texgen_mode2 =
 301             translate_texgen( texUnit->TexGenEnabled & (1<<2),
 302                               texUnit->GenModeR );
 303          key->unit[i].texgen_mode3 =
 304             translate_texgen( texUnit->TexGenEnabled & (1<<3),
 305                               texUnit->GenModeQ );
 306       }
 307    }
 308 }
 309
 310
 311
 312 /* Very useful debugging tool - produces annotated listing of
 313  * generated program with line/function references for each
 314  * instruction back into this file:
 315  */
 316 #define DISASSEM 0
 317
 318 /* Should be tunable by the driver - do we want to do matrix
 319  * multiplications with DP4's or with MUL/MAD's?  SSE works better
 320  * with the latter, drivers may differ.
 321  */
 322 #define PREFER_DP4 0
 323
 324
 325 /* Use uregs to represent registers internally, translate to Mesa's
 326  * expected formats on emit.
 327  *
 328  * NOTE: These are passed by value extensively in this file rather
 329  * than as usual by pointer reference.  If this disturbs you, try
 330  * remembering they are just 32bits in size.
 331  *
 332  * GCC is smart enough to deal with these dword-sized structures in
 333  * much the same way as if I had defined them as dwords and was using
 334  * macros to access and set the fields.  This is much nicer and easier
 335  * to evolve.
 336  */
 337 struct ureg {
 338    GLuint file:4;
 339    GLint idx:9;      /* relative addressing may be negative */
 340                      /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
 341    GLuint negate:1;
 342    GLuint swz:12;
 343    GLuint pad:6;
 344 };
 345
 346
 347 struct tnl_program {
 348    const struct state_key *state;
 349    struct gl_vertex_program *program;
 350    GLint max_inst;  /** number of instructions allocated for program */
 351
 352    GLuint temp_in_use;
 353    GLuint temp_reserved;
 354
 355    struct ureg eye_position;
 356    struct ureg eye_position_z;
 357    struct ureg eye_position_normalized;
 358    struct ureg transformed_normal;
 359    struct ureg identity;
 360
 361    GLuint materials;
 362    GLuint color_materials;
 363 };
 364
 365
 366 static const struct ureg undef = {
 367    PROGRAM_UNDEFINED,
 368    0,
 369    0,
 370    0,
 371    0
 372 };
 373
 374 /* Local shorthand:
 375  */
 376 #define X    SWIZZLE_X
 377 #define Y    SWIZZLE_Y
 378 #define Z    SWIZZLE_Z
 379 #define W    SWIZZLE_W
 380
 381
 382 /* Construct a ureg:
 383  */
 384 static struct ureg make_ureg(GLuint file, GLint idx)
 385 {
 386    struct ureg reg;
 387    reg.file = file;
 388    reg.idx = idx;
 389    reg.negate = 0;
 390    reg.swz = SWIZZLE_NOOP;
 391    reg.pad = 0;
 392    return reg;
 393 }
 394
 395
 396
 397 static struct ureg negate( struct ureg reg )
 398 {
 399    reg.negate ^= 1;
 400    return reg;
 401 }
 402
 403
 404 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
 405 {
 406    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
 407                            GET_SWZ(reg.swz, y),
 408                            GET_SWZ(reg.swz, z),
 409                            GET_SWZ(reg.swz, w));
 410
 411    return reg;
 412 }
 413
 414
 415 static struct ureg swizzle1( struct ureg reg, int x )
 416 {
 417    return swizzle(reg, x, x, x, x);
 418 }
 419
 420
 421 static struct ureg get_temp( struct tnl_program *p )
 422 {
 423    int bit = _mesa_ffs( ~p->temp_in_use );
 424    if (!bit) {
 425       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
 426       _mesa_exit(1);
 427    }
 428
 429    if ((GLuint) bit > p->program->Base.NumTemporaries)
 430       p->program->Base.NumTemporaries = bit;
 431
 432    p->temp_in_use |= 1<<(bit-1);
 433    return make_ureg(PROGRAM_TEMPORARY, bit-1);
 434 }
 435
 436
 437 static struct ureg reserve_temp( struct tnl_program *p )
 438 {
 439    struct ureg temp = get_temp( p );
 440    p->temp_reserved |= 1<<temp.idx;
 441    return temp;
 442 }
 443
 444
 445 static void release_temp( struct tnl_program *p, struct ureg reg )
 446 {
 447    if (reg.file == PROGRAM_TEMPORARY) {
 448       p->temp_in_use &= ~(1<<reg.idx);
 449       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
 450    }
 451 }
 452
 453 static void release_temps( struct tnl_program *p )
 454 {
 455    p->temp_in_use = p->temp_reserved;
 456 }
 457
 458
 459 static struct ureg register_param5(struct tnl_program *p,
 460                                    GLint s0,
 461                                    GLint s1,
 462                                    GLint s2,
 463                                    GLint s3,
 464                                    GLint s4)
 465 {
 466    gl_state_index tokens[STATE_LENGTH];
 467    GLint idx;
 468    tokens[0] = s0;
 469    tokens[1] = s1;
 470    tokens[2] = s2;
 471    tokens[3] = s3;
 472    tokens[4] = s4;
 473    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
 474    return make_ureg(PROGRAM_STATE_VAR, idx);
 475 }
 476
 477
 478 #define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
 479 #define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
 480 #define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
 481 #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
 482
 483
 484
 485 /**
 486  * \param input  one of VERT_ATTRIB_x tokens.
 487  */
 488 static struct ureg register_input( struct tnl_program *p, GLuint input )
 489 {
 490    /* Material attribs are passed here as inputs >= 32
 491     */
 492    if (input >= 32 || (p->state->varying_vp_inputs & (1<<input))) {
 493       p->program->Base.InputsRead |= (1<<input);
 494       return make_ureg(PROGRAM_INPUT, input);
 495    }
 496    else {
 497       return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
 498    }
 499 }
 500
 501
 502 /**
 503  * \param input  one of VERT_RESULT_x tokens.
 504  */
 505 static struct ureg register_output( struct tnl_program *p, GLuint output )
 506 {
 507    p->program->Base.OutputsWritten |= (1<<output);
 508    return make_ureg(PROGRAM_OUTPUT, output);
 509 }
 510
 511
 512 static struct ureg register_const4f( struct tnl_program *p,
 513                               GLfloat s0,
 514                               GLfloat s1,
 515                               GLfloat s2,
 516                               GLfloat s3)
 517 {
 518    GLfloat values[4];
 519    GLint idx;
 520    GLuint swizzle;
 521    values[0] = s0;
 522    values[1] = s1;
 523    values[2] = s2;
 524    values[3] = s3;
 525    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
 526                                      &swizzle );
 527    ASSERT(swizzle == SWIZZLE_NOOP);
 528    return make_ureg(PROGRAM_CONSTANT, idx);
 529 }
 530
 531 #define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
 532 #define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
 533 #define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
 534 #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
 535
 536 static GLboolean is_undef( struct ureg reg )
 537 {
 538    return reg.file == PROGRAM_UNDEFINED;
 539 }
 540
 541
 542 static struct ureg get_identity_param( struct tnl_program *p )
 543 {
 544    if (is_undef(p->identity))
 545       p->identity = register_const4f(p, 0,0,0,1);
 546
 547    return p->identity;
 548 }
 549
 550 static void register_matrix_param5( struct tnl_program *p,
 551                                     GLint s0, /* modelview, projection, etc */
 552                                     GLint s1, /* texture matrix number */
 553                                     GLint s2, /* first row */
 554                                     GLint s3, /* last row */
 555                                     GLint s4, /* inverse, transpose, etc */
 556                                     struct ureg *matrix )
 557 {
 558    GLint i;
 559
 560    /* This is a bit sad as the support is there to pull the whole
 561     * matrix out in one go:
 562     */
 563    for (i = 0; i <= s3 - s2; i++)
 564       matrix[i] = register_param5( p, s0, s1, i, i, s4 );
 565 }
 566
 567
 568 static void emit_arg( struct prog_src_register *src,
 569                       struct ureg reg )
 570 {
 571    src->File = reg.file;
 572    src->Index = reg.idx;
 573    src->Swizzle = reg.swz;
 574    src->NegateBase = reg.negate ? NEGATE_XYZW : 0;
 575    src->Abs = 0;
 576    src->NegateAbs = 0;
 577    src->RelAddr = 0;
 578    /* Check that bitfield sizes aren't exceeded */
 579    ASSERT(src->Index == reg.idx);
 580 }
 581
 582
 583 static void emit_dst( struct prog_dst_register *dst,
 584                       struct ureg reg, GLuint mask )
 585 {
 586    dst->File = reg.file;
 587    dst->Index = reg.idx;
 588    /* allow zero as a shorthand for xyzw */
 589    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
 590    dst->CondMask = COND_TR;  /* always pass cond test */
 591    dst->CondSwizzle = SWIZZLE_NOOP;
 592    dst->CondSrc = 0;
 593    dst->pad = 0;
 594    /* Check that bitfield sizes aren't exceeded */
 595    ASSERT(dst->Index == reg.idx);
 596 }
 597
 598
 599 static void debug_insn( struct prog_instruction *inst, const char *fn,
 600                         GLuint line )
 601 {
 602    if (DISASSEM) {
 603       static const char *last_fn;
 604
 605       if (fn != last_fn) {
 606          last_fn = fn;
 607          _mesa_printf("%s:\n", fn);
 608       }
 609
 610       _mesa_printf("%d:\t", line);
 611       _mesa_print_instruction(inst);
 612    }
 613 }
 614
 615
 616 static void emit_op3fn(struct tnl_program *p,
 617                        enum prog_opcode op,
 618                        struct ureg dest,
 619                        GLuint mask,
 620                        struct ureg src0,
 621                        struct ureg src1,
 622                        struct ureg src2,
 623                        const char *fn,
 624                        GLuint line)
 625 {
 626    GLuint nr;
 627    struct prog_instruction *inst;
 628
 629    assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
 630
 631    if (p->program->Base.NumInstructions == p->max_inst) {
 632       /* need to extend the program's instruction array */
 633       struct prog_instruction *newInst;
 634
 635       /* double the size */
 636       p->max_inst *= 2;
 637
 638       newInst = _mesa_alloc_instructions(p->max_inst);
 639       if (!newInst) {
 640          _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
 641          return;
 642       }
 643
 644       _mesa_copy_instructions(newInst,
 645                               p->program->Base.Instructions,
 646                               p->program->Base.NumInstructions);
 647
 648       _mesa_free_instructions(p->program->Base.Instructions,
 649                               p->program->Base.NumInstructions);
 650
 651       p->program->Base.Instructions = newInst;
 652    }
 653
 654    nr = p->program->Base.NumInstructions++;
 655
 656    inst = &p->program->Base.Instructions[nr];
 657    inst->Opcode = (enum prog_opcode) op;
 658    inst->StringPos = 0;
 659    inst->Data = 0;
 660
 661    emit_arg( &inst->SrcReg[0], src0 );
 662    emit_arg( &inst->SrcReg[1], src1 );
 663    emit_arg( &inst->SrcReg[2], src2 );
 664
 665    emit_dst( &inst->DstReg, dest, mask );
 666
 667    debug_insn(inst, fn, line);
 668 }
 669
 670
 671 #define emit_op3(p, op, dst, mask, src0, src1, src2) \
 672    emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
 673
 674 #define emit_op2(p, op, dst, mask, src0, src1) \
 675     emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
 676
 677 #define emit_op1(p, op, dst, mask, src0) \
 678     emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
 679
 680
 681 static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
 682 {
 683    if (reg.file == PROGRAM_TEMPORARY &&
 684        !(p->temp_reserved & (1<<reg.idx)))
 685       return reg;
 686    else {
 687       struct ureg temp = get_temp(p);
 688       emit_op1(p, OPCODE_MOV, temp, 0, reg);
 689       return temp;
 690    }
 691 }
 692
 693
 694 /* Currently no tracking performed of input/output/register size or
 695  * active elements.  Could be used to reduce these operations, as
 696  * could the matrix type.
 697  */
 698 static void emit_matrix_transform_vec4( struct tnl_program *p,
 699                                         struct ureg dest,
 700                                         const struct ureg *mat,
 701                                         struct ureg src)
 702 {
 703    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
 704    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
 705    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
 706    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
 707 }
 708
 709
 710 /* This version is much easier to implement if writemasks are not
 711  * supported natively on the target or (like SSE), the target doesn't
 712  * have a clean/obvious dotproduct implementation.
 713  */
 714 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
 715                                                   struct ureg dest,
 716                                                   const struct ureg *mat,
 717                                                   struct ureg src)
 718 {
 719    struct ureg tmp;
 720
 721    if (dest.file != PROGRAM_TEMPORARY)
 722       tmp = get_temp(p);
 723    else
 724       tmp = dest;
 725
 726    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
 727    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
 728    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
 729    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
 730
 731    if (dest.file != PROGRAM_TEMPORARY)
 732       release_temp(p, tmp);
 733 }
 734
 735
 736 static void emit_matrix_transform_vec3( struct tnl_program *p,
 737                                         struct ureg dest,
 738                                         const struct ureg *mat,
 739                                         struct ureg src)
 740 {
 741    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
 742    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
 743    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
 744 }
 745
 746
 747 static void emit_normalize_vec3( struct tnl_program *p,
 748                                  struct ureg dest,
 749                                  struct ureg src )
 750 {
 751 #if 0
 752    /* XXX use this when drivers are ready for NRM3 */
 753    emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src);
 754 #else
 755    struct ureg tmp = get_temp(p);
 756    emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
 757    emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
 758    emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
 759    release_temp(p, tmp);
 760 #endif
 761 }
 762
 763
 764 static void emit_passthrough( struct tnl_program *p,
 765                               GLuint input,
 766                               GLuint output )
 767 {
 768    struct ureg out = register_output(p, output);
 769    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
 770 }
 771
 772
 773 static struct ureg get_eye_position( struct tnl_program *p )
 774 {
 775    if (is_undef(p->eye_position)) {
 776       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 777       struct ureg modelview[4];
 778
 779       p->eye_position = reserve_temp(p);
 780
 781       if (PREFER_DP4) {
 782          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 783                                  0, modelview );
 784
 785          emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 786       }
 787       else {
 788          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 789                                  STATE_MATRIX_TRANSPOSE, modelview );
 790
 791          emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 792       }
 793    }
 794
 795    return p->eye_position;
 796 }
 797
 798
 799 static struct ureg get_eye_position_z( struct tnl_program *p )
 800 {
 801    if (!is_undef(p->eye_position))
 802       return swizzle1(p->eye_position, Z);
 803
 804    if (is_undef(p->eye_position_z)) {
 805       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 806       struct ureg modelview[4];
 807
 808       p->eye_position_z = reserve_temp(p);
 809
 810       register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 811                               0, modelview );
 812
 813       emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
 814    }
 815
 816    return p->eye_position_z;
 817 }
 818
 819
 820 static struct ureg get_eye_position_normalized( struct tnl_program *p )
 821 {
 822    if (is_undef(p->eye_position_normalized)) {
 823       struct ureg eye = get_eye_position(p);
 824       p->eye_position_normalized = reserve_temp(p);
 825       emit_normalize_vec3(p, p->eye_position_normalized, eye);
 826    }
 827
 828    return p->eye_position_normalized;
 829 }
 830
 831
 832 static struct ureg get_transformed_normal( struct tnl_program *p )
 833 {
 834    if (is_undef(p->transformed_normal) &&
 835        !p->state->need_eye_coords &&
 836        !p->state->normalize &&
 837        !(p->state->need_eye_coords == p->state->rescale_normals))
 838    {
 839       p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
 840    }
 841    else if (is_undef(p->transformed_normal))
 842    {
 843       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
 844       struct ureg mvinv[3];
 845       struct ureg transformed_normal = reserve_temp(p);
 846
 847       if (p->state->need_eye_coords) {
 848          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
 849                                  STATE_MATRIX_INVTRANS, mvinv );
 850
 851          /* Transform to eye space:
 852           */
 853          emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
 854          normal = transformed_normal;
 855       }
 856
 857       /* Normalize/Rescale:
 858        */
 859       if (p->state->normalize) {
 860          emit_normalize_vec3( p, transformed_normal, normal );
 861          normal = transformed_normal;
 862       }
 863       else if (p->state->need_eye_coords == p->state->rescale_normals) {
 864          /* This is already adjusted for eye/non-eye rendering:
 865           */
 866          struct ureg rescale = register_param2(p, STATE_INTERNAL,
 867                                                STATE_NORMAL_SCALE);
 868
 869          emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
 870          normal = transformed_normal;
 871       }
 872
 873       assert(normal.file == PROGRAM_TEMPORARY);
 874       p->transformed_normal = normal;
 875    }
 876
 877    return p->transformed_normal;
 878 }
 879
 880
 881 static void build_hpos( struct tnl_program *p )
 882 {
 883    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 884    struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
 885    struct ureg mvp[4];
 886
 887    if (PREFER_DP4) {
 888       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 889                               0, mvp );
 890       emit_matrix_transform_vec4( p, hpos, mvp, pos );
 891    }
 892    else {
 893       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 894                               STATE_MATRIX_TRANSPOSE, mvp );
 895       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
 896    }
 897 }
 898
 899
 900 static GLuint material_attrib( GLuint side, GLuint property )
 901 {
 902    return ((property - STATE_AMBIENT) * 2 +
 903            side);
 904 }
 905
 906
 907 /**
 908  * Get a bitmask of which material values vary on a per-vertex basis.
 909  */
 910 static void set_material_flags( struct tnl_program *p )
 911 {
 912    p->color_materials = 0;
 913    p->materials = 0;
 914
 915    if (p->state->light_color_material) {
 916       p->materials =
 917          p->color_materials = p->state->light_color_material_mask;
 918    }
 919
 920    p->materials |= p->state->light_material_mask;
 921 }
 922
 923
 924 /* XXX temporary!!! */
 925 #define _TNL_ATTRIB_MAT_FRONT_AMBIENT 32
 926
 927 static struct ureg get_material( struct tnl_program *p, GLuint side,
 928                                  GLuint property )
 929 {
 930    GLuint attrib = material_attrib(side, property);
 931
 932    if (p->color_materials & (1<<attrib))
 933       return register_input(p, VERT_ATTRIB_COLOR0);
 934    else if (p->materials & (1<<attrib))
 935       return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT );
 936    else
 937       return register_param3( p, STATE_MATERIAL, side, property );
 938 }
 939
 940 #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
 941                                    MAT_BIT_FRONT_AMBIENT | \
 942                                    MAT_BIT_FRONT_DIFFUSE) << (side))
 943
 944
 945 /**
 946  * Either return a precalculated constant value or emit code to
 947  * calculate these values dynamically in the case where material calls
 948  * are present between begin/end pairs.
 949  *
 950  * Probably want to shift this to the program compilation phase - if
 951  * we always emitted the calculation here, a smart compiler could
 952  * detect that it was constant (given a certain set of inputs), and
 953  * lift it out of the main loop.  That way the programs created here
 954  * would be independent of the vertex_buffer details.
 955  */
 956 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
 957 {
 958    if (p->materials & SCENE_COLOR_BITS(side)) {
 959       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
 960       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
 961       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
 962       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
 963       struct ureg tmp = make_temp(p, material_diffuse);
 964       emit_op3(p, OPCODE_MAD, tmp,  WRITEMASK_XYZ, lm_ambient,
 965                material_ambient, material_emission);
 966       return tmp;
 967    }
 968    else
 969       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
 970 }
 971
 972
 973 static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
 974                                   GLuint side, GLuint property )
 975 {
 976    GLuint attrib = material_attrib(side, property);
 977    if (p->materials & (1<<attrib)) {
 978       struct ureg light_value =
 979          register_param3(p, STATE_LIGHT, light, property);
 980       struct ureg material_value = get_material(p, side, property);
 981       struct ureg tmp = get_temp(p);
 982       emit_op2(p, OPCODE_MUL, tmp,  0, light_value, material_value);
 983       return tmp;
 984    }
 985    else
 986       return register_param4(p, STATE_LIGHTPROD, light, side, property);
 987 }
 988
 989
 990 static struct ureg calculate_light_attenuation( struct tnl_program *p,
 991                                                 GLuint i,
 992                                                 struct ureg VPpli,
 993                                                 struct ureg dist )
 994 {
 995    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
 996                                              STATE_ATTENUATION);
 997    struct ureg att = get_temp(p);
 998
 999    /* Calculate spot attenuation:
1000     */
1001    if (!p->state->unit[i].light_spotcutoff_is_180) {
1002       struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
1003                                                   STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
1004       struct ureg spot = get_temp(p);
1005       struct ureg slt = get_temp(p);
1006
1007       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
1008       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
1009       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
1010       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
1011
1012       release_temp(p, spot);
1013       release_temp(p, slt);
1014    }
1015
1016    /* Calculate distance attenuation:
1017     */
1018    if (p->state->unit[i].light_attenuated) {
1019
1020       /* 1/d,d,d,1/d */
1021       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
1022       /* 1,d,d*d,1/d */
1023       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
1024       /* 1/dist-atten */
1025       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
1026
1027       if (!p->state->unit[i].light_spotcutoff_is_180) {
1028          /* dist-atten */
1029          emit_op1(p, OPCODE_RCP, dist, 0, dist);
1030          /* spot-atten * dist-atten */
1031          emit_op2(p, OPCODE_MUL, att, 0, dist, att);
1032       } else {
1033          /* dist-atten */
1034          emit_op1(p, OPCODE_RCP, att, 0, dist);
1035       }
1036    }
1037
1038    return att;
1039 }
1040
1041
1042 /**
1043  * Compute:
1044  *   lit.y = MAX(0, dots.x)
1045  *   lit.z = SLT(0, dots.x)
1046  */
1047 static void emit_degenerate_lit( struct tnl_program *p,
1048                                  struct ureg lit,
1049                                  struct ureg dots )
1050 {
1051    struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
1052
1053    /* Note that lit.x & lit.w will not be examined.  Note also that
1054     * dots.xyzw == dots.xxxx.
1055     */
1056
1057    /* MAX lit, id, dots;
1058     */
1059    emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
1060
1061    /* result[2] = (in > 0 ? 1 : 0)
1062     * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
1063     */
1064    emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
1065 }
1066
1067
1068 /* Need to add some addtional parameters to allow lighting in object
1069  * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
1070  * space lighting.
1071  */
1072 static void build_lighting( struct tnl_program *p )
1073 {
1074    const GLboolean twoside = p->state->light_twoside;
1075    const GLboolean separate = p->state->separate_specular;
1076    GLuint nr_lights = 0, count = 0;
1077    struct ureg normal = get_transformed_normal(p);
1078    struct ureg lit = get_temp(p);
1079    struct ureg dots = get_temp(p);
1080    struct ureg _col0 = undef, _col1 = undef;
1081    struct ureg _bfc0 = undef, _bfc1 = undef;
1082    GLuint i;
1083
1084    /*
1085     * NOTE:
1086     * dot.x = dot(normal, VPpli)
1087     * dot.y = dot(normal, halfAngle)
1088     * dot.z = back.shininess
1089     * dot.w = front.shininess
1090     */
1091
1092    for (i = 0; i < MAX_LIGHTS; i++)
1093       if (p->state->unit[i].light_enabled)
1094          nr_lights++;
1095
1096    set_material_flags(p);
1097
1098    {
1099       if (!p->state->material_shininess_is_zero) {
1100          struct ureg shininess = get_material(p, 0, STATE_SHININESS);
1101          emit_op1(p, OPCODE_MOV, dots,  WRITEMASK_W, swizzle1(shininess,X));
1102          release_temp(p, shininess);
1103       }
1104
1105       _col0 = make_temp(p, get_scenecolor(p, 0));
1106       if (separate)
1107          _col1 = make_temp(p, get_identity_param(p));
1108       else
1109          _col1 = _col0;
1110
1111    }
1112
1113    if (twoside) {
1114       if (!p->state->material_shininess_is_zero) {
1115          struct ureg shininess = get_material(p, 1, STATE_SHININESS);
1116          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
1117                   negate(swizzle1(shininess,X)));
1118          release_temp(p, shininess);
1119       }
1120
1121       _bfc0 = make_temp(p, get_scenecolor(p, 1));
1122       if (separate)
1123          _bfc1 = make_temp(p, get_identity_param(p));
1124       else
1125          _bfc1 = _bfc0;
1126    }
1127
1128    /* If no lights, still need to emit the scenecolor.
1129     */
1130    {
1131       struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
1132       emit_op1(p, OPCODE_MOV, res0, 0, _col0);
1133    }
1134
1135    if (separate) {
1136       struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
1137       emit_op1(p, OPCODE_MOV, res1, 0, _col1);
1138    }
1139
1140    if (twoside) {
1141       struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
1142       emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
1143    }
1144
1145    if (twoside && separate) {
1146       struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
1147       emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
1148    }
1149
1150    if (nr_lights == 0) {
1151       release_temps(p);
1152       return;
1153    }
1154
1155    for (i = 0; i < MAX_LIGHTS; i++) {
1156       if (p->state->unit[i].light_enabled) {
1157          struct ureg half = undef;
1158          struct ureg att = undef, VPpli = undef;
1159
1160          count++;
1161
1162          if (p->state->unit[i].light_eyepos3_is_zero) {
1163             /* Can used precomputed constants in this case.
1164              * Attenuation never applies to infinite lights.
1165              */
1166             VPpli = register_param3(p, STATE_INTERNAL,
1167                                     STATE_LIGHT_POSITION_NORMALIZED, i);
1168
1169             if (!p->state->material_shininess_is_zero) {
1170                if (p->state->light_local_viewer) {
1171                   struct ureg eye_hat = get_eye_position_normalized(p);
1172                   half = get_temp(p);
1173                   emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1174                   emit_normalize_vec3(p, half, half);
1175                } else {
1176                   half = register_param3(p, STATE_INTERNAL,
1177                                          STATE_LIGHT_HALF_VECTOR, i);
1178                }
1179             }
1180          }
1181          else {
1182             struct ureg Ppli = register_param3(p, STATE_INTERNAL,
1183                                                STATE_LIGHT_POSITION, i);
1184             struct ureg V = get_eye_position(p);
1185             struct ureg dist = get_temp(p);
1186
1187             VPpli = get_temp(p);
1188
1189             /* Calculate VPpli vector
1190              */
1191             emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
1192
1193             /* Normalize VPpli.  The dist value also used in
1194              * attenuation below.
1195              */
1196             emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
1197             emit_op1(p, OPCODE_RSQ, dist, 0, dist);
1198             emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
1199
1200             /* Calculate attenuation:
1201              */
1202             if (!p->state->unit[i].light_spotcutoff_is_180 ||
1203                 p->state->unit[i].light_attenuated) {
1204                att = calculate_light_attenuation(p, i, VPpli, dist);
1205             }
1206
1207             /* Calculate viewer direction, or use infinite viewer:
1208              */
1209             if (!p->state->material_shininess_is_zero) {
1210                half = get_temp(p);
1211
1212                if (p->state->light_local_viewer) {
1213                   struct ureg eye_hat = get_eye_position_normalized(p);
1214                   emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1215                }
1216                else {
1217                   struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
1218                   emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
1219                }
1220
1221                emit_normalize_vec3(p, half, half);
1222             }
1223
1224             release_temp(p, dist);
1225          }
1226
1227          /* Calculate dot products:
1228           */
1229          if (p->state->material_shininess_is_zero) {
1230             emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
1231          }
1232          else {
1233             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
1234             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
1235          }
1236
1237          /* Front face lighting:
1238           */
1239          {
1240             struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
1241             struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
1242             struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
1243             struct ureg res0, res1;
1244             GLuint mask0, mask1;
1245
1246             if (count == nr_lights) {
1247                if (separate) {
1248                   mask0 = WRITEMASK_XYZ;
1249                   mask1 = WRITEMASK_XYZ;
1250                   res0 = register_output( p, VERT_RESULT_COL0 );
1251                   res1 = register_output( p, VERT_RESULT_COL1 );
1252                }
1253                else {
1254                   mask0 = 0;
1255                   mask1 = WRITEMASK_XYZ;
1256                   res0 = _col0;
1257                   res1 = register_output( p, VERT_RESULT_COL0 );
1258                }
1259             } else {
1260                mask0 = 0;
1261                mask1 = 0;
1262                res0 = _col0;
1263                res1 = _col1;
1264             }
1265
1266             if (!is_undef(att)) {
1267                /* light is attenuated by distance */
1268                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1269                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1270                emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
1271             }
1272             else if (!p->state->material_shininess_is_zero) {
1273                /* there's a non-zero specular term */
1274                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1275                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1276             }
1277             else {
1278                /* no attenutation, no specular */
1279                emit_degenerate_lit(p, lit, dots);
1280                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1281             }
1282
1283             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
1284             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
1285
1286             release_temp(p, ambient);
1287             release_temp(p, diffuse);
1288             release_temp(p, specular);
1289          }
1290
1291          /* Back face lighting:
1292           */
1293          if (twoside) {
1294             struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
1295             struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
1296             struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
1297             struct ureg res0, res1;
1298             GLuint mask0, mask1;
1299
1300             if (count == nr_lights) {
1301                if (separate) {
1302                   mask0 = WRITEMASK_XYZ;
1303                   mask1 = WRITEMASK_XYZ;
1304                   res0 = register_output( p, VERT_RESULT_BFC0 );
1305                   res1 = register_output( p, VERT_RESULT_BFC1 );
1306                }
1307                else {
1308                   mask0 = 0;
1309                   mask1 = WRITEMASK_XYZ;
1310                   res0 = _bfc0;
1311                   res1 = register_output( p, VERT_RESULT_BFC0 );
1312                }
1313             } else {
1314                res0 = _bfc0;
1315                res1 = _bfc1;
1316                mask0 = 0;
1317                mask1 = 0;
1318             }
1319
1320             dots = negate(swizzle(dots,X,Y,W,Z));
1321
1322             if (!is_undef(att)) {
1323                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1324                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1325                emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
1326             }
1327             else if (!p->state->material_shininess_is_zero) {
1328                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1329                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
1330             }
1331             else {
1332                emit_degenerate_lit(p, lit, dots);
1333                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
1334             }
1335
1336             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
1337             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
1338             /* restore negate flag for next lighting */
1339             dots = negate(dots);
1340
1341             release_temp(p, ambient);
1342             release_temp(p, diffuse);
1343             release_temp(p, specular);
1344          }
1345
1346          release_temp(p, half);
1347          release_temp(p, VPpli);
1348          release_temp(p, att);
1349       }
1350    }
1351
1352    release_temps( p );
1353 }
1354
1355
1356 static void build_fog( struct tnl_program *p )
1357 {
1358    struct ureg fog = register_output(p, VERT_RESULT_FOGC);
1359    struct ureg input;
1360
1361    if (p->state->fog_source_is_depth) {
1362       input = get_eye_position_z(p);
1363    }
1364    else {
1365       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1366    }
1367
1368    if (p->state->fog_mode && p->state->tnl_do_vertex_fog) {
1369       struct ureg params = register_param2(p, STATE_INTERNAL,
1370                                            STATE_FOG_PARAMS_OPTIMIZED);
1371       struct ureg tmp = get_temp(p);
1372       GLboolean useabs = (p->state->fog_mode != FOG_EXP2);
1373
1374       if (useabs) {
1375          emit_op1(p, OPCODE_ABS, tmp, 0, input);
1376       }
1377
1378       switch (p->state->fog_mode) {
1379       case FOG_LINEAR: {
1380          struct ureg id = get_identity_param(p);
1381          emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input,
1382                         swizzle1(params,X), swizzle1(params,Y));
1383          emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
1384          emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
1385          break;
1386       }
1387       case FOG_EXP:
1388          emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input,
1389                         swizzle1(params,Z));
1390          emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
1391          break;
1392       case FOG_EXP2:
1393          emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
1394          emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp);
1395          emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
1396          break;
1397       }
1398
1399       release_temp(p, tmp);
1400    }
1401    else {
1402       /* results = incoming fog coords (compute fog per-fragment later)
1403        *
1404        * KW:  Is it really necessary to do anything in this case?
1405        * BP: Yes, we always need to compute the absolute value, unless
1406        * we want to push that down into the fragment program...
1407        */
1408       GLboolean useabs = GL_TRUE;
1409       emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, WRITEMASK_X, input);
1410    }
1411 }
1412
1413
1414 static void build_reflect_texgen( struct tnl_program *p,
1415                                   struct ureg dest,
1416                                   GLuint writemask )
1417 {
1418    struct ureg normal = get_transformed_normal(p);
1419    struct ureg eye_hat = get_eye_position_normalized(p);
1420    struct ureg tmp = get_temp(p);
1421
1422    /* n.u */
1423    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1424    /* 2n.u */
1425    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1426    /* (-2n.u)n + u */
1427    emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
1428
1429    release_temp(p, tmp);
1430 }
1431
1432
1433 static void build_sphere_texgen( struct tnl_program *p,
1434                                  struct ureg dest,
1435                                  GLuint writemask )
1436 {
1437    struct ureg normal = get_transformed_normal(p);
1438    struct ureg eye_hat = get_eye_position_normalized(p);
1439    struct ureg tmp = get_temp(p);
1440    struct ureg half = register_scalar_const(p, .5);
1441    struct ureg r = get_temp(p);
1442    struct ureg inv_m = get_temp(p);
1443    struct ureg id = get_identity_param(p);
1444
1445    /* Could share the above calculations, but it would be
1446     * a fairly odd state for someone to set (both sphere and
1447     * reflection active for different texture coordinate
1448     * components.  Of course - if two texture units enable
1449     * reflect and/or sphere, things start to tilt in favour
1450     * of seperating this out:
1451     */
1452
1453    /* n.u */
1454    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1455    /* 2n.u */
1456    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1457    /* (-2n.u)n + u */
1458    emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
1459    /* r + 0,0,1 */
1460    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1461    /* rx^2 + ry^2 + (rz+1)^2 */
1462    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1463    /* 2/m */
1464    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1465    /* 1/m */
1466    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1467    /* r/m + 1/2 */
1468    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1469
1470    release_temp(p, tmp);
1471    release_temp(p, r);
1472    release_temp(p, inv_m);
1473 }
1474
1475
1476 static void build_texture_transform( struct tnl_program *p )
1477 {
1478    GLuint i, j;
1479
1480    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
1481
1482       if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i)))
1483          continue;
1484
1485       if (p->state->unit[i].texgen_enabled ||
1486           p->state->unit[i].texmat_enabled) {
1487
1488          GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1489          struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
1490          struct ureg out_texgen = undef;
1491
1492          if (p->state->unit[i].texgen_enabled) {
1493             GLuint copy_mask = 0;
1494             GLuint sphere_mask = 0;
1495             GLuint reflect_mask = 0;
1496             GLuint normal_mask = 0;
1497             GLuint modes[4];
1498
1499             if (texmat_enabled)
1500                out_texgen = get_temp(p);
1501             else
1502                out_texgen = out;
1503
1504             modes[0] = p->state->unit[i].texgen_mode0;
1505             modes[1] = p->state->unit[i].texgen_mode1;
1506             modes[2] = p->state->unit[i].texgen_mode2;
1507             modes[3] = p->state->unit[i].texgen_mode3;
1508
1509             for (j = 0; j < 4; j++) {
1510                switch (modes[j]) {
1511                case TXG_OBJ_LINEAR: {
1512                   struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1513                   struct ureg plane =
1514                      register_param3(p, STATE_TEXGEN, i,
1515                                      STATE_TEXGEN_OBJECT_S + j);
1516
1517                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1518                            obj, plane );
1519                   break;
1520                }
1521                case TXG_EYE_LINEAR: {
1522                   struct ureg eye = get_eye_position(p);
1523                   struct ureg plane =
1524                      register_param3(p, STATE_TEXGEN, i,
1525                                      STATE_TEXGEN_EYE_S + j);
1526
1527                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1528                            eye, plane );
1529                   break;
1530                }
1531                case TXG_SPHERE_MAP:
1532                   sphere_mask |= WRITEMASK_X << j;
1533                   break;
1534                case TXG_REFLECTION_MAP:
1535                   reflect_mask |= WRITEMASK_X << j;
1536                   break;
1537                case TXG_NORMAL_MAP:
1538                   normal_mask |= WRITEMASK_X << j;
1539                   break;
1540                case TXG_NONE:
1541                   copy_mask |= WRITEMASK_X << j;
1542                }
1543             }
1544
1545             if (sphere_mask) {
1546                build_sphere_texgen(p, out_texgen, sphere_mask);
1547             }
1548
1549             if (reflect_mask) {
1550                build_reflect_texgen(p, out_texgen, reflect_mask);
1551             }
1552
1553             if (normal_mask) {
1554                struct ureg normal = get_transformed_normal(p);
1555                emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1556             }
1557
1558             if (copy_mask) {
1559                struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1560                emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1561             }
1562          }
1563
1564          if (texmat_enabled) {
1565             struct ureg texmat[4];
1566             struct ureg in = (!is_undef(out_texgen) ?
1567                               out_texgen :
1568                               register_input(p, VERT_ATTRIB_TEX0+i));
1569             if (PREFER_DP4) {
1570                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1571                                        0, texmat );
1572                emit_matrix_transform_vec4( p, out, texmat, in );
1573             }
1574             else {
1575                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1576                                        STATE_MATRIX_TRANSPOSE, texmat );
1577                emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1578             }
1579          }
1580
1581          release_temps(p);
1582       }
1583       else {
1584          emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
1585       }
1586    }
1587 }
1588
1589
1590 /**
1591  * Point size attenuation computation.
1592  */
1593 static void build_atten_pointsize( struct tnl_program *p )
1594 {
1595    struct ureg eye = get_eye_position_z(p);
1596    struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
1597    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1598    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1599    struct ureg ut = get_temp(p);
1600
1601    /* dist = |eyez| */
1602    emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
1603    /* p1 + dist * (p2 + dist * p3); */
1604    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1605                 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
1606    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1607                 ut, swizzle1(state_attenuation, X));
1608
1609    /* 1 / sqrt(factor) */
1610    emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
1611
1612 #if 0
1613    /* out = pointSize / sqrt(factor) */
1614    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1615 #else
1616    /* this is a good place to clamp the point size since there's likely
1617     * no hardware registers to clamp point size at rasterization time.
1618     */
1619    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
1620    emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
1621    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
1622 #endif
1623
1624    release_temp(p, ut);
1625 }
1626
1627
1628 /**
1629  * Emit constant point size.
1630  */
1631 static void build_constant_pointsize( struct tnl_program *p )
1632 {
1633    struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
1634    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1635    emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, state_size);
1636 }
1637
1638
1639 /**
1640  * Pass-though per-vertex point size, from user's point size array.
1641  */
1642 static void build_array_pointsize( struct tnl_program *p )
1643 {
1644    struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
1645    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1646    emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
1647 }
1648
1649
1650 static void build_tnl_program( struct tnl_program *p )
1651 {   /* Emit the program, starting with modelviewproject:
1652     */
1653    build_hpos(p);
1654
1655    /* Lighting calculations:
1656     */
1657    if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
1658       if (p->state->light_global_enabled)
1659          build_lighting(p);
1660       else {
1661          if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1662             emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
1663
1664          if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1665             emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
1666       }
1667    }
1668
1669    if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) ||
1670        p->state->fog_mode != FOG_NONE)
1671       build_fog(p);
1672
1673    if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
1674       build_texture_transform(p);
1675
1676    if (p->state->point_attenuated)
1677       build_atten_pointsize(p);
1678    else if (p->state->point_array)
1679       build_array_pointsize(p);
1680 #if 0
1681    else
1682       build_constant_pointsize(p);
1683 #else
1684    (void) build_constant_pointsize;
1685 #endif
1686
1687    /* Finish up:
1688     */
1689    emit_op1(p, OPCODE_END, undef, 0, undef);
1690
1691    /* Disassemble:
1692     */
1693    if (DISASSEM) {
1694       _mesa_printf ("\n");
1695    }
1696 }
1697
1698
1699 static void
1700 create_new_program( const struct state_key *key,
1701                     struct gl_vertex_program *program,
1702                     GLuint max_temps)
1703 {
1704    struct tnl_program p;
1705
1706    _mesa_memset(&p, 0, sizeof(p));
1707    p.state = key;
1708    p.program = program;
1709    p.eye_position = undef;
1710    p.eye_position_z = undef;
1711    p.eye_position_normalized = undef;
1712    p.transformed_normal = undef;
1713    p.identity = undef;
1714    p.temp_in_use = 0;
1715
1716    if (max_temps >= sizeof(int) * 8)
1717       p.temp_reserved = 0;
1718    else
1719       p.temp_reserved = ~((1<<max_temps)-1);
1720
1721    /* Start by allocating 32 instructions.
1722     * If we need more, we'll grow the instruction array as needed.
1723     */
1724    p.max_inst = 32;
1725    p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst);
1726    p.program->Base.String = NULL;
1727    p.program->Base.NumInstructions =
1728    p.program->Base.NumTemporaries =
1729    p.program->Base.NumParameters =
1730    p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
1731    p.program->Base.Parameters = _mesa_new_parameter_list();
1732    p.program->Base.InputsRead = 0;
1733    p.program->Base.OutputsWritten = 0;
1734
1735    build_tnl_program( &p );
1736 }
1737
1738
1739 /**
1740  * Return a vertex program which implements the current fixed-function
1741  * transform/lighting/texgen operations.
1742  * XXX move this into core mesa (main/)
1743  */
1744 struct gl_vertex_program *
1745 _mesa_get_fixed_func_vertex_program(GLcontext *ctx)
1746 {
1747    struct gl_vertex_program *prog;
1748    struct state_key key;
1749
1750    /* Grab all the relevent state and put it in a single structure:
1751     */
1752    make_state_key(ctx, &key);
1753
1754    /* Look for an already-prepared program for this state:
1755     */
1756    prog = (struct gl_vertex_program *)
1757       _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key));
1758
1759    if (!prog) {
1760       /* OK, we'll have to build a new one */
1761       if (0)
1762          _mesa_printf("Build new TNL program\n");
1763
1764       prog = (struct gl_vertex_program *)
1765          ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
1766       if (!prog)
1767          return NULL;
1768
1769       create_new_program( &key, prog,
1770                           ctx->Const.VertexProgram.MaxTemps );
1771
1772 #if 0
1773       if (ctx->Driver.ProgramStringNotify)
1774          ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
1775                                           &prog->Base );
1776 #endif
1777       _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
1778                                  &key, sizeof(key), &prog->Base);
1779    }
1780
1781    return prog;
1782 }