src/mesa/main/ffvertex_prog.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
 * \file ffvertex_prog.c
  30  *
  31  * Create a vertex program to execute the current fixed function T&L pipeline.
  32  * \author Keith Whitwell
  33  */
  34
  35
  36 #include "main/glheader.h"
  37 #include "main/mtypes.h"
  38 #include "main/macros.h"
  39 #include "main/enums.h"
  40 #include "main/ffvertex_prog.h"
  41 #include "shader/program.h"
  42 #include "shader/prog_cache.h"
  43 #include "shader/prog_instruction.h"
  44 #include "shader/prog_parameter.h"
  45 #include "shader/prog_print.h"
  46 #include "shader/prog_statevars.h"
  47
  48
/**
 * Compact snapshot of the fixed-function state sampled by make_state_key().
 *
 * The structure is allocated zero-filled, so every field that
 * make_state_key() does not explicitly set remains 0.
 */
struct state_key {
   unsigned light_global_enabled:1;        /* ctx->Light.Enabled */
   unsigned light_local_viewer:1;          /* ctx->Light.Model.LocalViewer */
   unsigned light_twoside:1;               /* ctx->Light.Model.TwoSide */
   unsigned light_color_material:1;        /* ctx->Light.ColorMaterialEnabled */
   unsigned light_color_material_mask:12;  /* ctx->Light.ColorMaterialBitmask */
   unsigned light_material_mask:12;        /* tnl_get_per_vertex_materials() */
   unsigned material_shininess_is_zero:1;  /* no active non-zero shininess */

   unsigned need_eye_coords:1;             /* ctx->_NeedEyeCoords */
   unsigned normalize:1;                   /* ctx->Transform.Normalize */
   unsigned rescale_normals:1;             /* ctx->Transform.RescaleNormals */
   unsigned fog_source_is_depth:1;         /* fog coord source is fragment depth */
   unsigned tnl_do_vertex_fog:1;           /* tnl_get_per_vertex_fog() */
   unsigned separate_specular:1;           /* GL_SEPARATE_SPECULAR_COLOR in use */
   unsigned fog_mode:2;                    /* one of the FOG_* codes */
   unsigned point_attenuated:1;            /* ctx->Point._Attenuated */
   unsigned texture_enabled_global:1;      /* any texgen/texmat/unit enabled */
   unsigned fragprog_inputs_read:12;       /* from the fragment program's InputsRead */

   /* Per-index state.  make_state_key() fills the light_* fields using the
    * light number as index and the tex* fields using the texture unit
    * number as index.
    */
   struct {
      unsigned light_enabled:1;
      unsigned light_eyepos3_is_zero:1;    /* EyePosition[3] == 0 */
      unsigned light_spotcutoff_is_180:1;  /* SpotCutoff == 180 */
      unsigned light_attenuated:1;         /* attenuation factors differ from (1,0,0) */
      unsigned texunit_really_enabled:1;
      unsigned texmat_enabled:1;
      unsigned texgen_enabled:4;           /* set to 1 when any coord has texgen */
      unsigned texgen_mode0:4;             /* TXG_* code for S */
      unsigned texgen_mode1:4;             /* TXG_* code for T */
      unsigned texgen_mode2:4;             /* TXG_* code for R */
      unsigned texgen_mode3:4;             /* TXG_* code for Q */
   } unit[8];
};
  83
  84
  85
  86 #define FOG_NONE   0
  87 #define FOG_LINEAR 1
  88 #define FOG_EXP    2
  89 #define FOG_EXP2   3
  90
  91 static GLuint translate_fog_mode( GLenum mode )
  92 {
  93    switch (mode) {
  94    case GL_LINEAR: return FOG_LINEAR;
  95    case GL_EXP: return FOG_EXP;
  96    case GL_EXP2: return FOG_EXP2;
  97    default: return FOG_NONE;
  98    }
  99 }
 100
 101 #define TXG_NONE           0
 102 #define TXG_OBJ_LINEAR     1
 103 #define TXG_EYE_LINEAR     2
 104 #define TXG_SPHERE_MAP     3
 105 #define TXG_REFLECTION_MAP 4
 106 #define TXG_NORMAL_MAP     5
 107
 108 static GLuint translate_texgen( GLboolean enabled, GLenum mode )
 109 {
 110    if (!enabled)
 111       return TXG_NONE;
 112
 113    switch (mode) {
 114    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
 115    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
 116    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
 117    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
 118    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
 119    default: return TXG_NONE;
 120    }
 121 }
 122
 123
 124 /**
 125  * Returns bitmask of flags indicating which materials are set per-vertex
 126  * in the current VB.
 127  * XXX get these from the VBO...
 128  */
 129 static GLbitfield
 130 tnl_get_per_vertex_materials(GLcontext *ctx)
 131 {
 132    GLbitfield mask = 0x0;
 133 #if 0
 134    TNLcontext *tnl = TNL_CONTEXT(ctx);
 135    struct vertex_buffer *VB = &tnl->vb;
 136    GLuint i;
 137
 138    for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++)
 139       if (VB->AttribPtr[i] && VB->AttribPtr[i]->stride)
 140          mask |= 1 << (i - _TNL_FIRST_MAT);
 141 #endif
 142    return mask;
 143 }
 144
 145 /**
 146  * Should fog be computed per-vertex?
 147  */
 148 static GLboolean
 149 tnl_get_per_vertex_fog(GLcontext *ctx)
 150 {
 151 #if 0
 152    TNLcontext *tnl = TNL_CONTEXT(ctx);
 153    return tnl->_DoVertexFog;
 154 #else
 155    return GL_FALSE;
 156 #endif
 157 }
 158
 159 static GLboolean check_active_shininess( GLcontext *ctx,
 160                                          const struct state_key *key,
 161                                          GLuint side )
 162 {
 163    GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side);
 164
 165    if (key->light_color_material_mask & bit)
 166       return GL_TRUE;
 167
 168    if (key->light_material_mask & bit)
 169       return GL_TRUE;
 170
 171    if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F)
 172       return GL_TRUE;
 173
 174    return GL_FALSE;
 175 }
 176
 177
 178
 179
 180 static struct state_key *make_state_key( GLcontext *ctx )
 181 {
 182    const struct gl_fragment_program *fp;
 183    struct state_key *key = CALLOC_STRUCT(state_key);
 184    GLuint i;
 185
 186    fp = ctx->FragmentProgram._Current;
 187
 188    /* This now relies on texenvprogram.c being active:
 189     */
 190    assert(fp);
 191
 192    key->need_eye_coords = ctx->_NeedEyeCoords;
 193
 194    key->fragprog_inputs_read = fp->Base.InputsRead;
 195
 196    if (ctx->RenderMode == GL_FEEDBACK) {
 197       /* make sure the vertprog emits color and tex0 */
 198       key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
 199    }
 200
 201    key->separate_specular = (ctx->Light.Model.ColorControl ==
 202                              GL_SEPARATE_SPECULAR_COLOR);
 203
 204    if (ctx->Light.Enabled) {
 205       key->light_global_enabled = 1;
 206
 207       if (ctx->Light.Model.LocalViewer)
 208          key->light_local_viewer = 1;
 209
 210       if (ctx->Light.Model.TwoSide)
 211          key->light_twoside = 1;
 212
 213       if (ctx->Light.ColorMaterialEnabled) {
 214          key->light_color_material = 1;
 215          key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
 216       }
 217
 218       key->light_material_mask = tnl_get_per_vertex_materials(ctx);
 219
 220       for (i = 0; i < MAX_LIGHTS; i++) {
 221          struct gl_light *light = &ctx->Light.Light[i];
 222
 223          if (light->Enabled) {
 224             key->unit[i].light_enabled = 1;
 225
 226             if (light->EyePosition[3] == 0.0)
 227                key->unit[i].light_eyepos3_is_zero = 1;
 228
 229             if (light->SpotCutoff == 180.0)
 230                key->unit[i].light_spotcutoff_is_180 = 1;
 231
 232             if (light->ConstantAttenuation != 1.0 ||
 233                 light->LinearAttenuation != 0.0 ||
 234                 light->QuadraticAttenuation != 0.0)
 235                key->unit[i].light_attenuated = 1;
 236          }
 237       }
 238
 239       if (check_active_shininess(ctx, key, 0)) {
 240          key->material_shininess_is_zero = 0;
 241       }
 242       else if (key->light_twoside &&
 243                check_active_shininess(ctx, key, 1)) {
 244          key->material_shininess_is_zero = 0;
 245       }
 246       else {
 247          key->material_shininess_is_zero = 1;
 248       }
 249    }
 250
 251    if (ctx->Transform.Normalize)
 252       key->normalize = 1;
 253
 254    if (ctx->Transform.RescaleNormals)
 255       key->rescale_normals = 1;
 256
 257    key->fog_mode = translate_fog_mode(fp->FogOption);
 258
 259    if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
 260       key->fog_source_is_depth = 1;
 261
 262    key->tnl_do_vertex_fog = tnl_get_per_vertex_fog(ctx);
 263
 264    if (ctx->Point._Attenuated)
 265       key->point_attenuated = 1;
 266
 267    if (ctx->Texture._TexGenEnabled ||
 268        ctx->Texture._TexMatEnabled ||
 269        ctx->Texture._EnabledUnits)
 270       key->texture_enabled_global = 1;
 271
 272    for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
 273       struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
 274
 275       if (texUnit->_ReallyEnabled)
 276          key->unit[i].texunit_really_enabled = 1;
 277
 278       if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
 279          key->unit[i].texmat_enabled = 1;
 280
 281       if (texUnit->TexGenEnabled) {
 282          key->unit[i].texgen_enabled = 1;
 283
 284          key->unit[i].texgen_mode0 =
 285             translate_texgen( texUnit->TexGenEnabled & (1<<0),
 286                               texUnit->GenModeS );
 287          key->unit[i].texgen_mode1 =
 288             translate_texgen( texUnit->TexGenEnabled & (1<<1),
 289                               texUnit->GenModeT );
 290          key->unit[i].texgen_mode2 =
 291             translate_texgen( texUnit->TexGenEnabled & (1<<2),
 292                               texUnit->GenModeR );
 293          key->unit[i].texgen_mode3 =
 294             translate_texgen( texUnit->TexGenEnabled & (1<<3),
 295                               texUnit->GenModeQ );
 296       }
 297    }
 298
 299    return key;
 300 }
 301
 302
 303
/* Very useful debugging tool - produces annotated listing of
 * generated program with line/function references for each
 * instruction back into this file.
 *
 * When non-zero, debug_insn() prints every emitted instruction.
 * NOTE(review): this is currently enabled -- confirm that the listing
 * is meant to be printed unconditionally.
 */
#define DISASSEM 1

/* Should be tunable by the driver - do we want to do matrix
 * multiplications with DP4's or with MUL/MAD's?  SSE works better
 * with the latter, drivers may differ.
 *
 * 0 selects the MUL/MAD form, which uses the transposed matrix.
 */
#define PREFER_DP4 0

/* Upper bound on the instruction count enforced by emit_op3fn(). */
#define MAX_INSN 256
 317
 318 /* Use uregs to represent registers internally, translate to Mesa's
 319  * expected formats on emit.
 320  *
 321  * NOTE: These are passed by value extensively in this file rather
 322  * than as usual by pointer reference.  If this disturbs you, try
 323  * remembering they are just 32bits in size.
 324  *
 325  * GCC is smart enough to deal with these dword-sized structures in
 326  * much the same way as if I had defined them as dwords and was using
 327  * macros to access and set the fields.  This is much nicer and easier
 328  * to evolve.
 329  */
/* Internal register reference.  emit_arg() and emit_dst() copy file, idx,
 * swz and negate into Mesa's instruction register structures.
 */
struct ureg {
   GLuint file:4;    /* register file (PROGRAM_*) */
   GLint idx:8;      /* relative addressing may be negative */
   GLuint negate:1;  /* non-zero: source is negated on all four components */
   GLuint swz:12;    /* packed swizzle, SWIZZLE_NOOP by default */
   GLuint pad:7;     /* always written as 0 */
};
 337
 338
/* Working state used while generating one vertex program. */
struct tnl_program {
   const struct state_key *state;      /* state the program is built for */
   struct gl_vertex_program *program;  /* program being filled in */

   GLuint temp_in_use;    /* bit n set: temporary n is currently allocated */
   GLuint temp_reserved;  /* bit n set: temporary n may not be released */

   /* Lazily computed values; 'undef' until first requested. */
   struct ureg eye_position;
   struct ureg eye_position_normalized;
   struct ureg transformed_normal;
   struct ureg identity;               /* constant (0,0,0,1) */

   GLuint materials;        /* material attribs that are not plain state */
   GLuint color_materials;  /* material attribs sourced from vertex color */
};
 354
 355
/* Sentinel register: marks "not computed yet" cache slots and unused
 * instruction operands.  is_undef() recognises it by its file field.
 */
static const struct ureg undef = {
   PROGRAM_UNDEFINED,   /* file */
   ~0,                  /* idx */
   0,                   /* negate */
   0,                   /* swz */
   0                    /* pad */
};
 363
/* Local shorthand for the swizzle component selectors, as passed to
 * swizzle() and swizzle1():
 */
#define X    SWIZZLE_X
#define Y    SWIZZLE_Y
#define Z    SWIZZLE_Z
#define W    SWIZZLE_W
 370
 371
 372 /* Construct a ureg:
 373  */
 374 static struct ureg make_ureg(GLuint file, GLint idx)
 375 {
 376    struct ureg reg;
 377    reg.file = file;
 378    reg.idx = idx;
 379    reg.negate = 0;
 380    reg.swz = SWIZZLE_NOOP;
 381    reg.pad = 0;
 382    return reg;
 383 }
 384
 385
 386
 387 static struct ureg negate( struct ureg reg )
 388 {
 389    reg.negate ^= 1;
 390    return reg;
 391 }
 392
 393
 394 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
 395 {
 396    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
 397                            GET_SWZ(reg.swz, y),
 398                            GET_SWZ(reg.swz, z),
 399                            GET_SWZ(reg.swz, w));
 400
 401    return reg;
 402 }
 403
 404 static struct ureg swizzle1( struct ureg reg, int x )
 405 {
 406    return swizzle(reg, x, x, x, x);
 407 }
 408
 409 static struct ureg get_temp( struct tnl_program *p )
 410 {
 411    int bit = _mesa_ffs( ~p->temp_in_use );
 412    if (!bit) {
 413       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
 414       _mesa_exit(1);
 415    }
 416
 417    if ((GLuint) bit > p->program->Base.NumTemporaries)
 418       p->program->Base.NumTemporaries = bit;
 419
 420    p->temp_in_use |= 1<<(bit-1);
 421    return make_ureg(PROGRAM_TEMPORARY, bit-1);
 422 }
 423
 424 static struct ureg reserve_temp( struct tnl_program *p )
 425 {
 426    struct ureg temp = get_temp( p );
 427    p->temp_reserved |= 1<<temp.idx;
 428    return temp;
 429 }
 430
 431 static void release_temp( struct tnl_program *p, struct ureg reg )
 432 {
 433    if (reg.file == PROGRAM_TEMPORARY) {
 434       p->temp_in_use &= ~(1<<reg.idx);
 435       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
 436    }
 437 }
 438
 439 static void release_temps( struct tnl_program *p )
 440 {
 441    p->temp_in_use = p->temp_reserved;
 442 }
 443
 444
 445
 446 static struct ureg register_input( struct tnl_program *p, GLuint input )
 447 {
 448    p->program->Base.InputsRead |= (1<<input);
 449    return make_ureg(PROGRAM_INPUT, input);
 450 }
 451
 452 static struct ureg register_output( struct tnl_program *p, GLuint output )
 453 {
 454    p->program->Base.OutputsWritten |= (1<<output);
 455    return make_ureg(PROGRAM_OUTPUT, output);
 456 }
 457
 458 static struct ureg register_const4f( struct tnl_program *p,
 459                               GLfloat s0,
 460                               GLfloat s1,
 461                               GLfloat s2,
 462                               GLfloat s3)
 463 {
 464    GLfloat values[4];
 465    GLint idx;
 466    GLuint swizzle;
 467    values[0] = s0;
 468    values[1] = s1;
 469    values[2] = s2;
 470    values[3] = s3;
 471    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
 472                                      &swizzle );
 473    ASSERT(swizzle == SWIZZLE_NOOP);
 474    return make_ureg(PROGRAM_CONSTANT, idx);
 475 }
 476
/* Shorthands for register_const4f().  The 1f/2f/3f forms fill missing
 * components with 0 and set the last component to 1; the scalar form
 * replicates s0 into all four components.
 */
#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
 481
 482 static GLboolean is_undef( struct ureg reg )
 483 {
 484    return reg.file == PROGRAM_UNDEFINED;
 485 }
 486
 487 static struct ureg get_identity_param( struct tnl_program *p )
 488 {
 489    if (is_undef(p->identity))
 490       p->identity = register_const4f(p, 0,0,0,1);
 491
 492    return p->identity;
 493 }
 494
 495 static struct ureg register_param5(struct tnl_program *p,
 496                                    GLint s0,
 497                                    GLint s1,
 498                                    GLint s2,
 499                                    GLint s3,
 500                                    GLint s4)
 501 {
 502    gl_state_index tokens[STATE_LENGTH];
 503    GLint idx;
 504    tokens[0] = s0;
 505    tokens[1] = s1;
 506    tokens[2] = s2;
 507    tokens[3] = s3;
 508    tokens[4] = s4;
 509    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
 510    return make_ureg(PROGRAM_STATE_VAR, idx);
 511 }
 512
 513
/* Shorthands for register_param5() that pass 0 for the unused trailing
 * state tokens.
 */
#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
 518
 519
 520 static void register_matrix_param5( struct tnl_program *p,
 521                                     GLint s0, /* modelview, projection, etc */
 522                                     GLint s1, /* texture matrix number */
 523                                     GLint s2, /* first row */
 524                                     GLint s3, /* last row */
 525                                     GLint s4, /* inverse, transpose, etc */
 526                                     struct ureg *matrix )
 527 {
 528    GLint i;
 529
 530    /* This is a bit sad as the support is there to pull the whole
 531     * matrix out in one go:
 532     */
 533    for (i = 0; i <= s3 - s2; i++)
 534       matrix[i] = register_param5( p, s0, s1, i, i, s4 );
 535 }
 536
 537
 538 static void emit_arg( struct prog_src_register *src,
 539                       struct ureg reg )
 540 {
 541    src->File = reg.file;
 542    src->Index = reg.idx;
 543    src->Swizzle = reg.swz;
 544    src->NegateBase = reg.negate ? NEGATE_XYZW : 0;
 545    src->Abs = 0;
 546    src->NegateAbs = 0;
 547    src->RelAddr = 0;
 548 }
 549
 550 static void emit_dst( struct prog_dst_register *dst,
 551                       struct ureg reg, GLuint mask )
 552 {
 553    dst->File = reg.file;
 554    dst->Index = reg.idx;
 555    /* allow zero as a shorthand for xyzw */
 556    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
 557    dst->CondMask = COND_TR;  /* always pass cond test */
 558    dst->CondSwizzle = SWIZZLE_NOOP;
 559    dst->CondSrc = 0;
 560    dst->pad = 0;
 561 }
 562
 563 static void debug_insn( struct prog_instruction *inst, const char *fn,
 564                         GLuint line )
 565 {
 566    if (DISASSEM) {
 567       static const char *last_fn;
 568
 569       if (fn != last_fn) {
 570          last_fn = fn;
 571          _mesa_printf("%s:\n", fn);
 572       }
 573
 574       _mesa_printf("%d:\t", line);
 575       _mesa_print_instruction(inst);
 576    }
 577 }
 578
 579
 580 static void emit_op3fn(struct tnl_program *p,
 581                        enum prog_opcode op,
 582                        struct ureg dest,
 583                        GLuint mask,
 584                        struct ureg src0,
 585                        struct ureg src1,
 586                        struct ureg src2,
 587                        const char *fn,
 588                        GLuint line)
 589 {
 590    GLuint nr = p->program->Base.NumInstructions++;
 591    struct prog_instruction *inst = &p->program->Base.Instructions[nr];
 592
 593    if (p->program->Base.NumInstructions > MAX_INSN) {
 594       _mesa_problem(0, "Out of instructions in emit_op3fn\n");
 595       return;
 596    }
 597
 598    inst->Opcode = (enum prog_opcode) op;
 599    inst->StringPos = 0;
 600    inst->Data = 0;
 601
 602    emit_arg( &inst->SrcReg[0], src0 );
 603    emit_arg( &inst->SrcReg[1], src1 );
 604    emit_arg( &inst->SrcReg[2], src2 );
 605
 606    emit_dst( &inst->DstReg, dest, mask );
 607
 608    debug_insn(inst, fn, line);
 609 }
 610
 611
/* Wrappers around emit_op3fn() that record the calling function and line
 * for the debug listing.  The one- and two-operand forms pass 'undef'
 * for the unused source operands.
 */
#define emit_op3(p, op, dst, mask, src0, src1, src2) \
   emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)

#define emit_op2(p, op, dst, mask, src0, src1) \
    emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)

#define emit_op1(p, op, dst, mask, src0) \
    emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
 620
 621
 622 static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
 623 {
 624    if (reg.file == PROGRAM_TEMPORARY &&
 625        !(p->temp_reserved & (1<<reg.idx)))
 626       return reg;
 627    else {
 628       struct ureg temp = get_temp(p);
 629       emit_op1(p, OPCODE_MOV, temp, 0, reg);
 630       return temp;
 631    }
 632 }
 633
 634
 635 /* Currently no tracking performed of input/output/register size or
 636  * active elements.  Could be used to reduce these operations, as
 637  * could the matrix type.
 638  */
 639 static void emit_matrix_transform_vec4( struct tnl_program *p,
 640                                         struct ureg dest,
 641                                         const struct ureg *mat,
 642                                         struct ureg src)
 643 {
 644    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
 645    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
 646    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
 647    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
 648 }
 649
 650 /* This version is much easier to implement if writemasks are not
 651  * supported natively on the target or (like SSE), the target doesn't
 652  * have a clean/obvious dotproduct implementation.
 653  */
 654 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
 655                                                   struct ureg dest,
 656                                                   const struct ureg *mat,
 657                                                   struct ureg src)
 658 {
 659    struct ureg tmp;
 660
 661    if (dest.file != PROGRAM_TEMPORARY)
 662       tmp = get_temp(p);
 663    else
 664       tmp = dest;
 665
 666    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
 667    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
 668    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
 669    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
 670
 671    if (dest.file != PROGRAM_TEMPORARY)
 672       release_temp(p, tmp);
 673 }
 674
 675 static void emit_matrix_transform_vec3( struct tnl_program *p,
 676                                         struct ureg dest,
 677                                         const struct ureg *mat,
 678                                         struct ureg src)
 679 {
 680    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
 681    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
 682    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
 683 }
 684
 685
 686 static void emit_normalize_vec3( struct tnl_program *p,
 687                                  struct ureg dest,
 688                                  struct ureg src )
 689 {
 690    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, src);
 691    emit_op1(p, OPCODE_RSQ, dest, WRITEMASK_X, dest);
 692    emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(dest, X));
 693 }
 694
 695 static void emit_passthrough( struct tnl_program *p,
 696                               GLuint input,
 697                               GLuint output )
 698 {
 699    struct ureg out = register_output(p, output);
 700    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
 701 }
 702
 703 static struct ureg get_eye_position( struct tnl_program *p )
 704 {
 705    if (is_undef(p->eye_position)) {
 706       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 707       struct ureg modelview[4];
 708
 709       p->eye_position = reserve_temp(p);
 710
 711       if (PREFER_DP4) {
 712          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 713                                  0, modelview );
 714
 715          emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 716       }
 717       else {
 718          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 719                                  STATE_MATRIX_TRANSPOSE, modelview );
 720
 721          emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 722       }
 723    }
 724
 725    return p->eye_position;
 726 }
 727
 728
 729 static struct ureg get_eye_position_normalized( struct tnl_program *p )
 730 {
 731    if (is_undef(p->eye_position_normalized)) {
 732       struct ureg eye = get_eye_position(p);
 733       p->eye_position_normalized = reserve_temp(p);
 734       emit_normalize_vec3(p, p->eye_position_normalized, eye);
 735    }
 736
 737    return p->eye_position_normalized;
 738 }
 739
 740
 741 static struct ureg get_transformed_normal( struct tnl_program *p )
 742 {
 743    if (is_undef(p->transformed_normal) &&
 744        !p->state->need_eye_coords &&
 745        !p->state->normalize &&
 746        !(p->state->need_eye_coords == p->state->rescale_normals))
 747    {
 748       p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
 749    }
 750    else if (is_undef(p->transformed_normal))
 751    {
 752       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
 753       struct ureg mvinv[3];
 754       struct ureg transformed_normal = reserve_temp(p);
 755
 756       if (p->state->need_eye_coords) {
 757          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
 758                                  STATE_MATRIX_INVTRANS, mvinv );
 759
 760          /* Transform to eye space:
 761           */
 762          emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
 763          normal = transformed_normal;
 764       }
 765
 766       /* Normalize/Rescale:
 767        */
 768       if (p->state->normalize) {
 769          emit_normalize_vec3( p, transformed_normal, normal );
 770          normal = transformed_normal;
 771       }
 772       else if (p->state->need_eye_coords == p->state->rescale_normals) {
 773          /* This is already adjusted for eye/non-eye rendering:
 774           */
 775          struct ureg rescale = register_param2(p, STATE_INTERNAL,
 776                                                STATE_NORMAL_SCALE);
 777
 778          emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal,
 779                    swizzle1(rescale, X));
 780          normal = transformed_normal;
 781       }
 782
 783       assert(normal.file == PROGRAM_TEMPORARY);
 784       p->transformed_normal = normal;
 785    }
 786
 787    return p->transformed_normal;
 788 }
 789
 790
 791
 792 static void build_hpos( struct tnl_program *p )
 793 {
 794    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 795    struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
 796    struct ureg mvp[4];
 797
 798    if (PREFER_DP4) {
 799       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 800                               0, mvp );
 801       emit_matrix_transform_vec4( p, hpos, mvp, pos );
 802    }
 803    else {
 804       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 805                               STATE_MATRIX_TRANSPOSE, mvp );
 806       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
 807    }
 808 }
 809
 810
 811 static GLuint material_attrib( GLuint side, GLuint property )
 812 {
 813    return ((property - STATE_AMBIENT) * 2 +
 814            side);
 815 }
 816
 817 /* Get a bitmask of which material values vary on a per-vertex basis.
 818  */
 819 static void set_material_flags( struct tnl_program *p )
 820 {
 821    p->color_materials = 0;
 822    p->materials = 0;
 823
 824    if (p->state->light_color_material) {
 825       p->materials =
 826          p->color_materials = p->state->light_color_material_mask;
 827    }
 828
 829    p->materials |= p->state->light_material_mask;
 830 }
 831
 832
 833 /* XXX temporary!!! */
 834 #define _TNL_ATTRIB_MAT_FRONT_AMBIENT 32
 835
 836 static struct ureg get_material( struct tnl_program *p, GLuint side,
 837                                  GLuint property )
 838 {
 839    GLuint attrib = material_attrib(side, property);
 840
 841    if (p->color_materials & (1<<attrib))
 842       return register_input(p, VERT_ATTRIB_COLOR0);
 843    else if (p->materials & (1<<attrib))
 844       return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT );
 845    else
 846       return register_param3( p, STATE_MATERIAL, side, property );
 847 }
 848
 849 #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
 850                                    MAT_BIT_FRONT_AMBIENT | \
 851                                    MAT_BIT_FRONT_DIFFUSE) << (side))
 852
 853 /* Either return a precalculated constant value or emit code to
 854  * calculate these values dynamically in the case where material calls
 855  * are present between begin/end pairs.
 856  *
 857  * Probably want to shift this to the program compilation phase - if
 858  * we always emitted the calculation here, a smart compiler could
 859  * detect that it was constant (given a certain set of inputs), and
 860  * lift it out of the main loop.  That way the programs created here
 861  * would be independent of the vertex_buffer details.
 862  */
 863 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
 864 {
 865    if (p->materials & SCENE_COLOR_BITS(side)) {
 866       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
 867       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
 868       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
 869       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
 870       struct ureg tmp = make_temp(p, material_diffuse);
 871       emit_op3(p, OPCODE_MAD, tmp,  WRITEMASK_XYZ, lm_ambient,
 872                material_ambient, material_emission);
 873       return tmp;
 874    }
 875    else
 876       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
 877 }
 878
 879
 880 static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
 881                                   GLuint side, GLuint property )
 882 {
 883    GLuint attrib = material_attrib(side, property);
 884    if (p->materials & (1<<attrib)) {
 885       struct ureg light_value =
 886          register_param3(p, STATE_LIGHT, light, property);
 887       struct ureg material_value = get_material(p, side, property);
 888       struct ureg tmp = get_temp(p);
 889       emit_op2(p, OPCODE_MUL, tmp,  0, light_value, material_value);
 890       return tmp;
 891    }
 892    else
 893       return register_param4(p, STATE_LIGHTPROD, light, side, property);
 894 }
 895
 896 static struct ureg calculate_light_attenuation( struct tnl_program *p,
 897                                                 GLuint i,
 898                                                 struct ureg VPpli,
 899                                                 struct ureg dist )
 900 {
 901    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
 902                                              STATE_ATTENUATION);
 903    struct ureg att = get_temp(p);
 904
 905    /* Calculate spot attenuation:
 906     */
 907    if (!p->state->unit[i].light_spotcutoff_is_180) {
 908       struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
 909                                                   STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
 910       struct ureg spot = get_temp(p);
 911       struct ureg slt = get_temp(p);
 912
 913       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
 914       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
 915       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
 916       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
 917
 918       release_temp(p, spot);
 919       release_temp(p, slt);
 920    }
 921
 922    /* Calculate distance attenuation:
 923     */
 924    if (p->state->unit[i].light_attenuated) {
 925
 926       /* 1/d,d,d,1/d */
 927       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
 928       /* 1,d,d*d,1/d */
 929       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
 930       /* 1/dist-atten */
 931       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
 932
 933       if (!p->state->unit[i].light_spotcutoff_is_180) {
 934          /* dist-atten */
 935          emit_op1(p, OPCODE_RCP, dist, 0, dist);
 936          /* spot-atten * dist-atten */
 937          emit_op2(p, OPCODE_MUL, att, 0, dist, att);
 938       } else {
 939          /* dist-atten */
 940          emit_op1(p, OPCODE_RCP, att, 0, dist);
 941       }
 942    }
 943
 944    return att;
 945 }
 946
 947
 948 static void emit_degenerate_lit( struct tnl_program *p,
 949                                  struct ureg lit,
 950                                  struct ureg dots )
 951 {
 952    struct ureg id = get_identity_param(p);
 953
 954    /* Note that result.x & result.w will not be examined.  Note also that
 955     * dots.xyzw == dots.xxxx.
 956     */
 957
 958    /* result[1] = MAX2(in, 0)
 959     */
 960    emit_op2(p, OPCODE_MAX, lit, 0, id, dots);
 961
 962    /* result[2] = (in > 0 ? 1 : 0)
 963     */
 964    emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z,
 965             lit,                /* 0 */
 966             dots); /* in[0] */
 967 }
 968
 969
 970 /* Need to add some addtional parameters to allow lighting in object
 971  * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
 972  * space lighting.
 973  */
 974 static void build_lighting( struct tnl_program *p )
 975 {
 976    const GLboolean twoside = p->state->light_twoside;
 977    const GLboolean separate = p->state->separate_specular;
 978    GLuint nr_lights = 0, count = 0;
 979    struct ureg normal = get_transformed_normal(p);
 980    struct ureg lit = get_temp(p);
 981    struct ureg dots = get_temp(p);
 982    struct ureg _col0 = undef, _col1 = undef;
 983    struct ureg _bfc0 = undef, _bfc1 = undef;
 984    GLuint i;
 985
 986    for (i = 0; i < MAX_LIGHTS; i++)
 987       if (p->state->unit[i].light_enabled)
 988          nr_lights++;
 989
 990    set_material_flags(p);
 991
 992    {
 993       if (!p->state->material_shininess_is_zero) {
 994          struct ureg shininess = get_material(p, 0, STATE_SHININESS);
 995          emit_op1(p, OPCODE_MOV, dots,  WRITEMASK_W, swizzle1(shininess,X));
 996          release_temp(p, shininess);
 997       }
 998
 999       _col0 = make_temp(p, get_scenecolor(p, 0));
1000       if (separate)
1001          _col1 = make_temp(p, get_identity_param(p));
1002       else
1003          _col1 = _col0;
1004
1005    }
1006
1007    if (twoside) {
1008       if (!p->state->material_shininess_is_zero) {
1009          struct ureg shininess = get_material(p, 1, STATE_SHININESS);
1010          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
1011                   negate(swizzle1(shininess,X)));
1012          release_temp(p, shininess);
1013       }
1014
1015       _bfc0 = make_temp(p, get_scenecolor(p, 1));
1016       if (separate)
1017          _bfc1 = make_temp(p, get_identity_param(p));
1018       else
1019          _bfc1 = _bfc0;
1020    }
1021
1022    /* If no lights, still need to emit the scenecolor.
1023     */
1024    {
1025       struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
1026       emit_op1(p, OPCODE_MOV, res0, 0, _col0);
1027    }
1028
1029    if (separate) {
1030       struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
1031       emit_op1(p, OPCODE_MOV, res1, 0, _col1);
1032    }
1033
1034    if (twoside) {
1035       struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
1036       emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
1037    }
1038
1039    if (twoside && separate) {
1040       struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
1041       emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
1042    }
1043
1044    if (nr_lights == 0) {
1045       release_temps(p);
1046       return;
1047    }
1048
1049    for (i = 0; i < MAX_LIGHTS; i++) {
1050       if (p->state->unit[i].light_enabled) {
1051          struct ureg half = undef;
1052          struct ureg att = undef, VPpli = undef;
1053
1054          count++;
1055
1056          if (p->state->unit[i].light_eyepos3_is_zero) {
1057             /* Can used precomputed constants in this case.
1058              * Attenuation never applies to infinite lights.
1059              */
1060             VPpli = register_param3(p, STATE_INTERNAL,
1061                                     STATE_LIGHT_POSITION_NORMALIZED, i);
1062
1063             if (!p->state->material_shininess_is_zero) {
1064                if (p->state->light_local_viewer) {
1065                   struct ureg eye_hat = get_eye_position_normalized(p);
1066                   half = get_temp(p);
1067                   emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1068                   emit_normalize_vec3(p, half, half);
1069                } else {
1070                   half = register_param3(p, STATE_INTERNAL,
1071                                          STATE_LIGHT_HALF_VECTOR, i);
1072                }
1073             }
1074          }
1075          else {
1076             struct ureg Ppli = register_param3(p, STATE_INTERNAL,
1077                                                STATE_LIGHT_POSITION, i);
1078             struct ureg V = get_eye_position(p);
1079             struct ureg dist = get_temp(p);
1080
1081             VPpli = get_temp(p);
1082
1083             /* Calculate VPpli vector
1084              */
1085             emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
1086
1087             /* Normalize VPpli.  The dist value also used in
1088              * attenuation below.
1089              */
1090             emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
1091             emit_op1(p, OPCODE_RSQ, dist, 0, dist);
1092             emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
1093
1094             /* Calculate attenuation:
1095              */
1096             if (!p->state->unit[i].light_spotcutoff_is_180 ||
1097                 p->state->unit[i].light_attenuated) {
1098                att = calculate_light_attenuation(p, i, VPpli, dist);
1099             }
1100
1101             /* Calculate viewer direction, or use infinite viewer:
1102              */
1103             if (!p->state->material_shininess_is_zero) {
1104                half = get_temp(p);
1105
1106                if (p->state->light_local_viewer) {
1107                   struct ureg eye_hat = get_eye_position_normalized(p);
1108                   emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1109                }
1110                else {
1111                   struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
1112                   emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
1113                }
1114
1115                emit_normalize_vec3(p, half, half);
1116             }
1117
1118             release_temp(p, dist);
1119          }
1120
1121          /* Calculate dot products:
1122           */
1123          if (p->state->material_shininess_is_zero) {
1124             emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
1125          }
1126          else {
1127             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
1128             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
1129          }
1130
1131          /* Front face lighting:
1132           */
1133          {
1134             struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
1135             struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
1136             struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
1137             struct ureg res0, res1;
1138             GLuint mask0, mask1;
1139
1140             if (p->state->material_shininess_is_zero) {
1141                emit_degenerate_lit(p, lit, dots);
1142             } else {
1143                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1144             }
1145
1146             if (!is_undef(att))
1147                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1148
1149
1150             if (count == nr_lights) {
1151                if (separate) {
1152                   mask0 = WRITEMASK_XYZ;
1153                   mask1 = WRITEMASK_XYZ;
1154                   res0 = register_output( p, VERT_RESULT_COL0 );
1155                   res1 = register_output( p, VERT_RESULT_COL1 );
1156                }
1157                else {
1158                   mask0 = 0;
1159                   mask1 = WRITEMASK_XYZ;
1160                   res0 = _col0;
1161                   res1 = register_output( p, VERT_RESULT_COL0 );
1162                }
1163             } else {
1164                mask0 = 0;
1165                mask1 = 0;
1166                res0 = _col0;
1167                res1 = _col1;
1168             }
1169
1170             emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1171             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
1172             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
1173
1174             release_temp(p, ambient);
1175             release_temp(p, diffuse);
1176             release_temp(p, specular);
1177          }
1178
1179          /* Back face lighting:
1180           */
1181          if (twoside) {
1182             struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
1183             struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
1184             struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
1185             struct ureg res0, res1;
1186             GLuint mask0, mask1;
1187
1188             if (p->state->material_shininess_is_zero) {
1189                emit_degenerate_lit(p, lit, negate(swizzle(dots,X,Y,W,Z)));
1190             } else {
1191                emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z)));
1192             }
1193
1194             if (!is_undef(att))
1195                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1196
1197             if (count == nr_lights) {
1198                if (separate) {
1199                   mask0 = WRITEMASK_XYZ;
1200                   mask1 = WRITEMASK_XYZ;
1201                   res0 = register_output( p, VERT_RESULT_BFC0 );
1202                   res1 = register_output( p, VERT_RESULT_BFC1 );
1203                }
1204                else {
1205                   mask0 = 0;
1206                   mask1 = WRITEMASK_XYZ;
1207                   res0 = _bfc0;
1208                   res1 = register_output( p, VERT_RESULT_BFC0 );
1209                }
1210             } else {
1211                res0 = _bfc0;
1212                res1 = _bfc1;
1213                mask0 = 0;
1214                mask1 = 0;
1215             }
1216
1217             emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
1218             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
1219             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
1220
1221             release_temp(p, ambient);
1222             release_temp(p, diffuse);
1223             release_temp(p, specular);
1224          }
1225
1226          release_temp(p, half);
1227          release_temp(p, VPpli);
1228          release_temp(p, att);
1229       }
1230    }
1231
1232    release_temps( p );
1233 }
1234
1235
1236 static void build_fog( struct tnl_program *p )
1237 {
1238    struct ureg fog = register_output(p, VERT_RESULT_FOGC);
1239    struct ureg input;
1240
1241    if (p->state->fog_source_is_depth) {
1242       input = swizzle1(get_eye_position(p), Z);
1243    }
1244    else {
1245       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1246    }
1247
1248    if (p->state->fog_mode && p->state->tnl_do_vertex_fog) {
1249       struct ureg params = register_param2(p, STATE_INTERNAL,
1250                                            STATE_FOG_PARAMS_OPTIMIZED);
1251       struct ureg tmp = get_temp(p);
1252       GLboolean useabs = (p->state->fog_mode != FOG_EXP2);
1253
1254       if (useabs) {
1255          emit_op1(p, OPCODE_ABS, tmp, 0, input);
1256       }
1257
1258       switch (p->state->fog_mode) {
1259       case FOG_LINEAR: {
1260          struct ureg id = get_identity_param(p);
1261          emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input,
1262                         swizzle1(params,X), swizzle1(params,Y));
1263          emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
1264          emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
1265          break;
1266       }
1267       case FOG_EXP:
1268          emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input,
1269                         swizzle1(params,Z));
1270          emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
1271          break;
1272       case FOG_EXP2:
1273          emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
1274          emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp);
1275          emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
1276          break;
1277       }
1278
1279       release_temp(p, tmp);
1280    }
1281    else {
1282       /* results = incoming fog coords (compute fog per-fragment later)
1283        *
1284        * KW:  Is it really necessary to do anything in this case?
1285        * BP: Yes, we always need to compute the absolute value, unless
1286        * we want to push that down into the fragment program...
1287        */
1288       GLboolean useabs = GL_TRUE;
1289       emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, WRITEMASK_X, input);
1290    }
1291 }
1292
1293 static void build_reflect_texgen( struct tnl_program *p,
1294                                   struct ureg dest,
1295                                   GLuint writemask )
1296 {
1297    struct ureg normal = get_transformed_normal(p);
1298    struct ureg eye_hat = get_eye_position_normalized(p);
1299    struct ureg tmp = get_temp(p);
1300
1301    /* n.u */
1302    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1303    /* 2n.u */
1304    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1305    /* (-2n.u)n + u */
1306    emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
1307
1308    release_temp(p, tmp);
1309 }
1310
1311 static void build_sphere_texgen( struct tnl_program *p,
1312                                  struct ureg dest,
1313                                  GLuint writemask )
1314 {
1315    struct ureg normal = get_transformed_normal(p);
1316    struct ureg eye_hat = get_eye_position_normalized(p);
1317    struct ureg tmp = get_temp(p);
1318    struct ureg half = register_scalar_const(p, .5);
1319    struct ureg r = get_temp(p);
1320    struct ureg inv_m = get_temp(p);
1321    struct ureg id = get_identity_param(p);
1322
1323    /* Could share the above calculations, but it would be
1324     * a fairly odd state for someone to set (both sphere and
1325     * reflection active for different texture coordinate
1326     * components.  Of course - if two texture units enable
1327     * reflect and/or sphere, things start to tilt in favour
1328     * of seperating this out:
1329     */
1330
1331    /* n.u */
1332    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1333    /* 2n.u */
1334    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1335    /* (-2n.u)n + u */
1336    emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
1337    /* r + 0,0,1 */
1338    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1339    /* rx^2 + ry^2 + (rz+1)^2 */
1340    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1341    /* 2/m */
1342    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1343    /* 1/m */
1344    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1345    /* r/m + 1/2 */
1346    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1347
1348    release_temp(p, tmp);
1349    release_temp(p, r);
1350    release_temp(p, inv_m);
1351 }
1352
1353
1354 static void build_texture_transform( struct tnl_program *p )
1355 {
1356    GLuint i, j;
1357
1358    for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
1359
1360       if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i)))
1361          continue;
1362
1363       if (p->state->unit[i].texgen_enabled ||
1364           p->state->unit[i].texmat_enabled) {
1365
1366          GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1367          struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
1368          struct ureg out_texgen = undef;
1369
1370          if (p->state->unit[i].texgen_enabled) {
1371             GLuint copy_mask = 0;
1372             GLuint sphere_mask = 0;
1373             GLuint reflect_mask = 0;
1374             GLuint normal_mask = 0;
1375             GLuint modes[4];
1376
1377             if (texmat_enabled)
1378                out_texgen = get_temp(p);
1379             else
1380                out_texgen = out;
1381
1382             modes[0] = p->state->unit[i].texgen_mode0;
1383             modes[1] = p->state->unit[i].texgen_mode1;
1384             modes[2] = p->state->unit[i].texgen_mode2;
1385             modes[3] = p->state->unit[i].texgen_mode3;
1386
1387             for (j = 0; j < 4; j++) {
1388                switch (modes[j]) {
1389                case TXG_OBJ_LINEAR: {
1390                   struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1391                   struct ureg plane =
1392                      register_param3(p, STATE_TEXGEN, i,
1393                                      STATE_TEXGEN_OBJECT_S + j);
1394
1395                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1396                            obj, plane );
1397                   break;
1398                }
1399                case TXG_EYE_LINEAR: {
1400                   struct ureg eye = get_eye_position(p);
1401                   struct ureg plane =
1402                      register_param3(p, STATE_TEXGEN, i,
1403                                      STATE_TEXGEN_EYE_S + j);
1404
1405                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1406                            eye, plane );
1407                   break;
1408                }
1409                case TXG_SPHERE_MAP:
1410                   sphere_mask |= WRITEMASK_X << j;
1411                   break;
1412                case TXG_REFLECTION_MAP:
1413                   reflect_mask |= WRITEMASK_X << j;
1414                   break;
1415                case TXG_NORMAL_MAP:
1416                   normal_mask |= WRITEMASK_X << j;
1417                   break;
1418                case TXG_NONE:
1419                   copy_mask |= WRITEMASK_X << j;
1420                }
1421
1422             }
1423
1424
1425             if (sphere_mask) {
1426                build_sphere_texgen(p, out_texgen, sphere_mask);
1427             }
1428
1429             if (reflect_mask) {
1430                build_reflect_texgen(p, out_texgen, reflect_mask);
1431             }
1432
1433             if (normal_mask) {
1434                struct ureg normal = get_transformed_normal(p);
1435                emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1436             }
1437
1438             if (copy_mask) {
1439                struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1440                emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1441             }
1442          }
1443
1444          if (texmat_enabled) {
1445             struct ureg texmat[4];
1446             struct ureg in = (!is_undef(out_texgen) ?
1447                               out_texgen :
1448                               register_input(p, VERT_ATTRIB_TEX0+i));
1449             if (PREFER_DP4) {
1450                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1451                                        0, texmat );
1452                emit_matrix_transform_vec4( p, out, texmat, in );
1453             }
1454             else {
1455                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1456                                        STATE_MATRIX_TRANSPOSE, texmat );
1457                emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1458             }
1459          }
1460
1461          release_temps(p);
1462       }
1463       else {
1464          emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
1465       }
1466    }
1467 }
1468
1469
1470 static void build_pointsize( struct tnl_program *p )
1471 {
1472    struct ureg eye = get_eye_position(p);
1473    struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
1474    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1475    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1476    struct ureg ut = get_temp(p);
1477
1478    /* dist = |eyez| */
1479    emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
1480    /* p1 + dist * (p2 + dist * p3); */
1481    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1482                 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
1483    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1484                 ut, swizzle1(state_attenuation, X));
1485
1486    /* 1 / sqrt(factor) */
1487    emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
1488
1489 #if 0
1490    /* out = pointSize / sqrt(factor) */
1491    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1492 #else
1493    /* this is a good place to clamp the point size since there's likely
1494     * no hardware registers to clamp point size at rasterization time.
1495     */
1496    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
1497    emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
1498    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
1499 #endif
1500
1501    release_temp(p, ut);
1502 }
1503
1504 /**
1505  * Emit constant point size.
1506  */
1507 static void constant_pointsize( struct tnl_program *p )
1508 {
1509    struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
1510    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1511    emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, state_size);
1512 }
1513
1514 static void build_tnl_program( struct tnl_program *p )
1515 {   /* Emit the program, starting with modelviewproject:
1516     */
1517    build_hpos(p);
1518
1519    /* Lighting calculations:
1520     */
1521    if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
1522       if (p->state->light_global_enabled)
1523          build_lighting(p);
1524       else {
1525          if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1526             emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
1527
1528          if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1529             emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
1530       }
1531    }
1532
1533    if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) ||
1534        p->state->fog_mode != FOG_NONE)
1535       build_fog(p);
1536
1537    if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
1538       build_texture_transform(p);
1539
1540    if (p->state->point_attenuated)
1541       build_pointsize(p);
1542 #if 0
1543    else
1544       constant_pointsize(p);
1545 #endif
1546
1547    /* Finish up:
1548     */
1549    emit_op1(p, OPCODE_END, undef, 0, undef);
1550
1551    /* Disassemble:
1552     */
1553    if (DISASSEM) {
1554       _mesa_printf ("\n");
1555    }
1556 }
1557
1558
1559 static void
1560 create_new_program( const struct state_key *key,
1561                     struct gl_vertex_program *program,
1562                     GLuint max_temps)
1563 {
1564    struct tnl_program p;
1565
1566    _mesa_memset(&p, 0, sizeof(p));
1567    p.state = key;
1568    p.program = program;
1569    p.eye_position = undef;
1570    p.eye_position_normalized = undef;
1571    p.transformed_normal = undef;
1572    p.identity = undef;
1573    p.temp_in_use = 0;
1574
1575    if (max_temps >= sizeof(int) * 8)
1576       p.temp_reserved = 0;
1577    else
1578       p.temp_reserved = ~((1<<max_temps)-1);
1579
1580    p.program->Base.Instructions = _mesa_alloc_instructions(MAX_INSN);
1581    p.program->Base.String = NULL;
1582    p.program->Base.NumInstructions =
1583    p.program->Base.NumTemporaries =
1584    p.program->Base.NumParameters =
1585    p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
1586    p.program->Base.Parameters = _mesa_new_parameter_list();
1587    p.program->Base.InputsRead = 0;
1588    p.program->Base.OutputsWritten = 0;
1589
1590    build_tnl_program( &p );
1591 }
1592
1593
1594 /**
1595  * Return a vertex program which implements the current fixed-function
1596  * transform/lighting/texgen operations.
1597  * XXX move this into core mesa (main/)
1598  */
1599 struct gl_vertex_program *
1600 _mesa_get_fixed_func_vertex_program(GLcontext *ctx)
1601 {
1602    struct gl_vertex_program *prog;
1603    struct state_key *key;
1604
1605    /* Grab all the relevent state and put it in a single structure:
1606     */
1607    key = make_state_key(ctx);
1608
1609    /* Look for an already-prepared program for this state:
1610     */
1611    prog = (struct gl_vertex_program *)
1612       _mesa_search_program_cache(ctx->VertexProgram.Cache, key, sizeof(*key));
1613
1614    if (!prog) {
1615       /* OK, we'll have to build a new one */
1616       if (0)
1617          _mesa_printf("Build new TNL program\n");
1618
1619       prog = (struct gl_vertex_program *)
1620          ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
1621       if (!prog)
1622          return NULL;
1623
1624       create_new_program( key, prog,
1625                           ctx->Const.VertexProgram.MaxTemps );
1626
1627 #if 0
1628       if (ctx->Driver.ProgramStringNotify)
1629          ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
1630                                           &prog->Base );
1631 #endif
1632       _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
1633                                  key, sizeof(*key), &prog->Base);
1634    }
1635
1636    _mesa_free(key);
1637
1638    return prog;
1639 }