src/mesa/tnl/t_vp_build.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 2007  Tungsten Graphics   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  21  * WHETHER IN
  22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file t_vp_build.c
  28  * Create a vertex program to execute the current fixed function T&L pipeline.
  29  * \author Keith Whitwell
  30  */
  31
  32
  33 #include "glheader.h"
  34 #include "macros.h"
  35 #include "enums.h"
  36 #include "shader/program.h"
  37 #include "shader/prog_instruction.h"
  38 #include "shader/prog_parameter.h"
  39 #include "shader/prog_print.h"
  40 #include "shader/prog_statevars.h"
  41 #include "t_context.h" /* NOTE: very light dependency on this */
  42 #include "t_vp_build.h"
  43
  44
  45 struct state_key {
  46    unsigned light_global_enabled:1;
  47    unsigned light_local_viewer:1;
  48    unsigned light_twoside:1;
  49    unsigned light_color_material:1;
  50    unsigned light_color_material_mask:12;
  51    unsigned light_material_mask:12;
  52
  53    unsigned normalize:1;
  54    unsigned rescale_normals:1;
  55    unsigned fog_source_is_depth:1;
  56    unsigned tnl_do_vertex_fog:1;
  57    unsigned separate_specular:1;
  58    unsigned fog_mode:2;
  59    unsigned point_attenuated:1;
  60    unsigned texture_enabled_global:1;
  61    unsigned fragprog_inputs_read:12;
  62
  63    struct {
  64       unsigned light_enabled:1;
  65       unsigned light_eyepos3_is_zero:1;
  66       unsigned light_spotcutoff_is_180:1;
  67       unsigned light_attenuated:1;
  68       unsigned texunit_really_enabled:1;
  69       unsigned texmat_enabled:1;
  70       unsigned texgen_enabled:4;
  71       unsigned texgen_mode0:4;
  72       unsigned texgen_mode1:4;
  73       unsigned texgen_mode2:4;
  74       unsigned texgen_mode3:4;
  75    } unit[8];
  76 };
  77
  78
  79
  80 #define FOG_NONE   0
  81 #define FOG_LINEAR 1
  82 #define FOG_EXP    2
  83 #define FOG_EXP2   3
  84
  85 static GLuint translate_fog_mode( GLenum mode )
  86 {
  87    switch (mode) {
  88    case GL_LINEAR: return FOG_LINEAR;
  89    case GL_EXP: return FOG_EXP;
  90    case GL_EXP2: return FOG_EXP2;
  91    default: return FOG_NONE;
  92    }
  93 }
  94
  95 #define TXG_NONE           0
  96 #define TXG_OBJ_LINEAR     1
  97 #define TXG_EYE_LINEAR     2
  98 #define TXG_SPHERE_MAP     3
  99 #define TXG_REFLECTION_MAP 4
 100 #define TXG_NORMAL_MAP     5
 101
 102 static GLuint translate_texgen( GLboolean enabled, GLenum mode )
 103 {
 104    if (!enabled)
 105       return TXG_NONE;
 106
 107    switch (mode) {
 108    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
 109    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
 110    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
 111    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
 112    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
 113    default: return TXG_NONE;
 114    }
 115 }
 116
 117 static struct state_key *make_state_key( GLcontext *ctx )
 118 {
 119    TNLcontext *tnl = TNL_CONTEXT(ctx);
 120    struct vertex_buffer *VB = &tnl->vb;
 121    const struct gl_fragment_program *fp = ctx->FragmentProgram._Current;
 122    struct state_key *key = CALLOC_STRUCT(state_key);
 123    GLuint i;
 124
 125    /* This now relies on texenvprogram.c being active:
 126     */
 127    assert(fp);
 128
 129    key->fragprog_inputs_read = fp->Base.InputsRead;
 130
 131    key->separate_specular = (ctx->Light.Model.ColorControl ==
 132                              GL_SEPARATE_SPECULAR_COLOR);
 133
 134    if (ctx->Light.Enabled) {
 135       key->light_global_enabled = 1;
 136
 137       if (ctx->Light.Model.LocalViewer)
 138          key->light_local_viewer = 1;
 139
 140       if (ctx->Light.Model.TwoSide)
 141          key->light_twoside = 1;
 142
 143       if (ctx->Light.ColorMaterialEnabled) {
 144          key->light_color_material = 1;
 145          key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
 146       }
 147
 148       for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++)
 149          if (VB->AttribPtr[i]->stride)
 150             key->light_material_mask |= 1<<(i-_TNL_ATTRIB_MAT_FRONT_AMBIENT);
 151
 152       for (i = 0; i < MAX_LIGHTS; i++) {
 153          struct gl_light *light = &ctx->Light.Light[i];
 154
 155          if (light->Enabled) {
 156             key->unit[i].light_enabled = 1;
 157
 158             if (light->EyePosition[3] == 0.0)
 159                key->unit[i].light_eyepos3_is_zero = 1;
 160
 161             if (light->SpotCutoff == 180.0)
 162                key->unit[i].light_spotcutoff_is_180 = 1;
 163
 164             if (light->ConstantAttenuation != 1.0 ||
 165                 light->LinearAttenuation != 0.0 ||
 166                 light->QuadraticAttenuation != 0.0)
 167                key->unit[i].light_attenuated = 1;
 168          }
 169       }
 170    }
 171
 172    if (ctx->Transform.Normalize)
 173       key->normalize = 1;
 174
 175    if (ctx->Transform.RescaleNormals)
 176       key->rescale_normals = 1;
 177
 178    key->fog_mode = translate_fog_mode(fp->FogOption);
 179
 180    if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
 181       key->fog_source_is_depth = 1;
 182
 183    if (tnl->_DoVertexFog)
 184       key->tnl_do_vertex_fog = 1;
 185
 186    if (ctx->Point._Attenuated)
 187       key->point_attenuated = 1;
 188
 189    if (ctx->Texture._TexGenEnabled ||
 190        ctx->Texture._TexMatEnabled ||
 191        ctx->Texture._EnabledUnits)
 192       key->texture_enabled_global = 1;
 193
 194    for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
 195       struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
 196
 197       if (texUnit->_ReallyEnabled)
 198          key->unit[i].texunit_really_enabled = 1;
 199
 200       if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
 201          key->unit[i].texmat_enabled = 1;
 202
 203       if (texUnit->TexGenEnabled) {
 204          key->unit[i].texgen_enabled = 1;
 205
 206          key->unit[i].texgen_mode0 =
 207             translate_texgen( texUnit->TexGenEnabled & (1<<0),
 208                               texUnit->GenModeS );
 209          key->unit[i].texgen_mode1 =
 210             translate_texgen( texUnit->TexGenEnabled & (1<<1),
 211                               texUnit->GenModeT );
 212          key->unit[i].texgen_mode2 =
 213             translate_texgen( texUnit->TexGenEnabled & (1<<2),
 214                               texUnit->GenModeR );
 215          key->unit[i].texgen_mode3 =
 216             translate_texgen( texUnit->TexGenEnabled & (1<<3),
 217                               texUnit->GenModeQ );
 218       }
 219    }
 220
 221    return key;
 222 }
 223
 224
 225
 226 /* Very useful debugging tool - produces annotated listing of
 227  * generated program with line/function references for each
 228  * instruction back into this file:
 229  */
 230 #define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM)
 231
 232 /* Should be tunable by the driver - do we want to do matrix
 233  * multiplications with DP4's or with MUL/MAD's?  SSE works better
 234  * with the latter, drivers may differ.
 235  */
 236 #define PREFER_DP4 0
 237
 238 #define MAX_INSN 256
 239
 240 /* Use uregs to represent registers internally, translate to Mesa's
 241  * expected formats on emit.
 242  *
 243  * NOTE: These are passed by value extensively in this file rather
 244  * than as usual by pointer reference.  If this disturbs you, try
 245  * remembering they are just 32bits in size.
 246  *
 247  * GCC is smart enough to deal with these dword-sized structures in
 248  * much the same way as if I had defined them as dwords and was using
 249  * macros to access and set the fields.  This is much nicer and easier
 250  * to evolve.
 251  */
 252 struct ureg {
 253    GLuint file:4;
 254    GLint idx:8;      /* relative addressing may be negative */
 255    GLuint negate:1;
 256    GLuint swz:12;
 257    GLuint pad:7;
 258 };
 259
 260
 261 struct tnl_program {
 262    const struct state_key *state;
 263    struct gl_vertex_program *program;
 264
 265    GLuint temp_in_use;
 266    GLuint temp_reserved;
 267
 268    struct ureg eye_position;
 269    struct ureg eye_position_normalized;
 270    struct ureg eye_normal;
 271    struct ureg identity;
 272
 273    GLuint materials;
 274    GLuint color_materials;
 275 };
 276
 277
 278 static const struct ureg undef = {
 279    PROGRAM_UNDEFINED,
 280    ~0,
 281    0,
 282    0,
 283    0
 284 };
 285
 286 /* Local shorthand:
 287  */
 288 #define X    SWIZZLE_X
 289 #define Y    SWIZZLE_Y
 290 #define Z    SWIZZLE_Z
 291 #define W    SWIZZLE_W
 292
 293
 294 /* Construct a ureg:
 295  */
 296 static struct ureg make_ureg(GLuint file, GLint idx)
 297 {
 298    struct ureg reg;
 299    reg.file = file;
 300    reg.idx = idx;
 301    reg.negate = 0;
 302    reg.swz = SWIZZLE_NOOP;
 303    reg.pad = 0;
 304    return reg;
 305 }
 306
 307
 308
 309 static struct ureg negate( struct ureg reg )
 310 {
 311    reg.negate ^= 1;
 312    return reg;
 313 }
 314
 315
 316 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
 317 {
 318    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
 319                            GET_SWZ(reg.swz, y),
 320                            GET_SWZ(reg.swz, z),
 321                            GET_SWZ(reg.swz, w));
 322
 323    return reg;
 324 }
 325
 326 static struct ureg swizzle1( struct ureg reg, int x )
 327 {
 328    return swizzle(reg, x, x, x, x);
 329 }
 330
 331 static struct ureg get_temp( struct tnl_program *p )
 332 {
 333    int bit = _mesa_ffs( ~p->temp_in_use );
 334    if (!bit) {
 335       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
 336       _mesa_exit(1);
 337    }
 338
 339    if ((GLuint) bit > p->program->Base.NumTemporaries)
 340       p->program->Base.NumTemporaries = bit;
 341
 342    p->temp_in_use |= 1<<(bit-1);
 343    return make_ureg(PROGRAM_TEMPORARY, bit-1);
 344 }
 345
 346 static struct ureg reserve_temp( struct tnl_program *p )
 347 {
 348    struct ureg temp = get_temp( p );
 349    p->temp_reserved |= 1<<temp.idx;
 350    return temp;
 351 }
 352
 353 static void release_temp( struct tnl_program *p, struct ureg reg )
 354 {
 355    if (reg.file == PROGRAM_TEMPORARY) {
 356       p->temp_in_use &= ~(1<<reg.idx);
 357       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
 358    }
 359 }
 360
 361 static void release_temps( struct tnl_program *p )
 362 {
 363    p->temp_in_use = p->temp_reserved;
 364 }
 365
 366
 367
 368 static struct ureg register_input( struct tnl_program *p, GLuint input )
 369 {
 370    p->program->Base.InputsRead |= (1<<input);
 371    return make_ureg(PROGRAM_INPUT, input);
 372 }
 373
 374 static struct ureg register_output( struct tnl_program *p, GLuint output )
 375 {
 376    p->program->Base.OutputsWritten |= (1<<output);
 377    return make_ureg(PROGRAM_OUTPUT, output);
 378 }
 379
 380 static struct ureg register_const4f( struct tnl_program *p,
 381                               GLfloat s0,
 382                               GLfloat s1,
 383                               GLfloat s2,
 384                               GLfloat s3)
 385 {
 386    GLfloat values[4];
 387    GLint idx;
 388    GLuint swizzle;
 389    values[0] = s0;
 390    values[1] = s1;
 391    values[2] = s2;
 392    values[3] = s3;
 393    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
 394                                      &swizzle );
 395    ASSERT(swizzle == SWIZZLE_NOOP);
 396    return make_ureg(PROGRAM_STATE_VAR, idx);
 397 }
 398
 399 #define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
 400 #define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
 401 #define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
 402 #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
 403
 404 static GLboolean is_undef( struct ureg reg )
 405 {
 406    return reg.file == PROGRAM_UNDEFINED;
 407 }
 408
 409 static struct ureg get_identity_param( struct tnl_program *p )
 410 {
 411    if (is_undef(p->identity))
 412       p->identity = register_const4f(p, 0,0,0,1);
 413
 414    return p->identity;
 415 }
 416
 417 static struct ureg register_param5(struct tnl_program *p,
 418                                    GLint s0,
 419                                    GLint s1,
 420                                    GLint s2,
 421                                    GLint s3,
 422                                    GLint s4)
 423 {
 424    gl_state_index tokens[STATE_LENGTH];
 425    GLint idx;
 426    tokens[0] = s0;
 427    tokens[1] = s1;
 428    tokens[2] = s2;
 429    tokens[3] = s3;
 430    tokens[4] = s4;
 431    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
 432    return make_ureg(PROGRAM_STATE_VAR, idx);
 433 }
 434
 435
 436 #define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
 437 #define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
 438 #define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
 439 #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
 440
 441
 442 static void register_matrix_param5( struct tnl_program *p,
 443                                     GLint s0, /* modelview, projection, etc */
 444                                     GLint s1, /* texture matrix number */
 445                                     GLint s2, /* first row */
 446                                     GLint s3, /* last row */
 447                                     GLint s4, /* inverse, transpose, etc */
 448                                     struct ureg *matrix )
 449 {
 450    GLint i;
 451
 452    /* This is a bit sad as the support is there to pull the whole
 453     * matrix out in one go:
 454     */
 455    for (i = 0; i <= s3 - s2; i++)
 456       matrix[i] = register_param5( p, s0, s1, i, i, s4 );
 457 }
 458
 459
 460 /**
 461  * Convert a ureg source register to a prog_src_register.
 462  */
 463 static void emit_arg( struct prog_src_register *src,
 464                       struct ureg reg )
 465 {
 466    assert(reg.file != PROGRAM_OUTPUT);
 467    src->File = reg.file;
 468    src->Index = reg.idx;
 469    src->Swizzle = reg.swz;
 470    src->NegateBase = reg.negate ? NEGATE_XYZW : 0;
 471    src->Abs = 0;
 472    src->NegateAbs = 0;
 473    src->RelAddr = 0;
 474 }
 475
 476 /**
 477  * Convert a ureg dest register to a prog_dst_register.
 478  */
 479 static void emit_dst( struct prog_dst_register *dst,
 480                       struct ureg reg, GLuint mask )
 481 {
 482    /* Check for legal output register type.  UNDEFINED will occur in
 483     * instruction that don't produce a result (like END).
 484     */
 485    assert(reg.file == PROGRAM_TEMPORARY ||
 486           reg.file == PROGRAM_OUTPUT ||
 487           reg.file == PROGRAM_UNDEFINED);
 488    dst->File = reg.file;
 489    dst->Index = reg.idx;
 490    /* allow zero as a shorthand for xyzw */
 491    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
 492    dst->CondMask = COND_TR;  /* always pass cond test */
 493    dst->CondSwizzle = SWIZZLE_NOOP;
 494    dst->CondSrc = 0;
 495    dst->pad = 0;
 496 }
 497
 498 static void debug_insn( struct prog_instruction *inst, const char *fn,
 499                         GLuint line )
 500 {
 501    if (DISASSEM) {
 502       static const char *last_fn;
 503
 504       if (fn != last_fn) {
 505          last_fn = fn;
 506          _mesa_printf("%s:\n", fn);
 507       }
 508
 509       _mesa_printf("%d:\t", line);
 510       _mesa_print_instruction(inst);
 511    }
 512 }
 513
 514
 515 static void emit_op3fn(struct tnl_program *p,
 516                        enum prog_opcode op,
 517                        struct ureg dest,
 518                        GLuint mask,
 519                        struct ureg src0,
 520                        struct ureg src1,
 521                        struct ureg src2,
 522                        const char *fn,
 523                        GLuint line)
 524 {
 525    GLuint nr = p->program->Base.NumInstructions++;
 526    struct prog_instruction *inst = &p->program->Base.Instructions[nr];
 527
 528    if (p->program->Base.NumInstructions > MAX_INSN) {
 529       _mesa_problem(0, "Out of instructions in emit_op3fn\n");
 530       return;
 531    }
 532
 533    inst->Opcode = (enum prog_opcode) op;
 534    inst->StringPos = 0;
 535    inst->Data = 0;
 536
 537    emit_arg( &inst->SrcReg[0], src0 );
 538    emit_arg( &inst->SrcReg[1], src1 );
 539    emit_arg( &inst->SrcReg[2], src2 );
 540
 541    emit_dst( &inst->DstReg, dest, mask );
 542
 543    debug_insn(inst, fn, line);
 544 }
 545
 546
 547 #define emit_op3(p, op, dst, mask, src0, src1, src2) \
 548    emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
 549
 550 #define emit_op2(p, op, dst, mask, src0, src1) \
 551     emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
 552
 553 #define emit_op1(p, op, dst, mask, src0) \
 554     emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
 555
 556
 557 static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
 558 {
 559    if (reg.file == PROGRAM_TEMPORARY &&
 560        !(p->temp_reserved & (1<<reg.idx)))
 561       return reg;
 562    else {
 563       struct ureg temp = get_temp(p);
 564       emit_op1(p, OPCODE_MOV, temp, 0, reg);
 565       return temp;
 566    }
 567 }
 568
 569
 570 /* Currently no tracking performed of input/output/register size or
 571  * active elements.  Could be used to reduce these operations, as
 572  * could the matrix type.
 573  */
 574 static void emit_matrix_transform_vec4( struct tnl_program *p,
 575                                         struct ureg dest,
 576                                         const struct ureg *mat,
 577                                         struct ureg src)
 578 {
 579    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
 580    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
 581    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
 582    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
 583 }
 584
 585 /* This version is much easier to implement if writemasks are not
 586  * supported natively on the target or (like SSE), the target doesn't
 587  * have a clean/obvious dotproduct implementation.
 588  */
 589 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
 590                                                   struct ureg dest,
 591                                                   const struct ureg *mat,
 592                                                   struct ureg src)
 593 {
 594    struct ureg tmp;
 595
 596    if (dest.file != PROGRAM_TEMPORARY)
 597       tmp = get_temp(p);
 598    else
 599       tmp = dest;
 600
 601    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
 602    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
 603    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
 604    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
 605
 606    if (dest.file != PROGRAM_TEMPORARY)
 607       release_temp(p, tmp);
 608 }
 609
 610 static void emit_matrix_transform_vec3( struct tnl_program *p,
 611                                         struct ureg dest,
 612                                         const struct ureg *mat,
 613                                         struct ureg src)
 614 {
 615    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
 616    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
 617    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
 618 }
 619
 620
 621 static void emit_normalize_vec3( struct tnl_program *p,
 622                                  struct ureg dest,
 623                                  struct ureg src )
 624 {
 625    struct ureg tmp = get_temp(p);
 626    emit_op2(p, OPCODE_DP3, tmp, 0, src, src);
 627    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
 628    emit_op2(p, OPCODE_MUL, dest, 0, src, tmp);
 629    release_temp(p, tmp);
 630 }
 631
 632 static void emit_passthrough( struct tnl_program *p,
 633                               GLuint input,
 634                               GLuint output )
 635 {
 636    struct ureg out = register_output(p, output);
 637    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
 638 }
 639
 640 static struct ureg get_eye_position( struct tnl_program *p )
 641 {
 642    if (is_undef(p->eye_position)) {
 643       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 644       struct ureg modelview[4];
 645
 646       p->eye_position = reserve_temp(p);
 647
 648       if (PREFER_DP4) {
 649          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 650                                  0, modelview );
 651
 652          emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 653       }
 654       else {
 655          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 656                                  STATE_MATRIX_TRANSPOSE, modelview );
 657
 658          emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 659       }
 660    }
 661
 662    return p->eye_position;
 663 }
 664
 665
 666 static struct ureg get_eye_position_normalized( struct tnl_program *p )
 667 {
 668    if (is_undef(p->eye_position_normalized)) {
 669       struct ureg eye = get_eye_position(p);
 670       p->eye_position_normalized = reserve_temp(p);
 671       emit_normalize_vec3(p, p->eye_position_normalized, eye);
 672    }
 673
 674    return p->eye_position_normalized;
 675 }
 676
 677
 678 static struct ureg get_eye_normal( struct tnl_program *p )
 679 {
 680    if (is_undef(p->eye_normal)) {
 681       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
 682       struct ureg mvinv[3];
 683
 684       register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
 685                               STATE_MATRIX_INVTRANS, mvinv );
 686
 687       p->eye_normal = reserve_temp(p);
 688
 689       /* Transform to eye space:
 690        */
 691       emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal );
 692
 693       /* Normalize/Rescale:
 694        */
 695       if (p->state->normalize) {
 696          emit_normalize_vec3( p, p->eye_normal, p->eye_normal );
 697       }
 698       else if (p->state->rescale_normals) {
 699          struct ureg rescale = register_param2(p, STATE_INTERNAL,
 700                                                STATE_NORMAL_SCALE);
 701
 702          emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal,
 703                    swizzle1(rescale, X));
 704       }
 705    }
 706
 707    return p->eye_normal;
 708 }
 709
 710
 711
 712 static void build_hpos( struct tnl_program *p )
 713 {
 714    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 715    struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
 716    struct ureg mvp[4];
 717
 718    if (PREFER_DP4) {
 719       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 720                               0, mvp );
 721       emit_matrix_transform_vec4( p, hpos, mvp, pos );
 722    }
 723    else {
 724       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 725                               STATE_MATRIX_TRANSPOSE, mvp );
 726       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
 727    }
 728 }
 729
 730
 731 static GLuint material_attrib( GLuint side, GLuint property )
 732 {
 733    return ((property - STATE_AMBIENT) * 2 +
 734            side);
 735 }
 736
 737 /* Get a bitmask of which material values vary on a per-vertex basis.
 738  */
 739 static void set_material_flags( struct tnl_program *p )
 740 {
 741    p->color_materials = 0;
 742    p->materials = 0;
 743
 744    if (p->state->light_color_material) {
 745       p->materials =
 746          p->color_materials = p->state->light_color_material_mask;
 747    }
 748
 749    p->materials |= p->state->light_material_mask;
 750 }
 751
 752
 753 static struct ureg get_material( struct tnl_program *p, GLuint side,
 754                                  GLuint property )
 755 {
 756    GLuint attrib = material_attrib(side, property);
 757
 758    if (p->color_materials & (1<<attrib))
 759       return register_input(p, VERT_ATTRIB_COLOR0);
 760    else if (p->materials & (1<<attrib))
 761       return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT );
 762    else
 763       return register_param3( p, STATE_MATERIAL, side, property );
 764 }
 765
 766 #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
 767                                    MAT_BIT_FRONT_AMBIENT | \
 768                                    MAT_BIT_FRONT_DIFFUSE) << (side))
 769
 770 /* Either return a precalculated constant value or emit code to
 771  * calculate these values dynamically in the case where material calls
 772  * are present between begin/end pairs.
 773  *
 774  * Probably want to shift this to the program compilation phase - if
 775  * we always emitted the calculation here, a smart compiler could
 776  * detect that it was constant (given a certain set of inputs), and
 777  * lift it out of the main loop.  That way the programs created here
 778  * would be independent of the vertex_buffer details.
 779  */
 780 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
 781 {
 782    if (p->materials & SCENE_COLOR_BITS(side)) {
 783       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
 784       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
 785       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
 786       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
 787       struct ureg tmp = make_temp(p, material_diffuse);
 788       emit_op3(p, OPCODE_MAD, tmp,  WRITEMASK_XYZ, lm_ambient,
 789                material_ambient, material_emission);
 790       return tmp;
 791    }
 792    else
 793       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
 794 }
 795
 796
 797 static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
 798                                   GLuint side, GLuint property )
 799 {
 800    GLuint attrib = material_attrib(side, property);
 801    if (p->materials & (1<<attrib)) {
 802       struct ureg light_value =
 803          register_param3(p, STATE_LIGHT, light, property);
 804       struct ureg material_value = get_material(p, side, property);
 805       struct ureg tmp = get_temp(p);
 806       emit_op2(p, OPCODE_MUL, tmp,  0, light_value, material_value);
 807       return tmp;
 808    }
 809    else
 810       return register_param4(p, STATE_LIGHTPROD, light, side, property);
 811 }
 812
 813 static struct ureg calculate_light_attenuation( struct tnl_program *p,
 814                                                 GLuint i,
 815                                                 struct ureg VPpli,
 816                                                 struct ureg dist )
 817 {
 818    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
 819                                              STATE_ATTENUATION);
 820    struct ureg att = get_temp(p);
 821
 822    /* Calculate spot attenuation:
 823     */
 824    if (!p->state->unit[i].light_spotcutoff_is_180) {
 825       struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
 826                                                   STATE_SPOT_DIR_NORMALIZED, i);
 827       struct ureg spot = get_temp(p);
 828       struct ureg slt = get_temp(p);
 829
 830       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
 831       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
 832       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
 833       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
 834
 835       release_temp(p, spot);
 836       release_temp(p, slt);
 837    }
 838
 839    /* Calculate distance attenuation:
 840     */
 841    if (p->state->unit[i].light_attenuated) {
 842
 843       /* 1/d,d,d,1/d */
 844       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
 845       /* 1,d,d*d,1/d */
 846       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
 847       /* 1/dist-atten */
 848       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
 849
 850       if (!p->state->unit[i].light_spotcutoff_is_180) {
 851          /* dist-atten */
 852          emit_op1(p, OPCODE_RCP, dist, 0, dist);
 853          /* spot-atten * dist-atten */
 854          emit_op2(p, OPCODE_MUL, att, 0, dist, att);
 855       } else {
 856          /* dist-atten */
 857          emit_op1(p, OPCODE_RCP, att, 0, dist);
 858       }
 859    }
 860
 861    return att;
 862 }
 863
 864
 865
 866
 867
  868 /* Need to add some additional parameters to allow lighting in object
 869  * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
 870  * space lighting.
 871  */
 872 static void build_lighting( struct tnl_program *p )
 873 {
 874    const GLboolean twoside = p->state->light_twoside;
 875    const GLboolean separate = p->state->separate_specular;
 876    GLuint nr_lights = 0, count = 0;
 877    struct ureg normal = get_eye_normal(p);
 878    struct ureg lit = get_temp(p);
 879    struct ureg dots = get_temp(p);
 880    struct ureg _col0 = undef, _col1 = undef;
 881    struct ureg _bfc0 = undef, _bfc1 = undef;
 882    GLuint i;
 883
 884    for (i = 0; i < MAX_LIGHTS; i++)
 885       if (p->state->unit[i].light_enabled)
 886          nr_lights++;
 887
 888    set_material_flags(p);
 889
 890    {
 891       struct ureg shininess = get_material(p, 0, STATE_SHININESS);
 892       emit_op1(p, OPCODE_MOV, dots,  WRITEMASK_W, swizzle1(shininess,X));
 893
 894       _col0 = make_temp(p, get_scenecolor(p, 0));
 895       if (separate)
 896          _col1 = make_temp(p, get_identity_param(p));
 897       else
 898          _col1 = _col0;
 899
 900    }
 901
 902    if (twoside) {
 903       struct ureg shininess = get_material(p, 1, STATE_SHININESS);
 904       emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
 905                negate(swizzle1(shininess,X)));
 906
 907       _bfc0 = make_temp(p, get_scenecolor(p, 1));
 908       if (separate)
 909          _bfc1 = make_temp(p, get_identity_param(p));
 910       else
 911          _bfc1 = _bfc0;
 912    }
 913
 914
 915    /* If no lights, still need to emit the scenecolor.
 916     */
 917       {
 918          struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
 919          emit_op1(p, OPCODE_MOV, res0, 0, _col0);
 920       }
 921
 922       if (separate) {
 923          struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
 924          emit_op1(p, OPCODE_MOV, res1, 0, _col1);
 925       }
 926
 927       if (twoside) {
 928          struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
 929          emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
 930       }
 931
 932       if (twoside && separate) {
 933          struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
 934          emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
 935       }
 936
 937    if (nr_lights == 0) {
 938       release_temps(p);
 939       return;
 940    }
 941
 942
 943    for (i = 0; i < MAX_LIGHTS; i++) {
 944       if (p->state->unit[i].light_enabled) {
 945          struct ureg half = undef;
 946          struct ureg att = undef, VPpli = undef;
 947
 948          count++;
 949
 950          if (p->state->unit[i].light_eyepos3_is_zero) {
 951             /* Can used precomputed constants in this case.
 952              * Attenuation never applies to infinite lights.
 953              */
 954             VPpli = register_param3(p, STATE_LIGHT, i,
 955                                     STATE_POSITION_NORMALIZED);
 956             if (p->state->light_local_viewer) {
 957                 struct ureg eye_hat = get_eye_position_normalized(p);
 958                 half = get_temp(p);
 959                 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
 960                 emit_normalize_vec3(p, half, half);
 961             } else {
 962                 half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
 963             }
 964          }
 965          else {
 966             struct ureg Ppli = register_param3(p, STATE_LIGHT, i,
 967                                                STATE_POSITION);
 968             struct ureg V = get_eye_position(p);
 969             struct ureg dist = get_temp(p);
 970             struct ureg tmpPpli = get_temp(p);
 971
 972             VPpli = get_temp(p);
 973
 974             /* In homogeneous object coordinates
 975              */
 976             emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W));
 977             emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist);
 978
 979             /* Calculate VPpli vector
 980              */
 981             emit_op2(p, OPCODE_SUB, VPpli, 0, tmpPpli, V);
 982
 983             /* we're done with tmpPpli now */
 984             release_temp(p, tmpPpli);
 985
 986             /* Normalize VPpli.  The dist value also used in
 987              * attenuation below.
 988              */
 989             emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
 990             emit_op1(p, OPCODE_RSQ, dist, 0, dist);
 991             emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
 992
 993
 994             /* Calculate  attenuation:
 995              */
 996             if (!p->state->unit[i].light_spotcutoff_is_180 ||
 997                 p->state->unit[i].light_attenuated) {
 998                att = calculate_light_attenuation(p, i, VPpli, dist);
 999             }
1000
1001             /* We're done with dist now */
1002             release_temp(p, dist);
1003
1004
1005             /* Calculate viewer direction, or use infinite viewer:
1006              */
1007             half = get_temp(p);
1008             if (p->state->light_local_viewer) {
1009                struct ureg eye_hat = get_eye_position_normalized(p);
1010                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1011             }
1012             else {
1013                struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
1014                emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
1015             }
1016
1017             emit_normalize_vec3(p, half, half);
1018          }
1019
1020          /* Calculate dot products:
1021           */
1022          emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
1023          emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
1024
1025          /* we're done with VPpli and half now, so free them as to not drive up
1026             our temp usage unnecessary */
1027          release_temp(p, VPpli);
1028          release_temp(p, half);
1029
1030          /* Front face lighting:
1031           */
1032          {
1033             struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
1034             struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
1035             struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
1036             struct ureg res0, res1;
1037             GLuint mask0, mask1;
1038
1039             emit_op1(p, OPCODE_LIT, lit, 0, dots);
1040
1041             if (!is_undef(att))
1042                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1043
1044
1045             if (count == nr_lights) {
1046                if (separate) {
1047                   mask0 = WRITEMASK_XYZ;
1048                   mask1 = WRITEMASK_XYZ;
1049                   res0 = register_output( p, VERT_RESULT_COL0 );
1050                   res1 = register_output( p, VERT_RESULT_COL1 );
1051                }
1052                else {
1053                   mask0 = 0;
1054                   mask1 = WRITEMASK_XYZ;
1055                   res0 = _col0;
1056                   res1 = register_output( p, VERT_RESULT_COL0 );
1057                }
1058             } else {
1059                mask0 = 0;
1060                mask1 = 0;
1061                res0 = _col0;
1062                res1 = _col1;
1063             }
1064
1065             emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
1066             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
1067             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
1068
1069             release_temp(p, ambient);
1070             release_temp(p, diffuse);
1071             release_temp(p, specular);
1072          }
1073
1074          /* Back face lighting:
1075           */
1076          if (twoside) {
1077             struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
1078             struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
1079             struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
1080             struct ureg res0, res1;
1081             GLuint mask0, mask1;
1082
1083             emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z)));
1084
1085             if (!is_undef(att))
1086                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1087
1088             if (count == nr_lights) {
1089                if (separate) {
1090                   mask0 = WRITEMASK_XYZ;
1091                   mask1 = WRITEMASK_XYZ;
1092                   res0 = register_output( p, VERT_RESULT_BFC0 );
1093                   res1 = register_output( p, VERT_RESULT_BFC1 );
1094                }
1095                else {
1096                   mask0 = 0;
1097                   mask1 = WRITEMASK_XYZ;
1098                   res0 = _bfc0;
1099                   res1 = register_output( p, VERT_RESULT_BFC0 );
1100                }
1101             } else {
1102                res0 = _bfc0;
1103                res1 = _bfc1;
1104                mask0 = 0;
1105                mask1 = 0;
1106             }
1107
1108             emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
1109             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
1110             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
1111
1112             release_temp(p, ambient);
1113             release_temp(p, diffuse);
1114             release_temp(p, specular);
1115          }
1116
1117          release_temp(p, att);
1118       }
1119    }
1120
1121    release_temps( p );
1122 }
1123
1124
1125 static void build_fog( struct tnl_program *p )
1126 {
1127    struct ureg fog = register_output(p, VERT_RESULT_FOGC);
1128    struct ureg input;
1129
1130    if (p->state->fog_source_is_depth) {
1131       input = swizzle1(get_eye_position(p), Z);
1132    }
1133    else {
1134       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1135    }
1136
1137    if (p->state->fog_mode && p->state->tnl_do_vertex_fog) {
1138       struct ureg params = register_param2(p, STATE_INTERNAL,
1139                                            STATE_FOG_PARAMS_OPTIMIZED);
1140       struct ureg tmp = get_temp(p);
1141       GLboolean useabs = (p->state->fog_mode != FOG_EXP2);
1142
1143       if (useabs) {
1144          emit_op1(p, OPCODE_ABS, tmp, 0, input);
1145       }
1146
1147       switch (p->state->fog_mode) {
1148       case FOG_LINEAR: {
1149          struct ureg id = get_identity_param(p);
1150          emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input,
1151                         swizzle1(params,X), swizzle1(params,Y));
1152          emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
1153          emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
1154          break;
1155       }
1156       case FOG_EXP:
1157          emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input,
1158                         swizzle1(params,Z));
1159          emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
1160          break;
1161       case FOG_EXP2:
1162          emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
1163          emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp);
1164          emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
1165          break;
1166       }
1167
1168       release_temp(p, tmp);
1169    }
1170    else {
1171       /* results = incoming fog coords (compute fog per-fragment later)
1172        *
1173        * KW:  Is it really necessary to do anything in this case?
1174        * BP: Yes, we always need to compute the absolute value, unless
1175        * we want to push that down into the fragment program...
1176        */
1177       GLboolean useabs = GL_TRUE;
1178       emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, WRITEMASK_X, input);
1179    }
1180 }
1181
1182 static void build_reflect_texgen( struct tnl_program *p,
1183                                   struct ureg dest,
1184                                   GLuint writemask )
1185 {
1186    struct ureg normal = get_eye_normal(p);
1187    struct ureg eye_hat = get_eye_position_normalized(p);
1188    struct ureg tmp = get_temp(p);
1189
1190    /* n.u */
1191    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1192    /* 2n.u */
1193    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1194    /* (-2n.u)n + u */
1195    emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
1196
1197    release_temp(p, tmp);
1198 }
1199
1200 static void build_sphere_texgen( struct tnl_program *p,
1201                                  struct ureg dest,
1202                                  GLuint writemask )
1203 {
1204    struct ureg normal = get_eye_normal(p);
1205    struct ureg eye_hat = get_eye_position_normalized(p);
1206    struct ureg tmp = get_temp(p);
1207    struct ureg half = register_scalar_const(p, .5);
1208    struct ureg r = get_temp(p);
1209    struct ureg inv_m = get_temp(p);
1210    struct ureg id = get_identity_param(p);
1211
1212    /* Could share the above calculations, but it would be
1213     * a fairly odd state for someone to set (both sphere and
1214     * reflection active for different texture coordinate
1215     * components.  Of course - if two texture units enable
1216     * reflect and/or sphere, things start to tilt in favour
1217     * of seperating this out:
1218     */
1219
1220    /* n.u */
1221    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1222    /* 2n.u */
1223    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1224    /* (-2n.u)n + u */
1225    emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
1226    /* r + 0,0,1 */
1227    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1228    /* rx^2 + ry^2 + (rz+1)^2 */
1229    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1230    /* 2/m */
1231    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1232    /* 1/m */
1233    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1234    /* r/m + 1/2 */
1235    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1236
1237    release_temp(p, tmp);
1238    release_temp(p, r);
1239    release_temp(p, inv_m);
1240 }
1241
1242
1243 static void build_texture_transform( struct tnl_program *p )
1244 {
1245    GLuint i, j;
1246
1247    for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
1248
1249       if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i)))
1250          continue;
1251
1252       if (p->state->unit[i].texgen_enabled ||
1253           p->state->unit[i].texmat_enabled) {
1254
1255          GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1256          struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
1257          struct ureg out_texgen = undef;
1258
1259          if (p->state->unit[i].texgen_enabled) {
1260             GLuint copy_mask = 0;
1261             GLuint sphere_mask = 0;
1262             GLuint reflect_mask = 0;
1263             GLuint normal_mask = 0;
1264             GLuint modes[4];
1265
1266             if (texmat_enabled)
1267                out_texgen = get_temp(p);
1268             else
1269                out_texgen = out;
1270
1271             modes[0] = p->state->unit[i].texgen_mode0;
1272             modes[1] = p->state->unit[i].texgen_mode1;
1273             modes[2] = p->state->unit[i].texgen_mode2;
1274             modes[3] = p->state->unit[i].texgen_mode3;
1275
1276             for (j = 0; j < 4; j++) {
1277                switch (modes[j]) {
1278                case TXG_OBJ_LINEAR: {
1279                   struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1280                   struct ureg plane =
1281                      register_param3(p, STATE_TEXGEN, i,
1282                                      STATE_TEXGEN_OBJECT_S + j);
1283
1284                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1285                            obj, plane );
1286                   break;
1287                }
1288                case TXG_EYE_LINEAR: {
1289                   struct ureg eye = get_eye_position(p);
1290                   struct ureg plane =
1291                      register_param3(p, STATE_TEXGEN, i,
1292                                      STATE_TEXGEN_EYE_S + j);
1293
1294                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1295                            eye, plane );
1296                   break;
1297                }
1298                case TXG_SPHERE_MAP:
1299                   sphere_mask |= WRITEMASK_X << j;
1300                   break;
1301                case TXG_REFLECTION_MAP:
1302                   reflect_mask |= WRITEMASK_X << j;
1303                   break;
1304                case TXG_NORMAL_MAP:
1305                   normal_mask |= WRITEMASK_X << j;
1306                   break;
1307                case TXG_NONE:
1308                   copy_mask |= WRITEMASK_X << j;
1309                }
1310
1311             }
1312
1313
1314             if (sphere_mask) {
1315                build_sphere_texgen(p, out_texgen, sphere_mask);
1316             }
1317
1318             if (reflect_mask) {
1319                build_reflect_texgen(p, out_texgen, reflect_mask);
1320             }
1321
1322             if (normal_mask) {
1323                struct ureg normal = get_eye_normal(p);
1324                emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1325             }
1326
1327             if (copy_mask) {
1328                struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1329                emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1330             }
1331          }
1332
1333          if (texmat_enabled) {
1334             struct ureg texmat[4];
1335             struct ureg in = (!is_undef(out_texgen) ?
1336                               out_texgen :
1337                               register_input(p, VERT_ATTRIB_TEX0+i));
1338             if (PREFER_DP4) {
1339                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1340                                        0, texmat );
1341                emit_matrix_transform_vec4( p, out, texmat, in );
1342             }
1343             else {
1344                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1345                                        STATE_MATRIX_TRANSPOSE, texmat );
1346                emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1347             }
1348          }
1349
1350          release_temps(p);
1351       }
1352       else {
1353          emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
1354       }
1355    }
1356 }
1357
1358
1359 static void build_pointsize( struct tnl_program *p )
1360 {
1361    struct ureg eye = get_eye_position(p);
1362    struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
1363    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1364    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1365    struct ureg ut = get_temp(p);
1366
1367    /* dist = |eyez| */
1368    emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
1369    /* p1 + dist * (p2 + dist * p3); */
1370    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1371                 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
1372    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1373                 ut, swizzle1(state_attenuation, X));
1374
1375    /* 1 / sqrt(factor) */
1376    emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
1377
1378 #if 1
1379    /* out = pointSize / sqrt(factor) */
1380    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1381 #else
1382    /* not sure, might make sense to do clamping here,
1383       but it's not done in t_vb_points neither */
1384    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
1385    emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
1386    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
1387 #endif
1388
1389    release_temp(p, ut);
1390 }
1391
1392 static void build_tnl_program( struct tnl_program *p )
1393 {   /* Emit the program, starting with modelviewproject:
1394     */
1395    build_hpos(p);
1396
1397    /* Lighting calculations:
1398     */
1399    if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
1400       if (p->state->light_global_enabled)
1401          build_lighting(p);
1402       else {
1403          if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1404             emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
1405
1406          if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1407             emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
1408       }
1409    }
1410
1411    if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) ||
1412        p->state->fog_mode != FOG_NONE)
1413       build_fog(p);
1414
1415    if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
1416       build_texture_transform(p);
1417
1418    if (p->state->point_attenuated)
1419       build_pointsize(p);
1420
1421    /* Finish up:
1422     */
1423    emit_op1(p, OPCODE_END, undef, 0, undef);
1424
1425    /* Disassemble:
1426     */
1427    if (DISASSEM) {
1428       _mesa_printf ("\n");
1429    }
1430 }
1431
1432
1433 static void
1434 create_new_program( const struct state_key *key,
1435                     struct gl_vertex_program *program,
1436                     GLuint max_temps)
1437 {
1438    struct tnl_program p;
1439
1440    _mesa_memset(&p, 0, sizeof(p));
1441    p.state = key;
1442    p.program = program;
1443    p.eye_position = undef;
1444    p.eye_position_normalized = undef;
1445    p.eye_normal = undef;
1446    p.identity = undef;
1447    p.temp_in_use = 0;
1448
1449    if (max_temps >= sizeof(int) * 8)
1450       p.temp_reserved = 0;
1451    else
1452       p.temp_reserved = ~((1<<max_temps)-1);
1453
1454    p.program->Base.Instructions = _mesa_alloc_instructions(MAX_INSN);
1455    p.program->Base.String = NULL;
1456    p.program->Base.NumInstructions =
1457    p.program->Base.NumTemporaries =
1458    p.program->Base.NumParameters =
1459    p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
1460    p.program->Base.Parameters = _mesa_new_parameter_list();
1461    p.program->Base.InputsRead = 0;
1462    p.program->Base.OutputsWritten = 0;
1463
1464    build_tnl_program( &p );
1465 }
1466
1467 static void *search_cache( struct tnl_cache *cache,
1468                            GLuint hash,
1469                            const void *key,
1470                            GLuint keysize)
1471 {
1472    struct tnl_cache_item *c;
1473
1474    for (c = cache->items[hash % cache->size]; c; c = c->next) {
1475       if (c->hash == hash && _mesa_memcmp(c->key, key, keysize) == 0)
1476          return c->data;
1477    }
1478
1479    return NULL;
1480 }
1481
1482 static void rehash( struct tnl_cache *cache )
1483 {
1484    struct tnl_cache_item **items;
1485    struct tnl_cache_item *c, *next;
1486    GLuint size, i;
1487
1488    size = cache->size * 3;
1489    items = (struct tnl_cache_item**) _mesa_malloc(size * sizeof(*items));
1490    _mesa_memset(items, 0, size * sizeof(*items));
1491
1492    for (i = 0; i < cache->size; i++)
1493       for (c = cache->items[i]; c; c = next) {
1494          next = c->next;
1495          c->next = items[c->hash % size];
1496          items[c->hash % size] = c;
1497       }
1498
1499    FREE(cache->items);
1500    cache->items = items;
1501    cache->size = size;
1502 }
1503
1504 static void cache_item( struct tnl_cache *cache,
1505                         GLuint hash,
1506                         void *key,
1507                         void *data )
1508 {
1509    struct tnl_cache_item *c = (struct tnl_cache_item*) _mesa_malloc(sizeof(*c));
1510    c->hash = hash;
1511    c->key = key;
1512    c->data = data;
1513
1514    if (++cache->n_items > cache->size * 1.5)
1515       rehash(cache);
1516
1517    c->next = cache->items[hash % cache->size];
1518    cache->items[hash % cache->size] = c;
1519 }
1520
1521 static GLuint hash_key( struct state_key *key )
1522 {
1523    GLuint *ikey = (GLuint *)key;
1524    GLuint hash = 0, i;
1525
1526    /* I'm sure this can be improved on, but speed is important:
1527     */
1528    for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++)
1529       hash ^= ikey[i];
1530
1531    return hash;
1532 }
1533
1534 void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx )
1535 {
1536    TNLcontext *tnl = TNL_CONTEXT(ctx);
1537    struct state_key *key;
1538    GLuint hash;
1539    const struct gl_vertex_program *prev = ctx->VertexProgram._Current;
1540
1541    if (!ctx->VertexProgram._Current ||
1542        ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) {
1543       /* Grab all the relevent state and put it in a single structure:
1544        */
1545       key = make_state_key(ctx);
1546       hash = hash_key(key);
1547
1548       /* Look for an already-prepared program for this state:
1549        */
1550       ctx->VertexProgram._TnlProgram = (struct gl_vertex_program *)
1551          search_cache( tnl->vp_cache, hash, key, sizeof(*key) );
1552
1553       /* OK, we'll have to build a new one:
1554        */
1555       if (!ctx->VertexProgram._TnlProgram) {
1556          if (0)
1557             _mesa_printf("Build new TNL program\n");
1558
1559          ctx->VertexProgram._TnlProgram = (struct gl_vertex_program *)
1560             ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
1561
1562          create_new_program( key, ctx->VertexProgram._TnlProgram,
1563                              ctx->Const.VertexProgram.MaxTemps );
1564
1565          if (ctx->Driver.ProgramStringNotify)
1566             ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
1567                                        &ctx->VertexProgram._TnlProgram->Base );
1568
1569          cache_item(tnl->vp_cache, hash, key, ctx->VertexProgram._TnlProgram );
1570       }
1571       else {
1572          FREE(key);
1573          if (0)
1574             _mesa_printf("Found existing TNL program for key %x\n", hash);
1575       }
1576       _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
1577                                ctx->VertexProgram._TnlProgram);
1578    }
1579
1580    /* Tell the driver about the change.  Could define a new target for
1581     * this?
1582     */
1583    if (ctx->VertexProgram._Current != prev && ctx->Driver.BindProgram) {
1584       ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
1585                             (struct gl_program *) ctx->VertexProgram._Current);
1586    }
1587 }
1588
1589 void _tnl_ProgramCacheInit( GLcontext *ctx )
1590 {
1591    TNLcontext *tnl = TNL_CONTEXT(ctx);
1592
1593    tnl->vp_cache = (struct tnl_cache *) MALLOC(sizeof(*tnl->vp_cache));
1594    tnl->vp_cache->size = 17;
1595    tnl->vp_cache->n_items = 0;
1596    tnl->vp_cache->items = (struct tnl_cache_item**)
1597       _mesa_calloc(tnl->vp_cache->size * sizeof(*tnl->vp_cache->items));
1598 }
1599
1600 void _tnl_ProgramCacheDestroy( GLcontext *ctx )
1601 {
1602    TNLcontext *tnl = TNL_CONTEXT(ctx);
1603    struct tnl_cache_item *c, *next;
1604    GLuint i;
1605
1606    for (i = 0; i < tnl->vp_cache->size; i++)
1607       for (c = tnl->vp_cache->items[i]; c; c = next) {
1608          next = c->next;
1609          FREE(c->key);
1610          FREE(c->data);
1611          FREE(c);
1612       }
1613
1614    FREE(tnl->vp_cache->items);
1615    FREE(tnl->vp_cache);
1616 }