src/mesa/main/ffvertex_prog.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \file ffvertex_prog.c
  30  *
  31  * Create a vertex program to execute the current fixed function T&L pipeline.
  32  * \author Keith Whitwell
  33  */
  34
  35
  36 #include "main/glheader.h"
  37 #include "main/mtypes.h"
  38 #include "main/macros.h"
  39 #include "main/enums.h"
  40 #include "main/ffvertex_prog.h"
  41 #include "shader/program.h"
  42 #include "shader/prog_cache.h"
  43 #include "shader/prog_instruction.h"
  44 #include "shader/prog_parameter.h"
  45 #include "shader/prog_print.h"
  46 #include "shader/prog_statevars.h"
  47
  48
/**
 * Compact description of the fixed-function state captured by
 * make_state_key().  The key is cleared with memset() before the
 * fields below are filled in from the GL context.
 */
struct state_key {
   unsigned light_color_material_mask:12; /* ColorMaterialBitmask, when lighting and color material are on */
   unsigned light_global_enabled:1;       /* ctx->Light.Enabled */
   unsigned light_local_viewer:1;         /* ctx->Light.Model.LocalViewer */
   unsigned light_twoside:1;              /* ctx->Light.Model.TwoSide */
   unsigned material_shininess_is_zero:1; /* check_active_shininess() found no active shininess */
   unsigned need_eye_coords:1;            /* ctx->_NeedEyeCoords */
   unsigned normalize:1;                  /* ctx->Transform.Normalize */
   unsigned rescale_normals:1;            /* ctx->Transform.RescaleNormals */

   unsigned fog_source_is_depth:1;        /* fog coordinate source is GL_FRAGMENT_DEPTH_EXT */
   unsigned separate_specular:1;          /* color control is GL_SEPARATE_SPECULAR_COLOR */
   unsigned point_attenuated:1;           /* ctx->Point._Attenuated */
   unsigned point_array:1;                /* point size array enabled (FEATURE_point_size_array) */
   unsigned texture_enabled_global:1;     /* any texgen, texture matrix or texture unit enabled */
   unsigned fragprog_inputs_read:12;      /* fragment program InputsRead (+ COL0/TEX0 in feedback mode) */

   unsigned varying_vp_inputs;            /* copy of ctx->varying_vp_inputs */

   /* Per-index state.  make_state_key() indexes the light_* bits by
    * light number and the tex* fields by texture coordinate unit.
    */
   struct {
      unsigned light_enabled:1;           /* light is enabled */
      unsigned light_eyepos3_is_zero:1;   /* EyePosition[3] == 0 */
      unsigned light_spotcutoff_is_180:1; /* SpotCutoff == 180 */
      unsigned light_attenuated:1;        /* attenuation factors differ from (1, 0, 0) */
      unsigned texunit_really_enabled:1;  /* texUnit->_ReallyEnabled */
      unsigned texmat_enabled:1;          /* texture matrix enabled for this unit */
      unsigned texgen_enabled:4;          /* set to 1 when TexGenEnabled is non-zero */
      unsigned texgen_mode0:4;            /* TXG_x mode for the S coordinate */
      unsigned texgen_mode1:4;            /* TXG_x mode for the T coordinate */
      unsigned texgen_mode2:4;            /* TXG_x mode for the R coordinate */
      unsigned texgen_mode3:4;            /* TXG_x mode for the Q coordinate */
   } unit[8];
};
  82
  83
  84 #define TXG_NONE           0
  85 #define TXG_OBJ_LINEAR     1
  86 #define TXG_EYE_LINEAR     2
  87 #define TXG_SPHERE_MAP     3
  88 #define TXG_REFLECTION_MAP 4
  89 #define TXG_NORMAL_MAP     5
  90
  91 static GLuint translate_texgen( GLboolean enabled, GLenum mode )
  92 {
  93    if (!enabled)
  94       return TXG_NONE;
  95
  96    switch (mode) {
  97    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
  98    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
  99    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
 100    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
 101    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
 102    default: return TXG_NONE;
 103    }
 104 }
 105
 106
 107
 108 static GLboolean check_active_shininess( GLcontext *ctx,
 109                                          const struct state_key *key,
 110                                          GLuint side )
 111 {
 112    GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side);
 113
 114    if ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
 115        (key->light_color_material_mask & bit))
 116       return GL_TRUE;
 117
 118    if (key->varying_vp_inputs & (bit << 16))
 119       return GL_TRUE;
 120
 121    if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F)
 122       return GL_TRUE;
 123
 124    return GL_FALSE;
 125 }
 126
 127
 128 static void make_state_key( GLcontext *ctx, struct state_key *key )
 129 {
 130    const struct gl_fragment_program *fp;
 131    GLuint i;
 132
 133    memset(key, 0, sizeof(struct state_key));
 134    fp = ctx->FragmentProgram._Current;
 135
 136    /* This now relies on texenvprogram.c being active:
 137     */
 138    assert(fp);
 139
 140    key->need_eye_coords = ctx->_NeedEyeCoords;
 141
 142    key->fragprog_inputs_read = fp->Base.InputsRead;
 143    key->varying_vp_inputs = ctx->varying_vp_inputs;
 144
 145    if (ctx->RenderMode == GL_FEEDBACK) {
 146       /* make sure the vertprog emits color and tex0 */
 147       key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
 148    }
 149
 150    key->separate_specular = (ctx->Light.Model.ColorControl ==
 151                              GL_SEPARATE_SPECULAR_COLOR);
 152
 153    if (ctx->Light.Enabled) {
 154       key->light_global_enabled = 1;
 155
 156       if (ctx->Light.Model.LocalViewer)
 157          key->light_local_viewer = 1;
 158
 159       if (ctx->Light.Model.TwoSide)
 160          key->light_twoside = 1;
 161
 162       if (ctx->Light.ColorMaterialEnabled) {
 163          key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
 164       }
 165
 166       for (i = 0; i < MAX_LIGHTS; i++) {
 167          struct gl_light *light = &ctx->Light.Light[i];
 168
 169          if (light->Enabled) {
 170             key->unit[i].light_enabled = 1;
 171
 172             if (light->EyePosition[3] == 0.0)
 173                key->unit[i].light_eyepos3_is_zero = 1;
 174
 175             if (light->SpotCutoff == 180.0)
 176                key->unit[i].light_spotcutoff_is_180 = 1;
 177
 178             if (light->ConstantAttenuation != 1.0 ||
 179                 light->LinearAttenuation != 0.0 ||
 180                 light->QuadraticAttenuation != 0.0)
 181                key->unit[i].light_attenuated = 1;
 182          }
 183       }
 184
 185       if (check_active_shininess(ctx, key, 0)) {
 186          key->material_shininess_is_zero = 0;
 187       }
 188       else if (key->light_twoside &&
 189                check_active_shininess(ctx, key, 1)) {
 190          key->material_shininess_is_zero = 0;
 191       }
 192       else {
 193          key->material_shininess_is_zero = 1;
 194       }
 195    }
 196
 197    if (ctx->Transform.Normalize)
 198       key->normalize = 1;
 199
 200    if (ctx->Transform.RescaleNormals)
 201       key->rescale_normals = 1;
 202
 203    if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
 204       key->fog_source_is_depth = 1;
 205
 206    if (ctx->Point._Attenuated)
 207       key->point_attenuated = 1;
 208
 209 #if FEATURE_point_size_array
 210    if (ctx->Array.ArrayObj->PointSize.Enabled)
 211       key->point_array = 1;
 212 #endif
 213
 214    if (ctx->Texture._TexGenEnabled ||
 215        ctx->Texture._TexMatEnabled ||
 216        ctx->Texture._EnabledUnits)
 217       key->texture_enabled_global = 1;
 218
 219    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
 220       struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
 221
 222       if (texUnit->_ReallyEnabled)
 223          key->unit[i].texunit_really_enabled = 1;
 224
 225       if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
 226          key->unit[i].texmat_enabled = 1;
 227
 228       if (texUnit->TexGenEnabled) {
 229          key->unit[i].texgen_enabled = 1;
 230
 231          key->unit[i].texgen_mode0 =
 232             translate_texgen( texUnit->TexGenEnabled & (1<<0),
 233                               texUnit->GenS.Mode );
 234          key->unit[i].texgen_mode1 =
 235             translate_texgen( texUnit->TexGenEnabled & (1<<1),
 236                               texUnit->GenT.Mode );
 237          key->unit[i].texgen_mode2 =
 238             translate_texgen( texUnit->TexGenEnabled & (1<<2),
 239                               texUnit->GenR.Mode );
 240          key->unit[i].texgen_mode3 =
 241             translate_texgen( texUnit->TexGenEnabled & (1<<3),
 242                               texUnit->GenQ.Mode );
 243       }
 244    }
 245 }
 246
 247
 248
/* Very useful debugging tool - produces an annotated listing of the
 * generated program with line/function references for each
 * instruction back into this file.  debug_insn() prints only when
 * this is non-zero.
 */
#define DISASSEM 0
 254
 255
 256 /* Use uregs to represent registers internally, translate to Mesa's
 257  * expected formats on emit.
 258  *
 259  * NOTE: These are passed by value extensively in this file rather
 260  * than as usual by pointer reference.  If this disturbs you, try
 261  * remembering they are just 32bits in size.
 262  *
 263  * GCC is smart enough to deal with these dword-sized structures in
 264  * much the same way as if I had defined them as dwords and was using
 265  * macros to access and set the fields.  This is much nicer and easier
 266  * to evolve.
 267  */
struct ureg {
   GLuint file:4;    /* register file, copied to the File field on emit */
   GLint idx:9;      /* relative addressing may be negative */
                     /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
   GLuint negate:1;  /* emitted as NEGATE_XYZW when set, NEGATE_NONE otherwise */
   GLuint swz:12;    /* packed swizzle, copied to the Swizzle field on emit */
   GLuint pad:6;     /* unused; make_ureg() sets it to 0 */
};
 276
 277
/**
 * Working state used while a vertex program is being generated.
 */
struct tnl_program {
   const struct state_key *state;      /* key describing the state to implement */
   struct gl_vertex_program *program;  /* program receiving the instructions */
   GLint max_inst;  /** number of instructions allocated for program */
   GLboolean mvp_with_dp4;             /* choose DP4 over MUL/MAD for position transforms */

   GLuint temp_in_use;                 /* bitmask of temporaries currently handed out */
   GLuint temp_reserved;               /* bitmask of temporaries that are never released */

   /* Lazily computed values; 'undef' until first requested. */
   struct ureg eye_position;
   struct ureg eye_position_z;
   struct ureg eye_position_normalized;
   struct ureg transformed_normal;
   struct ureg identity;

   GLuint materials;                   /* material attribs that vary per vertex (set_material_flags) */
   GLuint color_materials;             /* material attribs that follow COLOR0 (set_material_flags) */
};
 296
 297
/* Placeholder register: marks "not computed yet" cached values and
 * fills the unused source slots of one- and two-operand instructions.
 */
static const struct ureg undef = {
   PROGRAM_UNDEFINED,   /* file */
   0,                   /* idx */
   0,                   /* negate */
   0,                   /* swz */
   0                    /* pad */
};

/* Local shorthand for the swizzle components:
 */
#define X    SWIZZLE_X
#define Y    SWIZZLE_Y
#define Z    SWIZZLE_Z
#define W    SWIZZLE_W
 312
 313
 314 /* Construct a ureg:
 315  */
 316 static struct ureg make_ureg(GLuint file, GLint idx)
 317 {
 318    struct ureg reg;
 319    reg.file = file;
 320    reg.idx = idx;
 321    reg.negate = 0;
 322    reg.swz = SWIZZLE_NOOP;
 323    reg.pad = 0;
 324    return reg;
 325 }
 326
 327
 328
 329 static struct ureg negate( struct ureg reg )
 330 {
 331    reg.negate ^= 1;
 332    return reg;
 333 }
 334
 335
 336 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
 337 {
 338    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
 339                            GET_SWZ(reg.swz, y),
 340                            GET_SWZ(reg.swz, z),
 341                            GET_SWZ(reg.swz, w));
 342    return reg;
 343 }
 344
 345
 346 static struct ureg swizzle1( struct ureg reg, int x )
 347 {
 348    return swizzle(reg, x, x, x, x);
 349 }
 350
 351
 352 static struct ureg get_temp( struct tnl_program *p )
 353 {
 354    int bit = _mesa_ffs( ~p->temp_in_use );
 355    if (!bit) {
 356       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
 357       _mesa_exit(1);
 358    }
 359
 360    if ((GLuint) bit > p->program->Base.NumTemporaries)
 361       p->program->Base.NumTemporaries = bit;
 362
 363    p->temp_in_use |= 1<<(bit-1);
 364    return make_ureg(PROGRAM_TEMPORARY, bit-1);
 365 }
 366
 367
 368 static struct ureg reserve_temp( struct tnl_program *p )
 369 {
 370    struct ureg temp = get_temp( p );
 371    p->temp_reserved |= 1<<temp.idx;
 372    return temp;
 373 }
 374
 375
 376 static void release_temp( struct tnl_program *p, struct ureg reg )
 377 {
 378    if (reg.file == PROGRAM_TEMPORARY) {
 379       p->temp_in_use &= ~(1<<reg.idx);
 380       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
 381    }
 382 }
 383
 384 static void release_temps( struct tnl_program *p )
 385 {
 386    p->temp_in_use = p->temp_reserved;
 387 }
 388
 389
 390 static struct ureg register_param5(struct tnl_program *p,
 391                                    GLint s0,
 392                                    GLint s1,
 393                                    GLint s2,
 394                                    GLint s3,
 395                                    GLint s4)
 396 {
 397    gl_state_index tokens[STATE_LENGTH];
 398    GLint idx;
 399    tokens[0] = s0;
 400    tokens[1] = s1;
 401    tokens[2] = s2;
 402    tokens[3] = s3;
 403    tokens[4] = s4;
 404    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
 405    return make_ureg(PROGRAM_STATE_VAR, idx);
 406 }
 407
 408
 409 #define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
 410 #define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
 411 #define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
 412 #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
 413
 414
 415
 416 /**
 417  * \param input  one of VERT_ATTRIB_x tokens.
 418  */
 419 static struct ureg register_input( struct tnl_program *p, GLuint input )
 420 {
 421    assert(input < 32);
 422
 423    if (p->state->varying_vp_inputs & (1<<input)) {
 424       p->program->Base.InputsRead |= (1<<input);
 425       return make_ureg(PROGRAM_INPUT, input);
 426    }
 427    else {
 428       return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
 429    }
 430 }
 431
 432
 433 /**
 434  * \param input  one of VERT_RESULT_x tokens.
 435  */
 436 static struct ureg register_output( struct tnl_program *p, GLuint output )
 437 {
 438    p->program->Base.OutputsWritten |= (1<<output);
 439    return make_ureg(PROGRAM_OUTPUT, output);
 440 }
 441
 442
 443 static struct ureg register_const4f( struct tnl_program *p,
 444                               GLfloat s0,
 445                               GLfloat s1,
 446                               GLfloat s2,
 447                               GLfloat s3)
 448 {
 449    GLfloat values[4];
 450    GLint idx;
 451    GLuint swizzle;
 452    values[0] = s0;
 453    values[1] = s1;
 454    values[2] = s2;
 455    values[3] = s3;
 456    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
 457                                      &swizzle );
 458    ASSERT(swizzle == SWIZZLE_NOOP);
 459    return make_ureg(PROGRAM_CONSTANT, idx);
 460 }
 461
 462 #define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
 463 #define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
 464 #define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
 465 #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
 466
 467 static GLboolean is_undef( struct ureg reg )
 468 {
 469    return reg.file == PROGRAM_UNDEFINED;
 470 }
 471
 472
 473 static struct ureg get_identity_param( struct tnl_program *p )
 474 {
 475    if (is_undef(p->identity))
 476       p->identity = register_const4f(p, 0,0,0,1);
 477
 478    return p->identity;
 479 }
 480
 481 static void register_matrix_param5( struct tnl_program *p,
 482                                     GLint s0, /* modelview, projection, etc */
 483                                     GLint s1, /* texture matrix number */
 484                                     GLint s2, /* first row */
 485                                     GLint s3, /* last row */
 486                                     GLint s4, /* inverse, transpose, etc */
 487                                     struct ureg *matrix )
 488 {
 489    GLint i;
 490
 491    /* This is a bit sad as the support is there to pull the whole
 492     * matrix out in one go:
 493     */
 494    for (i = 0; i <= s3 - s2; i++)
 495       matrix[i] = register_param5( p, s0, s1, i, i, s4 );
 496 }
 497
 498
 499 static void emit_arg( struct prog_src_register *src,
 500                       struct ureg reg )
 501 {
 502    src->File = reg.file;
 503    src->Index = reg.idx;
 504    src->Swizzle = reg.swz;
 505    src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
 506    src->Abs = 0;
 507    src->RelAddr = 0;
 508    /* Check that bitfield sizes aren't exceeded */
 509    ASSERT(src->Index == reg.idx);
 510 }
 511
 512
 513 static void emit_dst( struct prog_dst_register *dst,
 514                       struct ureg reg, GLuint mask )
 515 {
 516    dst->File = reg.file;
 517    dst->Index = reg.idx;
 518    /* allow zero as a shorthand for xyzw */
 519    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
 520    dst->CondMask = COND_TR;  /* always pass cond test */
 521    dst->CondSwizzle = SWIZZLE_NOOP;
 522    dst->CondSrc = 0;
 523    dst->pad = 0;
 524    /* Check that bitfield sizes aren't exceeded */
 525    ASSERT(dst->Index == reg.idx);
 526 }
 527
 528
 529 static void debug_insn( struct prog_instruction *inst, const char *fn,
 530                         GLuint line )
 531 {
 532    if (DISASSEM) {
 533       static const char *last_fn;
 534
 535       if (fn != last_fn) {
 536          last_fn = fn;
 537          _mesa_printf("%s:\n", fn);
 538       }
 539
 540       _mesa_printf("%d:\t", line);
 541       _mesa_print_instruction(inst);
 542    }
 543 }
 544
 545
 546 static void emit_op3fn(struct tnl_program *p,
 547                        enum prog_opcode op,
 548                        struct ureg dest,
 549                        GLuint mask,
 550                        struct ureg src0,
 551                        struct ureg src1,
 552                        struct ureg src2,
 553                        const char *fn,
 554                        GLuint line)
 555 {
 556    GLuint nr;
 557    struct prog_instruction *inst;
 558
 559    assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
 560
 561    if (p->program->Base.NumInstructions == p->max_inst) {
 562       /* need to extend the program's instruction array */
 563       struct prog_instruction *newInst;
 564
 565       /* double the size */
 566       p->max_inst *= 2;
 567
 568       newInst = _mesa_alloc_instructions(p->max_inst);
 569       if (!newInst) {
 570          _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
 571          return;
 572       }
 573
 574       _mesa_copy_instructions(newInst,
 575                               p->program->Base.Instructions,
 576                               p->program->Base.NumInstructions);
 577
 578       _mesa_free_instructions(p->program->Base.Instructions,
 579                               p->program->Base.NumInstructions);
 580
 581       p->program->Base.Instructions = newInst;
 582    }
 583
 584    nr = p->program->Base.NumInstructions++;
 585
 586    inst = &p->program->Base.Instructions[nr];
 587    inst->Opcode = (enum prog_opcode) op;
 588    inst->Data = 0;
 589
 590    emit_arg( &inst->SrcReg[0], src0 );
 591    emit_arg( &inst->SrcReg[1], src1 );
 592    emit_arg( &inst->SrcReg[2], src2 );
 593
 594    emit_dst( &inst->DstReg, dest, mask );
 595
 596    debug_insn(inst, fn, line);
 597 }
 598
 599
 600 #define emit_op3(p, op, dst, mask, src0, src1, src2) \
 601    emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
 602
 603 #define emit_op2(p, op, dst, mask, src0, src1) \
 604     emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
 605
 606 #define emit_op1(p, op, dst, mask, src0) \
 607     emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
 608
 609
 610 static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
 611 {
 612    if (reg.file == PROGRAM_TEMPORARY &&
 613        !(p->temp_reserved & (1<<reg.idx)))
 614       return reg;
 615    else {
 616       struct ureg temp = get_temp(p);
 617       emit_op1(p, OPCODE_MOV, temp, 0, reg);
 618       return temp;
 619    }
 620 }
 621
 622
 623 /* Currently no tracking performed of input/output/register size or
 624  * active elements.  Could be used to reduce these operations, as
 625  * could the matrix type.
 626  */
 627 static void emit_matrix_transform_vec4( struct tnl_program *p,
 628                                         struct ureg dest,
 629                                         const struct ureg *mat,
 630                                         struct ureg src)
 631 {
 632    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
 633    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
 634    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
 635    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
 636 }
 637
 638
 639 /* This version is much easier to implement if writemasks are not
 640  * supported natively on the target or (like SSE), the target doesn't
 641  * have a clean/obvious dotproduct implementation.
 642  */
 643 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
 644                                                   struct ureg dest,
 645                                                   const struct ureg *mat,
 646                                                   struct ureg src)
 647 {
 648    struct ureg tmp;
 649
 650    if (dest.file != PROGRAM_TEMPORARY)
 651       tmp = get_temp(p);
 652    else
 653       tmp = dest;
 654
 655    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
 656    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
 657    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
 658    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
 659
 660    if (dest.file != PROGRAM_TEMPORARY)
 661       release_temp(p, tmp);
 662 }
 663
 664
 665 static void emit_matrix_transform_vec3( struct tnl_program *p,
 666                                         struct ureg dest,
 667                                         const struct ureg *mat,
 668                                         struct ureg src)
 669 {
 670    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
 671    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
 672    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
 673 }
 674
 675
 676 static void emit_normalize_vec3( struct tnl_program *p,
 677                                  struct ureg dest,
 678                                  struct ureg src )
 679 {
 680 #if 0
 681    /* XXX use this when drivers are ready for NRM3 */
 682    emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src);
 683 #else
 684    struct ureg tmp = get_temp(p);
 685    emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
 686    emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
 687    emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
 688    release_temp(p, tmp);
 689 #endif
 690 }
 691
 692
 693 static void emit_passthrough( struct tnl_program *p,
 694                               GLuint input,
 695                               GLuint output )
 696 {
 697    struct ureg out = register_output(p, output);
 698    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
 699 }
 700
 701
 702 static struct ureg get_eye_position( struct tnl_program *p )
 703 {
 704    if (is_undef(p->eye_position)) {
 705       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 706       struct ureg modelview[4];
 707
 708       p->eye_position = reserve_temp(p);
 709
 710       if (p->mvp_with_dp4) {
 711          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 712                                  0, modelview );
 713
 714          emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 715       }
 716       else {
 717          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 718                                  STATE_MATRIX_TRANSPOSE, modelview );
 719
 720          emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 721       }
 722    }
 723
 724    return p->eye_position;
 725 }
 726
 727
 728 static struct ureg get_eye_position_z( struct tnl_program *p )
 729 {
 730    if (!is_undef(p->eye_position))
 731       return swizzle1(p->eye_position, Z);
 732
 733    if (is_undef(p->eye_position_z)) {
 734       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 735       struct ureg modelview[4];
 736
 737       p->eye_position_z = reserve_temp(p);
 738
 739       register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
 740                               0, modelview );
 741
 742       emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
 743    }
 744
 745    return p->eye_position_z;
 746 }
 747
 748
 749 static struct ureg get_eye_position_normalized( struct tnl_program *p )
 750 {
 751    if (is_undef(p->eye_position_normalized)) {
 752       struct ureg eye = get_eye_position(p);
 753       p->eye_position_normalized = reserve_temp(p);
 754       emit_normalize_vec3(p, p->eye_position_normalized, eye);
 755    }
 756
 757    return p->eye_position_normalized;
 758 }
 759
 760
 761 static struct ureg get_transformed_normal( struct tnl_program *p )
 762 {
 763    if (is_undef(p->transformed_normal) &&
 764        !p->state->need_eye_coords &&
 765        !p->state->normalize &&
 766        !(p->state->need_eye_coords == p->state->rescale_normals))
 767    {
 768       p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
 769    }
 770    else if (is_undef(p->transformed_normal))
 771    {
 772       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
 773       struct ureg mvinv[3];
 774       struct ureg transformed_normal = reserve_temp(p);
 775
 776       if (p->state->need_eye_coords) {
 777          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
 778                                  STATE_MATRIX_INVTRANS, mvinv );
 779
 780          /* Transform to eye space:
 781           */
 782          emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
 783          normal = transformed_normal;
 784       }
 785
 786       /* Normalize/Rescale:
 787        */
 788       if (p->state->normalize) {
 789          emit_normalize_vec3( p, transformed_normal, normal );
 790          normal = transformed_normal;
 791       }
 792       else if (p->state->need_eye_coords == p->state->rescale_normals) {
 793          /* This is already adjusted for eye/non-eye rendering:
 794           */
 795          struct ureg rescale = register_param2(p, STATE_INTERNAL,
 796                                                STATE_NORMAL_SCALE);
 797
 798          emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
 799          normal = transformed_normal;
 800       }
 801
 802       assert(normal.file == PROGRAM_TEMPORARY);
 803       p->transformed_normal = normal;
 804    }
 805
 806    return p->transformed_normal;
 807 }
 808
 809
 810 static void build_hpos( struct tnl_program *p )
 811 {
 812    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 813    struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
 814    struct ureg mvp[4];
 815
 816    if (p->mvp_with_dp4) {
 817       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 818                               0, mvp );
 819       emit_matrix_transform_vec4( p, hpos, mvp, pos );
 820    }
 821    else {
 822       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
 823                               STATE_MATRIX_TRANSPOSE, mvp );
 824       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
 825    }
 826 }
 827
 828
 829 static GLuint material_attrib( GLuint side, GLuint property )
 830 {
 831    return (property - STATE_AMBIENT) * 2 + side;
 832 }
 833
 834
 835 /**
 836  * Get a bitmask of which material values vary on a per-vertex basis.
 837  */
 838 static void set_material_flags( struct tnl_program *p )
 839 {
 840    p->color_materials = 0;
 841    p->materials = 0;
 842
 843    if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
 844       p->materials =
 845          p->color_materials = p->state->light_color_material_mask;
 846    }
 847
 848    p->materials |= (p->state->varying_vp_inputs >> 16);
 849 }
 850
 851
 852 static struct ureg get_material( struct tnl_program *p, GLuint side,
 853                                  GLuint property )
 854 {
 855    GLuint attrib = material_attrib(side, property);
 856
 857    if (p->color_materials & (1<<attrib))
 858       return register_input(p, VERT_ATTRIB_COLOR0);
 859    else if (p->materials & (1<<attrib)) {
 860       /* Put material values in the GENERIC slots -- they are not used
 861        * for anything in fixed function mode.
 862        */
 863       return register_input( p, attrib + VERT_ATTRIB_GENERIC0 );
 864    }
 865    else
 866       return register_param3( p, STATE_MATERIAL, side, property );
 867 }
 868
 869 #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
 870                                    MAT_BIT_FRONT_AMBIENT | \
 871                                    MAT_BIT_FRONT_DIFFUSE) << (side))
 872
 873
 874 /**
 875  * Either return a precalculated constant value or emit code to
 876  * calculate these values dynamically in the case where material calls
 877  * are present between begin/end pairs.
 878  *
 879  * Probably want to shift this to the program compilation phase - if
 880  * we always emitted the calculation here, a smart compiler could
 881  * detect that it was constant (given a certain set of inputs), and
 882  * lift it out of the main loop.  That way the programs created here
 883  * would be independent of the vertex_buffer details.
 884  */
 885 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
 886 {
 887    if (p->materials & SCENE_COLOR_BITS(side)) {
 888       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
 889       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
 890       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
 891       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
 892       struct ureg tmp = make_temp(p, material_diffuse);
 893       emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
 894                material_ambient, material_emission);
 895       return tmp;
 896    }
 897    else
 898       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
 899 }
 900
 901
 902 static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
 903                                   GLuint side, GLuint property )
 904 {
 905    GLuint attrib = material_attrib(side, property);
 906    if (p->materials & (1<<attrib)) {
 907       struct ureg light_value =
 908          register_param3(p, STATE_LIGHT, light, property);
 909       struct ureg material_value = get_material(p, side, property);
 910       struct ureg tmp = get_temp(p);
 911       emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
 912       return tmp;
 913    }
 914    else
 915       return register_param4(p, STATE_LIGHTPROD, light, side, property);
 916 }
 917
 918
 919 static struct ureg calculate_light_attenuation( struct tnl_program *p,
 920                                                 GLuint i,
 921                                                 struct ureg VPpli,
 922                                                 struct ureg dist )
 923 {
 924    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
 925                                              STATE_ATTENUATION);
 926    struct ureg att = get_temp(p);
 927
 928    /* Calculate spot attenuation:
 929     */
 930    if (!p->state->unit[i].light_spotcutoff_is_180) {
 931       struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
 932                                                   STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
 933       struct ureg spot = get_temp(p);
 934       struct ureg slt = get_temp(p);
 935
 936       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
 937       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
 938       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
 939       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
 940
 941       release_temp(p, spot);
 942       release_temp(p, slt);
 943    }
 944
 945    /* Calculate distance attenuation:
 946     */
 947    if (p->state->unit[i].light_attenuated) {
 948       /* 1/d,d,d,1/d */
 949       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
 950       /* 1,d,d*d,1/d */
 951       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
 952       /* 1/dist-atten */
 953       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
 954
 955       if (!p->state->unit[i].light_spotcutoff_is_180) {
 956          /* dist-atten */
 957          emit_op1(p, OPCODE_RCP, dist, 0, dist);
 958          /* spot-atten * dist-atten */
 959          emit_op2(p, OPCODE_MUL, att, 0, dist, att);
 960       }
 961       else {
 962          /* dist-atten */
 963          emit_op1(p, OPCODE_RCP, att, 0, dist);
 964       }
 965    }
 966
 967    return att;
 968 }
 969
 970
 971 /**
 972  * Compute:
 973  *   lit.y = MAX(0, dots.x)
 974  *   lit.z = SLT(0, dots.x)
 975  */
 976 static void emit_degenerate_lit( struct tnl_program *p,
 977                                  struct ureg lit,
 978                                  struct ureg dots )
 979 {
 980    struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
 981
 982    /* Note that lit.x & lit.w will not be examined.  Note also that
 983     * dots.xyzw == dots.xxxx.
 984     */
 985
 986    /* MAX lit, id, dots;
 987     */
 988    emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
 989
 990    /* result[2] = (in > 0 ? 1 : 0)
 991     * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
 992     */
 993    emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
 994 }
 995
 996
 997 /* Need to add some additional parameters to allow lighting in object
 998  * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
 999  * space lighting.
1000  */
     /**
      * Emit the fixed-function lighting instructions.
      *
      * The initial front (and, with two-sided lighting, back) colors are
      * first copied to the color result registers so that something is
      * output even when no light is enabled.  Then, for each enabled light,
      * the ambient, diffuse and specular terms are accumulated into the
      * _col0/_col1 (front) and _bfc0/_bfc1 (back) temporaries; the last
      * enabled light writes XYZ of the final sums to the result registers.
      *
      * \param p  program under construction; p->state selects which code
      *           paths are emitted.
      */
1001 static void build_lighting( struct tnl_program *p )
1002 {
1003    const GLboolean twoside = p->state->light_twoside;
1004    const GLboolean separate = p->state->separate_specular;
1005    GLuint nr_lights = 0, count = 0;
1006    struct ureg normal = get_transformed_normal(p);
1007    struct ureg lit = get_temp(p);
1008    struct ureg dots = get_temp(p);
1009    struct ureg _col0 = undef, _col1 = undef;
1010    struct ureg _bfc0 = undef, _bfc1 = undef;
1011    GLuint i;
1012
1013    /*
1014     * NOTE:
1015     * dots.x = dot(normal, VPpli)
1016     * dots.y = dot(normal, halfAngle)
1017     * dots.z = back.shininess
1018     * dots.w = front.shininess
1019     */
1020
        /* Count the enabled lights; the per-light loop below compares its
         * running count against this total to detect the last light.
         */
1021    for (i = 0; i < MAX_LIGHTS; i++)
1022       if (p->state->unit[i].light_enabled)
1023          nr_lights++;
1024
1025    set_material_flags(p);
1026
        /* Front face: load the shininess into dots.w and create the color
         * accumulators.
         */
1027    {
1028       if (!p->state->material_shininess_is_zero) {
1029          struct ureg shininess = get_material(p, 0, STATE_SHININESS);
1030          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
1031          release_temp(p, shininess);
1032       }
1033
1034       _col0 = make_temp(p, get_scenecolor(p, 0));
1035       if (separate)
1036          _col1 = make_temp(p, get_identity_param(p));
1037       else
              /* No separate specular: _col1 names the same register as
               * _col0, so both terms accumulate into one color.
               */
1038          _col1 = _col0;
1039    }
1040
        /* Back face: same setup, using material side 1 and dots.z. */
1041    if (twoside) {
1042       if (!p->state->material_shininess_is_zero) {
1043          /* Note that we negate the back-face specular exponent here.
1044           * The negation will be un-done later in the back-face code below.
1045           */
1046          struct ureg shininess = get_material(p, 1, STATE_SHININESS);
1047          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
1048                   negate(swizzle1(shininess,X)));
1049          release_temp(p, shininess);
1050       }
1051
1052       _bfc0 = make_temp(p, get_scenecolor(p, 1));
1053       if (separate)
1054          _bfc1 = make_temp(p, get_identity_param(p));
1055       else
1056          _bfc1 = _bfc0;
1057    }
1058
1059    /* If no lights, still need to emit the scenecolor.
1060     */
1061    {
1062       struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
1063       emit_op1(p, OPCODE_MOV, res0, 0, _col0);
1064    }
1065
1066    if (separate) {
1067       struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
1068       emit_op1(p, OPCODE_MOV, res1, 0, _col1);
1069    }
1070
1071    if (twoside) {
1072       struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
1073       emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
1074    }
1075
1076    if (twoside && separate) {
1077       struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
1078       emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
1079    }
1080
        /* With no enabled light the MOVs above are the whole result. */
1081    if (nr_lights == 0) {
1082       release_temps(p);
1083       return;
1084    }
1085
1086    for (i = 0; i < MAX_LIGHTS; i++) {
1087       if (p->state->unit[i].light_enabled) {
1088          struct ureg half = undef;
1089          struct ureg att = undef, VPpli = undef;
1090
              /* count is the 1-based ordinal of this light among the
               * enabled ones.
               */
1091          count++;
1092
1093          if (p->state->unit[i].light_eyepos3_is_zero) {
1094             /* Can use precomputed constants in this case.
1095              * Attenuation never applies to infinite lights.
1096              */
1097             VPpli = register_param3(p, STATE_INTERNAL,
1098                                     STATE_LIGHT_POSITION_NORMALIZED, i);
1099
1100             if (!p->state->material_shininess_is_zero) {
1101                if (p->state->light_local_viewer) {
1102                   struct ureg eye_hat = get_eye_position_normalized(p);
1103                   half = get_temp(p);
1104                   emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1105                   emit_normalize_vec3(p, half, half);
1106                }
1107                else {
1108                   half = register_param3(p, STATE_INTERNAL,
1109                                          STATE_LIGHT_HALF_VECTOR, i);
1110                }
1111             }
1112          }
1113          else {
1114             struct ureg Ppli = register_param3(p, STATE_INTERNAL,
1115                                                STATE_LIGHT_POSITION, i);
1116             struct ureg V = get_eye_position(p);
1117             struct ureg dist = get_temp(p);
1118
1119             VPpli = get_temp(p);
1120
1121             /* Calculate VPpli vector
1122              */
1123             emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
1124
1125             /* Normalize VPpli.  The dist value also used in
1126              * attenuation below.
1127              */
1128             emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
1129             emit_op1(p, OPCODE_RSQ, dist, 0, dist);
1130             emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
1131
1132             /* Calculate attenuation:
1133              */
1134             if (!p->state->unit[i].light_spotcutoff_is_180 ||
1135                 p->state->unit[i].light_attenuated) {
1136                att = calculate_light_attenuation(p, i, VPpli, dist);
1137             }
1138
1139             /* Calculate viewer direction, or use infinite viewer:
1140              */
1141             if (!p->state->material_shininess_is_zero) {
1142                half = get_temp(p);
1143
1144                if (p->state->light_local_viewer) {
1145                   struct ureg eye_hat = get_eye_position_normalized(p);
1146                   emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1147                }
1148                else {
                      /* identity swizzled X,Y,W,Z puts its W component in
                       * the Z slot, i.e. the vector is added along Z.
                       */
1149                   struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
1150                   emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
1151                }
1152
1153                emit_normalize_vec3(p, half, half);
1154             }
1155
1156             release_temp(p, dist);
1157          }
1158
1159          /* Calculate dot products:
1160           */
1161          if (p->state->material_shininess_is_zero) {
                 /* No specular term: only normal.VPpli is needed. */
1162             emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
1163          }
1164          else {
1165             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
1166             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
1167          }
1168
1169          /* Front face lighting:
1170           */
1171          {
1172             struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
1173             struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
1174             struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
1175             struct ureg res0, res1;
1176             GLuint mask0, mask1;
1177
                 /* Last enabled light: write the final sums to the result
                  * registers instead of back into the temporaries.
                  */
1178             if (count == nr_lights) {
1179                if (separate) {
1180                   mask0 = WRITEMASK_XYZ;
1181                   mask1 = WRITEMASK_XYZ;
1182                   res0 = register_output( p, VERT_RESULT_COL0 );
1183                   res1 = register_output( p, VERT_RESULT_COL1 );
1184                }
1185                else {
                      /* Single color: the diffuse MAD still accumulates
                       * into _col0, and the specular MAD (which reads
                       * _col1 == _col0) writes the result to COL0.
                       */
1186                   mask0 = 0;
1187                   mask1 = WRITEMASK_XYZ;
1188                   res0 = _col0;
1189                   res1 = register_output( p, VERT_RESULT_COL0 );
1190                }
1191             }
1192             else {
1193                mask0 = 0;
1194                mask1 = 0;
1195                res0 = _col0;
1196                res1 = _col1;
1197             }
1198
1199             if (!is_undef(att)) {
1200                /* light is attenuated by distance */
1201                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1202                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1203                emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
1204             }
1205             else if (!p->state->material_shininess_is_zero) {
1206                /* there's a non-zero specular term */
1207                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1208                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1209             }
1210             else {
1211                /* no attenuation, no specular */
1212                emit_degenerate_lit(p, lit, dots);
1213                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1214             }
1215
                 /* Add lit.y * diffuse and lit.z * specular. */
1216             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
1217             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
1218
1219             release_temp(p, ambient);
1220             release_temp(p, diffuse);
1221             release_temp(p, specular);
1222          }
1223
1224          /* Back face lighting:
1225           */
1226          if (twoside) {
1227             struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
1228             struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
1229             struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
1230             struct ureg res0, res1;
1231             GLuint mask0, mask1;
1232
                 /* Same destination selection as the front face, using the
                  * BFC0/BFC1 results and the _bfc0/_bfc1 accumulators.
                  */
1233             if (count == nr_lights) {
1234                if (separate) {
1235                   mask0 = WRITEMASK_XYZ;
1236                   mask1 = WRITEMASK_XYZ;
1237                   res0 = register_output( p, VERT_RESULT_BFC0 );
1238                   res1 = register_output( p, VERT_RESULT_BFC1 );
1239                }
1240                else {
1241                   mask0 = 0;
1242                   mask1 = WRITEMASK_XYZ;
1243                   res0 = _bfc0;
1244                   res1 = register_output( p, VERT_RESULT_BFC0 );
1245                }
1246             }
1247             else {
1248                res0 = _bfc0;
1249                res1 = _bfc1;
1250                mask0 = 0;
1251                mask1 = 0;
1252             }
1253
1254             /* For the back face we need to negate the X and Y component
1255              * dot products.  dots.Z has the negated back-face specular
1256              * exponent.  We swizzle that into the W position.  This
1257              * negation makes the back-face specular term positive again.
1258              */
1259             dots = negate(swizzle(dots,X,Y,W,Z));
1260
1261             if (!is_undef(att)) {
1262                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1263                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1264                emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
1265             }
1266             else if (!p->state->material_shininess_is_zero) {
1267                emit_op1(p, OPCODE_LIT, lit, 0, dots);
1268                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
1269             }
1270             else {
1271                emit_degenerate_lit(p, lit, dots);
1272                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
1273             }
1274
1275             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
1276             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
1277             /* restore dots to its original state for subsequent lights
1278              * by negating and swizzling again.
1279              */
1280             dots = negate(swizzle(dots,X,Y,W,Z));
1281
1282             release_temp(p, ambient);
1283             release_temp(p, diffuse);
1284             release_temp(p, specular);
1285          }
1286
              /* half, VPpli and att may still be undef, or refer to state
               * parameters, at this point; release_temp() is called on them
               * regardless (presumably a no-op for anything that is not a
               * temporary - confirm against release_temp()).
               */
1287          release_temp(p, half);
1288          release_temp(p, VPpli);
1289          release_temp(p, att);
1290       }
1291    }
1292
1293    release_temps( p );
1294 }
1295
1296
1297 static void build_fog( struct tnl_program *p )
1298 {
1299    struct ureg fog = register_output(p, VERT_RESULT_FOGC);
1300    struct ureg input;
1301
1302    if (p->state->fog_source_is_depth) {
1303       input = get_eye_position_z(p);
1304    }
1305    else {
1306       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1307    }
1308
1309    emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
1310 }
1311
1312
1313 static void build_reflect_texgen( struct tnl_program *p,
1314                                   struct ureg dest,
1315                                   GLuint writemask )
1316 {
1317    struct ureg normal = get_transformed_normal(p);
1318    struct ureg eye_hat = get_eye_position_normalized(p);
1319    struct ureg tmp = get_temp(p);
1320
1321    /* n.u */
1322    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1323    /* 2n.u */
1324    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1325    /* (-2n.u)n + u */
1326    emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
1327
1328    release_temp(p, tmp);
1329 }
1330
1331
1332 static void build_sphere_texgen( struct tnl_program *p,
1333                                  struct ureg dest,
1334                                  GLuint writemask )
1335 {
1336    struct ureg normal = get_transformed_normal(p);
1337    struct ureg eye_hat = get_eye_position_normalized(p);
1338    struct ureg tmp = get_temp(p);
1339    struct ureg half = register_scalar_const(p, .5);
1340    struct ureg r = get_temp(p);
1341    struct ureg inv_m = get_temp(p);
1342    struct ureg id = get_identity_param(p);
1343
1344    /* Could share the above calculations, but it would be
1345     * a fairly odd state for someone to set (both sphere and
1346     * reflection active for different texture coordinate
1347     * components.  Of course - if two texture units enable
1348     * reflect and/or sphere, things start to tilt in favour
1349     * of seperating this out:
1350     */
1351
1352    /* n.u */
1353    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1354    /* 2n.u */
1355    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1356    /* (-2n.u)n + u */
1357    emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
1358    /* r + 0,0,1 */
1359    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1360    /* rx^2 + ry^2 + (rz+1)^2 */
1361    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1362    /* 2/m */
1363    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1364    /* 1/m */
1365    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1366    /* r/m + 1/2 */
1367    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1368
1369    release_temp(p, tmp);
1370    release_temp(p, r);
1371    release_temp(p, inv_m);
1372 }
1373
1374
1375 static void build_texture_transform( struct tnl_program *p )
1376 {
1377    GLuint i, j;
1378
1379    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
1380
1381       if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i)))
1382          continue;
1383
1384       if (p->state->unit[i].texgen_enabled ||
1385           p->state->unit[i].texmat_enabled) {
1386
1387          GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1388          struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
1389          struct ureg out_texgen = undef;
1390
1391          if (p->state->unit[i].texgen_enabled) {
1392             GLuint copy_mask = 0;
1393             GLuint sphere_mask = 0;
1394             GLuint reflect_mask = 0;
1395             GLuint normal_mask = 0;
1396             GLuint modes[4];
1397
1398             if (texmat_enabled)
1399                out_texgen = get_temp(p);
1400             else
1401                out_texgen = out;
1402
1403             modes[0] = p->state->unit[i].texgen_mode0;
1404             modes[1] = p->state->unit[i].texgen_mode1;
1405             modes[2] = p->state->unit[i].texgen_mode2;
1406             modes[3] = p->state->unit[i].texgen_mode3;
1407
1408             for (j = 0; j < 4; j++) {
1409                switch (modes[j]) {
1410                case TXG_OBJ_LINEAR: {
1411                   struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1412                   struct ureg plane =
1413                      register_param3(p, STATE_TEXGEN, i,
1414                                      STATE_TEXGEN_OBJECT_S + j);
1415
1416                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1417                            obj, plane );
1418                   break;
1419                }
1420                case TXG_EYE_LINEAR: {
1421                   struct ureg eye = get_eye_position(p);
1422                   struct ureg plane =
1423                      register_param3(p, STATE_TEXGEN, i,
1424                                      STATE_TEXGEN_EYE_S + j);
1425
1426                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1427                            eye, plane );
1428                   break;
1429                }
1430                case TXG_SPHERE_MAP:
1431                   sphere_mask |= WRITEMASK_X << j;
1432                   break;
1433                case TXG_REFLECTION_MAP:
1434                   reflect_mask |= WRITEMASK_X << j;
1435                   break;
1436                case TXG_NORMAL_MAP:
1437                   normal_mask |= WRITEMASK_X << j;
1438                   break;
1439                case TXG_NONE:
1440                   copy_mask |= WRITEMASK_X << j;
1441                }
1442             }
1443
1444             if (sphere_mask) {
1445                build_sphere_texgen(p, out_texgen, sphere_mask);
1446             }
1447
1448             if (reflect_mask) {
1449                build_reflect_texgen(p, out_texgen, reflect_mask);
1450             }
1451
1452             if (normal_mask) {
1453                struct ureg normal = get_transformed_normal(p);
1454                emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1455             }
1456
1457             if (copy_mask) {
1458                struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1459                emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1460             }
1461          }
1462
1463          if (texmat_enabled) {
1464             struct ureg texmat[4];
1465             struct ureg in = (!is_undef(out_texgen) ?
1466                               out_texgen :
1467                               register_input(p, VERT_ATTRIB_TEX0+i));
1468             if (p->mvp_with_dp4) {
1469                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1470                                        0, texmat );
1471                emit_matrix_transform_vec4( p, out, texmat, in );
1472             }
1473             else {
1474                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1475                                        STATE_MATRIX_TRANSPOSE, texmat );
1476                emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1477             }
1478          }
1479
1480          release_temps(p);
1481       }
1482       else {
1483          emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
1484       }
1485    }
1486 }
1487
1488
1489 /**
1490  * Point size attenuation computation.
1491  */
1492 static void build_atten_pointsize( struct tnl_program *p )
1493 {
1494    struct ureg eye = get_eye_position_z(p);
1495    struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
1496    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1497    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1498    struct ureg ut = get_temp(p);
1499
1500    /* dist = |eyez| */
1501    emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
1502    /* p1 + dist * (p2 + dist * p3); */
1503    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1504                 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
1505    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1506                 ut, swizzle1(state_attenuation, X));
1507
1508    /* 1 / sqrt(factor) */
1509    emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
1510
1511 #if 0
1512    /* out = pointSize / sqrt(factor) */
1513    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1514 #else
1515    /* this is a good place to clamp the point size since there's likely
1516     * no hardware registers to clamp point size at rasterization time.
1517     */
1518    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
1519    emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
1520    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
1521 #endif
1522
1523    release_temp(p, ut);
1524 }
1525
1526
1527 /**
1528  * Pass-though per-vertex point size, from user's point size array.
1529  */
1530 static void build_array_pointsize( struct tnl_program *p )
1531 {
1532    struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
1533    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1534    emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
1535 }
1536
1537
1538 static void build_tnl_program( struct tnl_program *p )
1539 {
1540    /* Emit the program, starting with modelviewproject:
1541     */
1542    build_hpos(p);
1543
1544    /* Lighting calculations:
1545     */
1546    if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
1547       if (p->state->light_global_enabled)
1548          build_lighting(p);
1549       else {
1550          if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1551             emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
1552
1553          if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1554             emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
1555       }
1556    }
1557
1558    if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC)
1559       build_fog(p);
1560
1561    if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
1562       build_texture_transform(p);
1563
1564    if (p->state->point_attenuated)
1565       build_atten_pointsize(p);
1566    else if (p->state->point_array)
1567       build_array_pointsize(p);
1568
1569    /* Finish up:
1570     */
1571    emit_op1(p, OPCODE_END, undef, 0, undef);
1572
1573    /* Disassemble:
1574     */
1575    if (DISASSEM) {
1576       _mesa_printf ("\n");
1577    }
1578 }
1579
1580
1581 static void
1582 create_new_program( const struct state_key *key,
1583                     struct gl_vertex_program *program,
1584                     GLboolean mvp_with_dp4,
1585                     GLuint max_temps)
1586 {
1587    struct tnl_program p;
1588
1589    _mesa_memset(&p, 0, sizeof(p));
1590    p.state = key;
1591    p.program = program;
1592    p.eye_position = undef;
1593    p.eye_position_z = undef;
1594    p.eye_position_normalized = undef;
1595    p.transformed_normal = undef;
1596    p.identity = undef;
1597    p.temp_in_use = 0;
1598    p.mvp_with_dp4 = mvp_with_dp4;
1599
1600    if (max_temps >= sizeof(int) * 8)
1601       p.temp_reserved = 0;
1602    else
1603       p.temp_reserved = ~((1<<max_temps)-1);
1604
1605    /* Start by allocating 32 instructions.
1606     * If we need more, we'll grow the instruction array as needed.
1607     */
1608    p.max_inst = 32;
1609    p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst);
1610    p.program->Base.String = NULL;
1611    p.program->Base.NumInstructions =
1612    p.program->Base.NumTemporaries =
1613    p.program->Base.NumParameters =
1614    p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
1615    p.program->Base.Parameters = _mesa_new_parameter_list();
1616    p.program->Base.InputsRead = 0;
1617    p.program->Base.OutputsWritten = 0;
1618
1619    build_tnl_program( &p );
1620 }
1621
1622
1623 /**
1624  * Return a vertex program which implements the current fixed-function
1625  * transform/lighting/texgen operations.
1626  * XXX move this into core mesa (main/)
1627  */
1628 struct gl_vertex_program *
1629 _mesa_get_fixed_func_vertex_program(GLcontext *ctx)
1630 {
1631    struct gl_vertex_program *prog;
1632    struct state_key key;
1633
1634    /* Grab all the relevent state and put it in a single structure:
1635     */
1636    make_state_key(ctx, &key);
1637
1638    /* Look for an already-prepared program for this state:
1639     */
1640    prog = (struct gl_vertex_program *)
1641       _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key));
1642
1643    if (!prog) {
1644       /* OK, we'll have to build a new one */
1645       if (0)
1646          _mesa_printf("Build new TNL program\n");
1647
1648       prog = (struct gl_vertex_program *)
1649          ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
1650       if (!prog)
1651          return NULL;
1652
1653       create_new_program( &key, prog,
1654                           ctx->mvp_with_dp4,
1655                           ctx->Const.VertexProgram.MaxTemps );
1656
1657 #if 0
1658       if (ctx->Driver.ProgramStringNotify)
1659          ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
1660                                           &prog->Base );
1661 #endif
1662       _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
1663                                  &key, sizeof(key), &prog->Base);
1664    }
1665
1666    return prog;
1667 }