src/mesa/drivers/dri/i965/brw_vs_tnl.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.3
   4  *
   5  * Copyright (C) 2005  Tungsten Graphics   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  21  * WHETHER IN
  22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file brw_vs_tnl.c
  28  * Create a vertex program to execute the current fixed function T&L pipeline.
  29  * \author Keith Whitwell
  30  */
  31
  32
  33 #include "glheader.h"
  34 #include "macros.h"
  35 #include "enums.h"
  36 #include "brw_vs.h"
  37 #include "brw_state.h"
  38
  39 #include "shader/program.h"
  40 #include "shader/program_instruction.h"
  41 #include "shader/arbprogparse.h"
  42
  /* Compact description of the fixed-function state consulted when
   * building a T&L vertex program.  make_state_key() zeroes the whole
   * struct and then sets the fields below.
   */
  43 struct state_key {
  44    unsigned light_global_enabled:1;        /* Light->Enabled */
  45    unsigned light_local_viewer:1;          /* Light->Model.LocalViewer */
  46    unsigned light_twoside:1;               /* Light->Model.TwoSide */
  47    unsigned light_color_material:1;        /* Light->ColorMaterialEnabled */
  48    unsigned light_color_material_mask:12;  /* Light->ColorMaterialBitmask */
  49    unsigned light_material_mask:12;        /* material attribs flagged in vb.info.varying[0], rebased to bit 0 */
  50    unsigned normalize:1;                   /* Transform->Normalize */
  51    unsigned rescale_normals:1;             /* Transform->RescaleNormals */
  52    unsigned fog_source_is_depth:1;         /* Fog->FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT */
  53    unsigned tnl_do_vertex_fog:1;           /* currently always set by make_state_key() */
  54    unsigned separate_specular:1;           /* Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR */
  55    unsigned fog_option:2;                  /* FOG_* code from translate_fog_mode() */
  56    unsigned point_attenuated:1;            /* Point->_Attenuated */
  57    unsigned texture_enabled_global:1;      /* any texgen, texture matrix or enabled unit */
  58    unsigned fragprog_inputs_read:12;       /* fragment program InputsRead, plus FRAG_BIT_FOGC when fog is on */
  59
  /* Per-index state.  The light_* fields are indexed by light number,
   * the tex* fields by texture unit.
   * NOTE(review): make_state_key() indexes this array up to MAX_LIGHTS
   * and MAX_TEXTURE_UNITS; presumably both are <= 8 -- confirm.
   */
  60    struct {
  61       unsigned light_enabled:1;            /* light->Enabled */
  62       unsigned light_eyepos3_is_zero:1;    /* light->EyePosition[3] == 0.0 */
  63       unsigned light_spotcutoff_is_180:1;  /* light->SpotCutoff == 180.0 */
  64       unsigned light_attenuated:1;         /* attenuation factors differ from (1, 0, 0) */
  65       unsigned texunit_really_enabled:1;   /* texUnit->_ReallyEnabled */
  66       unsigned texmat_enabled:1;           /* _TexMatEnabled has this unit's bit set */
  67       unsigned texgen_enabled:4;           /* set to 1 when texUnit->TexGenEnabled is non-zero */
  68       unsigned texgen_mode0:4;             /* TXG_* code for the S coordinate */
  69       unsigned texgen_mode1:4;             /* TXG_* code for the T coordinate */
  70       unsigned texgen_mode2:4;             /* TXG_* code for the R coordinate */
  71       unsigned texgen_mode3:4;             /* TXG_* code for the Q coordinate */
  72    } unit[8];
  73 };
  74
  75
  76
  77 #define FOG_NONE   0
  78 #define FOG_LINEAR 1
  79 #define FOG_EXP    2
  80 #define FOG_EXP2   3
  81
  82 static GLuint translate_fog_mode( GLenum mode )
  83 {
  84    switch (mode) {
  85    case GL_LINEAR: return FOG_LINEAR;
  86    case GL_EXP: return FOG_EXP;
  87    case GL_EXP2: return FOG_EXP2;
  88    default: return FOG_NONE;
  89    }
  90 }
  91
  92 #define TXG_NONE           0
  93 #define TXG_OBJ_LINEAR     1
  94 #define TXG_EYE_LINEAR     2
  95 #define TXG_SPHERE_MAP     3
  96 #define TXG_REFLECTION_MAP 4
  97 #define TXG_NORMAL_MAP     5
  98
  99 static GLuint translate_texgen( GLboolean enabled, GLenum mode )
 100 {
 101    if (!enabled)
 102       return TXG_NONE;
 103
 104    switch (mode) {
 105    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
 106    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
 107    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
 108    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
 109    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
 110    default: return TXG_NONE;
 111    }
 112 }
 113
 114 static void make_state_key( GLcontext *ctx, struct state_key *key )
 115 {
 116    struct brw_context *brw = brw_context(ctx);
 117    struct gl_fragment_program *fp = brw->fragment_program;
 118    GLuint i;
 119
 120    /* This now relies on texenvprogram.c being active:
 121     */
 122    assert(fp);
 123
 124    memset(key, 0, sizeof(*key));
 125
 126    /* BRW_NEW_FRAGMENT_PROGRAM */
 127    key->fragprog_inputs_read = fp->Base.InputsRead;
 128
 129    /* _NEW_LIGHT */
 130    key->separate_specular = (brw->attribs.Light->Model.ColorControl ==
 131                              GL_SEPARATE_SPECULAR_COLOR);
 132
 133    /* _NEW_LIGHT */
 134    if (brw->attribs.Light->Enabled) {
 135       key->light_global_enabled = 1;
 136
 137       if (brw->attribs.Light->Model.LocalViewer)
 138          key->light_local_viewer = 1;
 139
 140       if (brw->attribs.Light->Model.TwoSide)
 141          key->light_twoside = 1;
 142
 143       if (brw->attribs.Light->ColorMaterialEnabled) {
 144          key->light_color_material = 1;
 145          key->light_color_material_mask = brw->attribs.Light->ColorMaterialBitmask;
 146       }
 147
 148       /* BRW_NEW_INPUT_VARYING */
 149       for (i = BRW_ATTRIB_MAT_FRONT_AMBIENT ; i < BRW_ATTRIB_INDEX ; i++)
 150          if (brw->vb.info.varying[0] & (1<<i))
 151             key->light_material_mask |= 1<<(i-BRW_ATTRIB_MAT_FRONT_AMBIENT);
 152
 153       for (i = 0; i < MAX_LIGHTS; i++) {
 154          struct gl_light *light = &brw->attribs.Light->Light[i];
 155
 156          if (light->Enabled) {
 157             key->unit[i].light_enabled = 1;
 158
 159             if (light->EyePosition[3] == 0.0)
 160                key->unit[i].light_eyepos3_is_zero = 1;
 161
 162             if (light->SpotCutoff == 180.0)
 163                key->unit[i].light_spotcutoff_is_180 = 1;
 164
 165             if (light->ConstantAttenuation != 1.0 ||
 166                 light->LinearAttenuation != 0.0 ||
 167                 light->QuadraticAttenuation != 0.0)
 168                key->unit[i].light_attenuated = 1;
 169          }
 170       }
 171    }
 172
 173    /* _NEW_TRANSFORM */
 174    if (brw->attribs.Transform->Normalize)
 175       key->normalize = 1;
 176
 177    if (brw->attribs.Transform->RescaleNormals)
 178       key->rescale_normals = 1;
 179
 180    /* BRW_NEW_FRAGMENT_PROGRAM */
 181    key->fog_option = translate_fog_mode(fp->FogOption);
 182    if (key->fog_option)
 183       key->fragprog_inputs_read |= FRAG_BIT_FOGC;
 184
 185    /* _NEW_FOG */
 186    if (brw->attribs.Fog->FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
 187       key->fog_source_is_depth = 1;
 188
 189    /* _NEW_HINT, ??? */
 190    if (1)
 191       key->tnl_do_vertex_fog = 1;
 192
 193    /* _NEW_POINT */
 194    if (brw->attribs.Point->_Attenuated)
 195       key->point_attenuated = 1;
 196
 197    /* _NEW_TEXTURE */
 198    if (brw->attribs.Texture->_TexGenEnabled ||
 199        brw->attribs.Texture->_TexMatEnabled ||
 200        brw->attribs.Texture->_EnabledUnits)
 201       key->texture_enabled_global = 1;
 202
 203    for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
 204       struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
 205
 206       if (texUnit->_ReallyEnabled)
 207          key->unit[i].texunit_really_enabled = 1;
 208
 209       if (brw->attribs.Texture->_TexMatEnabled & ENABLE_TEXMAT(i))
 210          key->unit[i].texmat_enabled = 1;
 211
 212       if (texUnit->TexGenEnabled) {
 213          key->unit[i].texgen_enabled = 1;
 214
 215          key->unit[i].texgen_mode0 =
 216             translate_texgen( texUnit->TexGenEnabled & (1<<0),
 217                               texUnit->GenModeS );
 218          key->unit[i].texgen_mode1 =
 219             translate_texgen( texUnit->TexGenEnabled & (1<<1),
 220                               texUnit->GenModeT );
 221          key->unit[i].texgen_mode2 =
 222             translate_texgen( texUnit->TexGenEnabled & (1<<2),
 223                               texUnit->GenModeR );
 224          key->unit[i].texgen_mode3 =
 225             translate_texgen( texUnit->TexGenEnabled & (1<<3),
 226                               texUnit->GenModeQ );
 227       }
 228    }
 229 }
 230
 231
 232
 233 /* Very useful debugging tool - produces annotated listing of
 234  * generated program with line/function references for each
 235  * instruction back into this file:
 236  */
 237 #define DISASSEM 0
 238
 239 /* Should be tunable by the driver - do we want to do matrix
 240  * multiplications with DP4's or with MUL/MAD's?  SSE works better
 241  * with the latter, drivers may differ.
 242  */
 243 #define PREFER_DP4 1
 244
 /* Instruction-count limit that emit_op3fn() checks before writing a
  * new instruction.
  */
 245 #define BRW_TNL_MAX_INSN 256
 246
 247 /* Use uregs to represent registers internally, translate to Mesa's
 248  * expected formats on emit.
 249  *
 250  * NOTE: These are passed by value extensively in this file rather
 251  * than as usual by pointer reference.  If this disturbs you, try
 252  * remembering they are just 32bits in size.
 253  *
 254  * GCC is smart enough to deal with these dword-sized structures in
 255  * much the same way as if I had defined them as dwords and was using
 256  * macros to access and set the fields.  This is much nicer and easier
 257  * to evolve.
 258  */
 259 struct ureg {
 260    GLuint file:4;    /* register file; copied to File by emit_arg()/emit_dst() */
 261    GLint idx:8;      /* relative addressing may be negative */
 262    GLuint negate:1;  /* copied to NegateBase by emit_arg() */
 263    GLuint swz:12;    /* swizzle; copied to Swizzle by emit_arg() */
 264    GLuint pad:7;     /* unused; brings the bitfields to 32 bits in total */
 265 };
 266
 267
 /* Working state while a T&L vertex program is being generated. */
 268 struct tnl_program {
 269    const struct state_key *state;      /* state snapshot the program is built for */
 270    struct gl_vertex_program *program;  /* program receiving instructions and parameters */
 271
 272    GLuint temp_in_use;    /* bitmask of temporaries currently allocated (see get_temp) */
 273    GLuint temp_reserved;  /* bitmask of temporaries that release_temp(s) must keep */
 274
 /* Cached registers, filled in on first request by the matching get_*()
  * helper; a value for which is_undef() holds means "not computed yet".
  */
 275    struct ureg eye_position;
 276    struct ureg eye_position_normalized;
 277    struct ureg eye_normal;
 278    struct ureg identity;               /* the constant (0,0,0,1), see get_identity_param */
 279
 280    GLuint materials;        /* material attribs that vary per vertex (set_material_flags) */
 281    GLuint color_materials;  /* subset of the above that tracks the vertex colour */
 282 };
 283
 284
 285 const static struct ureg undef = {
 286    PROGRAM_UNDEFINED,
 287    ~0,
 288    0,
 289    0,
 290    0
 291 };
 292
 293 /* Local shorthand for the SWIZZLE_* component selectors, as passed to
 294  * swizzle() and swizzle1():
  */
 295 #define X    SWIZZLE_X
 296 #define Y    SWIZZLE_Y
 297 #define Z    SWIZZLE_Z
 298 #define W    SWIZZLE_W
 299
 300
 301 /* Construct a ureg:
 302  */
 303 static struct ureg make_ureg(GLuint file, GLint idx)
 304 {
 305    struct ureg reg;
 306    reg.file = file;
 307    reg.idx = idx;
 308    reg.negate = 0;
 309    reg.swz = SWIZZLE_NOOP;
 310    reg.pad = 0;
 311    return reg;
 312 }
 313
 314
 315
 316 static struct ureg ureg_negate( struct ureg reg )
 317 {
 318    reg.negate ^= 1;
 319    return reg;
 320 }
 321
 322
 323 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
 324 {
 325    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
 326                            GET_SWZ(reg.swz, y),
 327                            GET_SWZ(reg.swz, z),
 328                            GET_SWZ(reg.swz, w));
 329
 330    return reg;
 331 }
 332
 333 static struct ureg swizzle1( struct ureg reg, int x )
 334 {
 335    return swizzle(reg, x, x, x, x);
 336 }
 337
 338 static struct ureg get_temp( struct tnl_program *p )
 339 {
 340    int bit = ffs( ~p->temp_in_use );
 341    if (!bit) {
 342       fprintf(stderr, "%s: out of temporaries\n", __FILE__);
 343       abort();
 344    }
 345
 346    if (bit > p->program->Base.NumTemporaries)
 347       p->program->Base.NumTemporaries = bit;
 348
 349    p->temp_in_use |= 1<<(bit-1);
 350    return make_ureg(PROGRAM_TEMPORARY, bit-1);
 351 }
 352
 353 static struct ureg reserve_temp( struct tnl_program *p )
 354 {
 355    struct ureg temp = get_temp( p );
 356    p->temp_reserved |= 1<<temp.idx;
 357    return temp;
 358 }
 359
 360 static void release_temp( struct tnl_program *p, struct ureg reg )
 361 {
 362    if (reg.file == PROGRAM_TEMPORARY) {
 363       p->temp_in_use &= ~(1<<reg.idx);
 364       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
 365    }
 366 }
 367
 368 static void release_temps( struct tnl_program *p )
 369 {
 370    p->temp_in_use = p->temp_reserved;
 371 }
 372
 373
 374
 375 static struct ureg register_input( struct tnl_program *p, GLuint input )
 376 {
 377    p->program->Base.InputsRead |= (1<<input);
 378    return make_ureg(PROGRAM_INPUT, input);
 379 }
 380
 381 static struct ureg register_output( struct tnl_program *p, GLuint output )
 382 {
 383    p->program->Base.OutputsWritten |= (1<<output);
 384    return make_ureg(PROGRAM_OUTPUT, output);
 385 }
 386
 387 static struct ureg register_const4f( struct tnl_program *p,
 388                               GLfloat s0,
 389                               GLfloat s1,
 390                               GLfloat s2,
 391                               GLfloat s3)
 392 {
 393    GLfloat values[4];
 394    GLint idx;
 395    values[0] = s0;
 396    values[1] = s1;
 397    values[2] = s2;
 398    values[3] = s3;
 399    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values );
 400    return make_ureg(PROGRAM_STATE_VAR, idx);
 401 }
 402
 /* Shorthands over register_const4f().  The 1f/2f/3f forms fill the
  * missing middle components with 0 and the last component with 1.
  * register_scalar_const() replicates s0 into all four components, so
  * its argument expression is expanded four times.
  */
 403 #define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
 404 #define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
 405 #define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
 406 #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
 407
 408 static GLboolean is_undef( struct ureg reg )
 409 {
 410    return reg.file == PROGRAM_UNDEFINED;
 411 }
 412
 413 static struct ureg get_identity_param( struct tnl_program *p )
 414 {
 415    if (is_undef(p->identity))
 416       p->identity = register_const4f(p, 0,0,0,1);
 417
 418    return p->identity;
 419 }
 420
 421 static struct ureg register_param6( struct tnl_program *p,
 422                                    GLint s0,
 423                                    GLint s1,
 424                                    GLint s2,
 425                                    GLint s3,
 426                                    GLint s4,
 427                                    GLint s5)
 428 {
 429    GLint tokens[6];
 430    GLint idx;
 431    tokens[0] = s0;
 432    tokens[1] = s1;
 433    tokens[2] = s2;
 434    tokens[3] = s3;
 435    tokens[4] = s4;
 436    tokens[5] = s5;
 437    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
 438    return make_ureg(PROGRAM_STATE_VAR, idx);
 439 }
 440
 441
 /* Shorthands over register_param6() for state references that need
  * fewer than six tokens; the unused trailing tokens are passed as 0.
  */
 442 #define register_param1(p,s0)          register_param6(p,s0,0,0,0,0,0)
 443 #define register_param2(p,s0,s1)       register_param6(p,s0,s1,0,0,0,0)
 444 #define register_param3(p,s0,s1,s2)    register_param6(p,s0,s1,s2,0,0,0)
 445 #define register_param4(p,s0,s1,s2,s3) register_param6(p,s0,s1,s2,s3,0,0)
 446
 447
 448 static void register_matrix_param6( struct tnl_program *p,
 449                                     GLint s0,
 450                                     GLint s1,
 451                                     GLint s2,
 452                                     GLint s3,
 453                                     GLint s4,
 454                                     GLint s5,
 455                                     struct ureg *matrix )
 456 {
 457    GLint i;
 458
 459    /* This is a bit sad as the support is there to pull the whole
 460     * matrix out in one go:
 461     */
 462    for (i = 0; i <= s4 - s3; i++)
 463       matrix[i] = register_param6( p, s0, s1, s2, i, i, s5 );
 464 }
 465
 466
 467 static void emit_arg( struct prog_src_register *src,
 468                       struct ureg reg )
 469 {
 470    src->File = reg.file;
 471    src->Index = reg.idx;
 472    src->Swizzle = reg.swz;
 473    src->RelAddr = 0;
 474    src->NegateBase = reg.negate;
 475    src->Abs = 0;
 476    src->NegateAbs = 0;
 477 }
 478
 479 static void emit_dst( struct prog_dst_register *dst,
 480                       struct ureg reg, GLuint mask )
 481 {
 482    dst->File = reg.file;
 483    dst->Index = reg.idx;
 484    /* allow zero as a shorthand for xyzw */
 485    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
 486    dst->CondMask = 0;
 487    dst->CondSwizzle = 0;
 488    dst->CondSrc = 0;
 489    dst->pad = 0;
 490 }
 491
 492 static void debug_insn( struct prog_instruction *inst, const char *fn,
 493                         GLuint line )
 494 {
 495    if (DISASSEM) {
 496       static const char *last_fn;
 497
 498       if (fn != last_fn) {
 499          last_fn = fn;
 500          _mesa_printf("%s:\n", fn);
 501       }
 502
 503       _mesa_printf("%d:\t", line);
 504       _mesa_print_instruction(inst);
 505    }
 506 }
 507
 508
 509 static void emit_op3fn(struct tnl_program *p,
 510                        GLuint op,
 511                        struct ureg dest,
 512                        GLuint mask,
 513                        struct ureg src0,
 514                        struct ureg src1,
 515                        struct ureg src2,
 516                        const char *fn,
 517                        GLuint line)
 518 {
 519    GLuint nr = p->program->Base.NumInstructions++;
 520    struct prog_instruction *inst = &p->program->Base.Instructions[nr];
 521
 522    if (p->program->Base.NumInstructions > BRW_TNL_MAX_INSN) {
 523       _mesa_problem(0, "Out of instructions in emit_op3fn\n");
 524       return;
 525    }
 526
 527    inst->Opcode = op;
 528    inst->StringPos = 0;
 529    inst->Data = 0;
 530
 531    emit_arg( &inst->SrcReg[0], src0 );
 532    emit_arg( &inst->SrcReg[1], src1 );
 533    emit_arg( &inst->SrcReg[2], src2 );
 534
 535    emit_dst( &inst->DstReg, dest, mask );
 536
 537    debug_insn(inst, fn, line);
 538 }
 539
 540
 541
 /* Wrappers around emit_op3fn() that pass the calling function's name
  * and source line for the debug listing.  emit_op2 and emit_op1 pass
  * undef for the source operands they do not take.
  */
 542 #define emit_op3(p, op, dst, mask, src0, src1, src2) \
 543    emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
 544
 545 #define emit_op2(p, op, dst, mask, src0, src1) \
 546     emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
 547
 548 #define emit_op1(p, op, dst, mask, src0) \
 549     emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
 550
 551
 552 static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
 553 {
 554    if (reg.file == PROGRAM_TEMPORARY &&
 555        !(p->temp_reserved & (1<<reg.idx)))
 556       return reg;
 557    else {
 558       struct ureg temp = get_temp(p);
 559       emit_op1(p, OPCODE_MOV, temp, 0, reg);
 560       return temp;
 561    }
 562 }
 563
 564
 565 /* Currently no tracking performed of input/output/register size or
 566  * active elements.  Could be used to reduce these operations, as
 567  * could the matrix type.
 568  */
 569 static void emit_matrix_transform_vec4( struct tnl_program *p,
 570                                         struct ureg dest,
 571                                         const struct ureg *mat,
 572                                         struct ureg src)
 573 {
 574    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
 575    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
 576    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
 577    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
 578 }
 579
 580 /* This version is much easier to implement if writemasks are not
 581  * supported natively on the target or (like SSE), the target doesn't
 582  * have a clean/obvious dotproduct implementation.
 583  */
 584 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
 585                                                   struct ureg dest,
 586                                                   const struct ureg *mat,
 587                                                   struct ureg src)
 588 {
 589    struct ureg tmp;
 590
 591    if (dest.file != PROGRAM_TEMPORARY)
 592       tmp = get_temp(p);
 593    else
 594       tmp = dest;
 595
 596    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
 597    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
 598    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
 599    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
 600
 601    if (dest.file != PROGRAM_TEMPORARY)
 602       release_temp(p, tmp);
 603 }
 604
 605 static void emit_matrix_transform_vec3( struct tnl_program *p,
 606                                         struct ureg dest,
 607                                         const struct ureg *mat,
 608                                         struct ureg src)
 609 {
 610    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
 611    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
 612    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
 613 }
 614
 615
 616 static void emit_normalize_vec3( struct tnl_program *p,
 617                                  struct ureg dest,
 618                                  struct ureg src )
 619 {
 620    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_W, src, src);
 621    emit_op1(p, OPCODE_RSQ, dest, WRITEMASK_W, swizzle1(dest,W));
 622    emit_op2(p, OPCODE_MUL, dest, WRITEMASK_XYZ, src, swizzle1(dest,W));
 623 }
 624
 625 static void emit_passthrough( struct tnl_program *p,
 626                               GLuint input,
 627                               GLuint output )
 628 {
 629    struct ureg out = register_output(p, output);
 630    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
 631 }
 632
 633 static struct ureg get_eye_position( struct tnl_program *p )
 634 {
 635    if (is_undef(p->eye_position)) {
 636       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 637       struct ureg modelview[4];
 638
 639       p->eye_position = reserve_temp(p);
 640
 641       if (PREFER_DP4) {
 642          register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3,
 643                                  STATE_MATRIX, modelview );
 644
 645          emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 646       }
 647       else {
 648          register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3,
 649                                  STATE_MATRIX_TRANSPOSE, modelview );
 650
 651          emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 652       }
 653    }
 654
 655    return p->eye_position;
 656 }
 657
 658
 /* Disabled code, kept for reference only.  It does not compile as
  * written: struct tnl_program in this file has no eye_z member,
  * `modelview' is not declared in this function, and the final return
  * statement lacks its semicolon.
  * NOTE(review): the second test looks inverted -- the other lazy
  * getters compute when is_undef() is true -- confirm before
  * re-enabling.
  */
 659 #if 0
 660 static struct ureg get_eye_z( struct tnl_program *p )
 661 {
 662    if (!is_undef(p->eye_position)) {
 663       return swizzle1(p->eye_position, Z);
 664    }
 665    else if (!is_undef(p->eye_z)) {
 666       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 667       struct ureg modelview2;
 668
 669       p->eye_z = reserve_temp(p);
 670
 671       register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 2, 1,
 672                               STATE_MATRIX, &modelview2 );
 673
 674       emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
 675       emit_op2(p, OPCODE_DP4, p->eye_z, WRITEMASK_Z, pos, modelview2);
 676    }
 677
 678    return swizzle1(p->eye_z, Z)
 679 }
 680 #endif
 681
 682
 683
 684 static struct ureg get_eye_position_normalized( struct tnl_program *p )
 685 {
 686    if (is_undef(p->eye_position_normalized)) {
 687       struct ureg eye = get_eye_position(p);
 688       p->eye_position_normalized = reserve_temp(p);
 689       emit_normalize_vec3(p, p->eye_position_normalized, eye);
 690    }
 691
 692    return p->eye_position_normalized;
 693 }
 694
 695
 696 static struct ureg get_eye_normal( struct tnl_program *p )
 697 {
 698    if (is_undef(p->eye_normal)) {
 699       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
 700       struct ureg mvinv[3];
 701
 702       register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 2,
 703                               STATE_MATRIX_INVTRANS, mvinv );
 704
 705       p->eye_normal = reserve_temp(p);
 706
 707       /* Transform to eye space:
 708        */
 709       emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal );
 710
 711       /* Normalize/Rescale:
 712        */
 713       if (p->state->normalize) {
 714          emit_normalize_vec3( p, p->eye_normal, p->eye_normal );
 715       }
 716       else if (p->state->rescale_normals) {
 717          struct ureg rescale = register_param2(p, STATE_INTERNAL,
 718                                                STATE_NORMAL_SCALE);
 719
 720          emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal,
 721                    swizzle1(rescale, X));
 722       }
 723    }
 724
 725    return p->eye_normal;
 726 }
 727
 728
 729
 730 static void build_hpos( struct tnl_program *p )
 731 {
 732    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
 733    struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
 734    struct ureg mvp[4];
 735
 736    if (PREFER_DP4) {
 737       register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3,
 738                               STATE_MATRIX, mvp );
 739       emit_matrix_transform_vec4( p, hpos, mvp, pos );
 740    }
 741    else {
 742       register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3,
 743                               STATE_MATRIX_TRANSPOSE, mvp );
 744       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
 745    }
 746 }
 747
 748
 749 static GLuint material_attrib( GLuint side, GLuint property )
 750 {
 751    return (property - STATE_AMBIENT) * 2 + side;
 752 }
 753
 754 /* Get a bitmask of which material values vary on a per-vertex basis.
 755  */
 756 static void set_material_flags( struct tnl_program *p )
 757 {
 758    p->color_materials = 0;
 759    p->materials = 0;
 760
 761    if (p->state->light_color_material) {
 762       p->materials =
 763          p->color_materials = p->state->light_color_material_mask;
 764    }
 765
 766    p->materials |= p->state->light_material_mask;
 767 }
 768
 769
 770 static struct ureg get_material( struct tnl_program *p, GLuint side,
 771                                  GLuint property )
 772 {
 773    GLuint attrib = material_attrib(side, property);
 774
 775    if (p->color_materials & (1<<attrib))
 776       return register_input(p, VERT_ATTRIB_COLOR0);
 777    else if (p->materials & (1<<attrib))
 778       return register_input( p, attrib + BRW_ATTRIB_MAT_FRONT_AMBIENT );
 779    else
 780       return register_param3( p, STATE_MATERIAL, side, property );
 781 }
 782
 /* Mask of the emission, ambient and diffuse material bits, shifted
  * left by `side'.  get_scenecolor() tests it against p->materials.
  * NOTE(review): presumably side 1 selects the BACK bits because front
  * and back bits are interleaved -- confirm against the MAT_BIT_*
  * definitions.
  */
 783 #define SCENE_COLOR_BITS(side) ((MAT_BIT_FRONT_EMISSION | \
 784                                  MAT_BIT_FRONT_AMBIENT | \
 785                                  MAT_BIT_FRONT_DIFFUSE) << (side))
 786
 787 /* Either return a precalculated constant value or emit code to
 788  * calculate these values dynamically in the case where material calls
 789  * are present between begin/end pairs.
 790  *
 791  * Probably want to shift this to the program compilation phase - if
 792  * we always emitted the calculation here, a smart compiler could
 793  * detect that it was constant (given a certain set of inputs), and
 794  * lift it out of the main loop.  That way the programs created here
 795  * would be independent of the vertex_buffer details.
 796  */
 797 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
 798 {
 799    if (p->materials & SCENE_COLOR_BITS(side)) {
 800       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
 801       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
 802       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
 803       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
 804       struct ureg tmp = make_temp(p, material_diffuse);
 805       emit_op3(p, OPCODE_MAD, tmp,  WRITEMASK_XYZ, lm_ambient,
 806                material_ambient, material_emission);
 807       return tmp;
 808    }
 809    else
 810       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
 811 }
 812
 813
 814 static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
 815                                   GLuint side, GLuint property )
 816 {
 817    GLuint attrib = material_attrib(side, property);
 818    if (p->materials & (1<<attrib)) {
 819       struct ureg light_value =
 820          register_param3(p, STATE_LIGHT, light, property);
 821       struct ureg material_value = get_material(p, side, property);
 822       struct ureg tmp = get_temp(p);
 823       emit_op2(p, OPCODE_MUL, tmp,  0, light_value, material_value);
 824       return tmp;
 825    }
 826    else
 827       return register_param4(p, STATE_LIGHTPROD, light, side, property);
 828 }
 829
 830 static struct ureg calculate_light_attenuation( struct tnl_program *p,
 831                                                 GLuint i,
 832                                                 struct ureg VPpli,
 833                                                 struct ureg dist )
 834 {
 835    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
 836                                              STATE_ATTENUATION);
 837    struct ureg att = get_temp(p);
 838
 839    /* Calculate spot attenuation:
 840     */
 841    if (!p->state->unit[i].light_spotcutoff_is_180) {
 842       struct ureg spot_dir = register_param3(p, STATE_LIGHT, i,
 843                                              STATE_SPOT_DIRECTION);
 844       struct ureg spot = get_temp(p);
 845       struct ureg slt = get_temp(p);
 846
 847       emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! */
 848       emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot);
 849       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot);
 850       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
 851       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
 852
 853       release_temp(p, spot);
 854       release_temp(p, slt);
 855    }
 856
 857    /* Calculate distance attenuation:
 858     */
 859    if (p->state->unit[i].light_attenuated) {
 860
 861       /* 1/d,d,d,1/d */
 862       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
 863       /* 1,d,d*d,1/d */
 864       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
 865       /* 1/dist-atten */
 866       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
 867
 868       if (!p->state->unit[i].light_spotcutoff_is_180) {
 869          /* dist-atten */
 870          emit_op1(p, OPCODE_RCP, dist, 0, dist);
 871          /* spot-atten * dist-atten */
 872          emit_op2(p, OPCODE_MUL, att, 0, dist, att);
 873       } else {
 874          /* dist-atten */
 875          emit_op1(p, OPCODE_RCP, att, 0, dist);
 876       }
 877    }
 878
 879    return att;
 880 }
 881
 882
 883
 884
 885
 886 /* Need to add some additional parameters to allow lighting in object
 887  * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye
 888  * space lighting.
 889  */
 890 static void build_lighting( struct tnl_program *p )
 891 {
 892    const GLboolean twoside = p->state->light_twoside;
 893    const GLboolean separate = p->state->separate_specular;
 894    GLuint nr_lights = 0, count = 0;
 895    struct ureg normal = get_eye_normal(p);
 896    struct ureg lit = get_temp(p);
 897    struct ureg dots = get_temp(p);
 898    struct ureg _col0 = undef, _col1 = undef;
 899    struct ureg _bfc0 = undef, _bfc1 = undef;
 900    GLuint i;
 901
 902    for (i = 0; i < MAX_LIGHTS; i++)
 903       if (p->state->unit[i].light_enabled)
 904          nr_lights++;
 905
 906    set_material_flags(p);
 907
 908    {
 909       struct ureg shininess = get_material(p, 0, STATE_SHININESS);
 910       emit_op1(p, OPCODE_MOV, dots,  WRITEMASK_W, swizzle1(shininess,X));
 911       release_temp(p, shininess);
 912
 913       _col0 = make_temp(p, get_scenecolor(p, 0));
 914       if (separate)
 915          _col1 = make_temp(p, get_identity_param(p));
 916       else
 917          _col1 = _col0;
 918
 919    }
 920
 921    if (twoside) {
 922       struct ureg shininess = get_material(p, 1, STATE_SHININESS);
 923       emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
 924                ureg_negate(swizzle1(shininess,X)));
 925       release_temp(p, shininess);
 926
 927       _bfc0 = make_temp(p, get_scenecolor(p, 1));
 928       if (separate)
 929          _bfc1 = make_temp(p, get_identity_param(p));
 930       else
 931          _bfc1 = _bfc0;
 932    }
 933
 934
 935    /* If no lights, still need to emit the scenecolor.
 936     */
 937    /* KW: changed to do this always - v1.17 "Fix lighting alpha result"?
 938     */
 939    if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
 940    {
 941       struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
 942       emit_op1(p, OPCODE_MOV, res0, 0, _col0);
 943
 944       if (twoside) {
 945          struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
 946          emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
 947       }
 948    }
 949
 950    if (separate && (p->state->fragprog_inputs_read & FRAG_BIT_COL1)) {
 951
 952       struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
 953       emit_op1(p, OPCODE_MOV, res1, 0, _col1);
 954
 955       if (twoside) {
 956          struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
 957          emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
 958       }
 959    }
 960
 961    if (nr_lights == 0) {
 962       release_temps(p);
 963       return;
 964    }
 965
 966
 967    for (i = 0; i < MAX_LIGHTS; i++) {
 968       if (p->state->unit[i].light_enabled) {
 969          struct ureg half = undef;
 970          struct ureg att = undef, VPpli = undef;
 971
 972          count++;
 973
 974          if (p->state->unit[i].light_eyepos3_is_zero) {
 975             /* Can used precomputed constants in this case.
 976              * Attenuation never applies to infinite lights.
 977              */
 978             VPpli = register_param3(p, STATE_LIGHT, i,
 979                                     STATE_POSITION_NORMALIZED);
 980             half = register_param3(p, STATE_LIGHT, i, STATE_HALF);
 981          }
 982          else {
 983             struct ureg Ppli = register_param3(p, STATE_LIGHT, i,
 984                                                STATE_POSITION);
 985             struct ureg V = get_eye_position(p);
 986             struct ureg dist = get_temp(p);
 987
 988             VPpli = get_temp(p);
 989             half = get_temp(p);
 990
 991             /* Calulate VPpli vector
 992              */
 993             emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
 994
 995             /* Normalize VPpli.  The dist value also used in
 996              * attenuation below.
 997              */
 998             emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
 999             emit_op1(p, OPCODE_RSQ, dist, 0, dist);
1000             emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
1001
1002
1003             /* Calculate  attenuation:
1004              */
1005             if (!p->state->unit[i].light_spotcutoff_is_180 ||
1006                 p->state->unit[i].light_attenuated) {
1007                att = calculate_light_attenuation(p, i, VPpli, dist);
1008             }
1009
1010
1011             /* Calculate viewer direction, or use infinite viewer:
1012              */
1013             if (p->state->light_local_viewer) {
1014                struct ureg eye_hat = get_eye_position_normalized(p);
1015                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1016             }
1017             else {
1018                struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
1019                emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
1020             }
1021
1022             emit_normalize_vec3(p, half, half);
1023
1024             release_temp(p, dist);
1025          }
1026
1027          /* Calculate dot products:
1028           */
1029          emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
1030          emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
1031
1032
1033          /* Front face lighting:
1034           */
1035          {
1036             struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
1037             struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
1038             struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
1039             struct ureg res0, res1;
1040             GLuint mask0, mask1;
1041
1042             emit_op1(p, OPCODE_LIT, lit, 0, dots);
1043
1044             if (!is_undef(att))
1045                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1046
1047
1048             mask0 = 0;
1049             mask1 = 0;
1050             res0 = _col0;
1051             res1 = _col1;
1052
1053             if (count == nr_lights) {
1054                if (separate) {
1055                   mask0 = WRITEMASK_XYZ;
1056                   mask1 = WRITEMASK_XYZ;
1057
1058                   if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1059                      res0 = register_output( p, VERT_RESULT_COL0 );
1060
1061                   if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1062                      res1 = register_output( p, VERT_RESULT_COL1 );
1063                }
1064                else {
1065                   mask1 = WRITEMASK_XYZ;
1066
1067                   if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1068                      res1 = register_output( p, VERT_RESULT_COL0 );
1069                }
1070             }
1071
1072             emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
1073             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
1074             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
1075
1076             release_temp(p, ambient);
1077             release_temp(p, diffuse);
1078             release_temp(p, specular);
1079          }
1080
1081          /* Back face lighting:
1082           */
1083          if (twoside) {
1084             struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
1085             struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
1086             struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
1087             struct ureg res0, res1;
1088             GLuint mask0, mask1;
1089
1090             emit_op1(p, OPCODE_LIT, lit, 0, ureg_negate(swizzle(dots,X,Y,W,Z)));
1091
1092             if (!is_undef(att))
1093                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1094
1095             mask0 = 0;
1096             mask1 = 0;
1097             res0 = _bfc0;
1098             res1 = _bfc1;
1099
1100             if (count == nr_lights) {
1101                if (separate) {
1102                   mask0 = WRITEMASK_XYZ;
1103                   mask1 = WRITEMASK_XYZ;
1104                   if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1105                      res0 = register_output( p, VERT_RESULT_BFC0 );
1106
1107                   if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1108                      res1 = register_output( p, VERT_RESULT_BFC1 );
1109                }
1110                else {
1111                   mask1 = WRITEMASK_XYZ;
1112
1113                   if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1114                      res1 = register_output( p, VERT_RESULT_BFC0 );
1115                }
1116             }
1117
1118             emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
1119             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
1120             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
1121
1122             release_temp(p, ambient);
1123             release_temp(p, diffuse);
1124             release_temp(p, specular);
1125          }
1126
1127          release_temp(p, half);
1128          release_temp(p, VPpli);
1129          release_temp(p, att);
1130       }
1131    }
1132
1133    release_temps( p );
1134 }
1135
1136
1137 static void build_fog( struct tnl_program *p )
1138 {
1139    struct ureg fog = register_output(p, VERT_RESULT_FOGC);
1140    struct ureg input;
1141
1142    if (p->state->fog_source_is_depth) {
1143       input = swizzle1(get_eye_position(p), Z);
1144    }
1145    else {
1146       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1147    }
1148
1149    if (p->state->fog_option &&
1150        p->state->tnl_do_vertex_fog) {
1151       struct ureg params = register_param1(p, STATE_FOG_PARAMS);
1152       struct ureg tmp = get_temp(p);
1153       struct ureg id = get_identity_param(p);
1154
1155       emit_op1(p, OPCODE_MOV, fog, 0, id);
1156
1157       switch (p->state->fog_option) {
1158       case FOG_LINEAR: {
1159          emit_op1(p, OPCODE_ABS, tmp, 0, input);
1160          emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), tmp);
1161          emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W));
1162          emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
1163          emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
1164          break;
1165       }
1166       case FOG_EXP:
1167          emit_op1(p, OPCODE_ABS, tmp, 0, input);
1168          emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X));
1169          emit_op2(p, OPCODE_POW, fog, WRITEMASK_X,
1170                   register_const1f(p, M_E), ureg_negate(tmp));
1171          break;
1172       case FOG_EXP2:
1173          emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X));
1174          emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp);
1175          emit_op2(p, OPCODE_POW, fog, WRITEMASK_X,
1176                   register_const1f(p, M_E), ureg_negate(tmp));
1177          break;
1178       }
1179
1180       release_temp(p, tmp);
1181    }
1182    else {
1183       /* results = incoming fog coords (compute fog per-fragment later)
1184        *
1185        * KW:  Is it really necessary to do anything in this case?
1186        */
1187       emit_op1(p, OPCODE_MOV, fog, 0, input);
1188    }
1189 }
1190
1191 static void build_reflect_texgen( struct tnl_program *p,
1192                                   struct ureg dest,
1193                                   GLuint writemask )
1194 {
1195    struct ureg normal = get_eye_normal(p);
1196    struct ureg eye_hat = get_eye_position_normalized(p);
1197    struct ureg tmp = get_temp(p);
1198
1199    /* n.u */
1200    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1201    /* 2n.u */
1202    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1203    /* (-2n.u)n + u */
1204    emit_op3(p, OPCODE_MAD, dest, writemask, ureg_negate(tmp), normal, eye_hat);
1205
1206    release_temp(p, tmp);
1207 }
1208
1209 static void build_sphere_texgen( struct tnl_program *p,
1210                                  struct ureg dest,
1211                                  GLuint writemask )
1212 {
1213    struct ureg normal = get_eye_normal(p);
1214    struct ureg eye_hat = get_eye_position_normalized(p);
1215    struct ureg tmp = get_temp(p);
1216    struct ureg half = register_scalar_const(p, .5);
1217    struct ureg r = get_temp(p);
1218    struct ureg inv_m = get_temp(p);
1219    struct ureg id = get_identity_param(p);
1220
1221    /* Could share the above calculations, but it would be
1222     * a fairly odd state for someone to set (both sphere and
1223     * reflection active for different texture coordinate
1224     * components.  Of course - if two texture units enable
1225     * reflect and/or sphere, things start to tilt in favour
1226     * of seperating this out:
1227     */
1228
1229    /* n.u */
1230    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1231    /* 2n.u */
1232    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1233    /* (-2n.u)n + u */
1234    emit_op3(p, OPCODE_MAD, r, 0, ureg_negate(tmp), normal, eye_hat);
1235    /* r + 0,0,1 */
1236    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1237    /* rx^2 + ry^2 + (rz+1)^2 */
1238    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1239    /* 2/m */
1240    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1241    /* 1/m */
1242    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1243    /* r/m + 1/2 */
1244    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1245
1246    release_temp(p, tmp);
1247    release_temp(p, r);
1248    release_temp(p, inv_m);
1249 }
1250
1251
1252 static void build_texture_transform( struct tnl_program *p )
1253 {
1254    GLuint i, j;
1255
1256    for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
1257
1258       if (!(p->state->fragprog_inputs_read & (FRAG_BIT_TEX0<<i)))
1259          continue;
1260
1261       if (p->state->unit[i].texgen_enabled ||
1262           p->state->unit[i].texmat_enabled) {
1263
1264          GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1265          struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
1266          struct ureg out_texgen = undef;
1267
1268          if (p->state->unit[i].texgen_enabled) {
1269             GLuint copy_mask = 0;
1270             GLuint sphere_mask = 0;
1271             GLuint reflect_mask = 0;
1272             GLuint normal_mask = 0;
1273             GLuint modes[4];
1274
1275             if (texmat_enabled)
1276                out_texgen = get_temp(p);
1277             else
1278                out_texgen = out;
1279
1280             modes[0] = p->state->unit[i].texgen_mode0;
1281             modes[1] = p->state->unit[i].texgen_mode1;
1282             modes[2] = p->state->unit[i].texgen_mode2;
1283             modes[3] = p->state->unit[i].texgen_mode3;
1284
1285             for (j = 0; j < 4; j++) {
1286                switch (modes[j]) {
1287                case TXG_OBJ_LINEAR: {
1288                   struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1289                   struct ureg plane =
1290                      register_param3(p, STATE_TEXGEN, i,
1291                                      STATE_TEXGEN_OBJECT_S + j);
1292
1293                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1294                            obj, plane );
1295                   break;
1296                }
1297                case TXG_EYE_LINEAR: {
1298                   struct ureg eye = get_eye_position(p);
1299                   struct ureg plane =
1300                      register_param3(p, STATE_TEXGEN, i,
1301                                      STATE_TEXGEN_EYE_S + j);
1302
1303                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1304                            eye, plane );
1305                   break;
1306                }
1307                case TXG_SPHERE_MAP:
1308                   sphere_mask |= WRITEMASK_X << j;
1309                   break;
1310                case TXG_REFLECTION_MAP:
1311                   reflect_mask |= WRITEMASK_X << j;
1312                   break;
1313                case TXG_NORMAL_MAP:
1314                   normal_mask |= WRITEMASK_X << j;
1315                   break;
1316                case TXG_NONE:
1317                   copy_mask |= WRITEMASK_X << j;
1318                }
1319
1320             }
1321
1322
1323             if (sphere_mask) {
1324                build_sphere_texgen(p, out_texgen, sphere_mask);
1325             }
1326
1327             if (reflect_mask) {
1328                build_reflect_texgen(p, out_texgen, reflect_mask);
1329             }
1330
1331             if (normal_mask) {
1332                struct ureg normal = get_eye_normal(p);
1333                emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1334             }
1335
1336             if (copy_mask) {
1337                struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1338                emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1339             }
1340          }
1341
1342          if (texmat_enabled) {
1343             struct ureg texmat[4];
1344             struct ureg in = (!is_undef(out_texgen) ?
1345                               out_texgen :
1346                               register_input(p, VERT_ATTRIB_TEX0+i));
1347             if (PREFER_DP4) {
1348                register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i,
1349                                        0, 3, STATE_MATRIX, texmat );
1350                emit_matrix_transform_vec4( p, out, texmat, in );
1351             }
1352             else {
1353                register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i,
1354                                        0, 3, STATE_MATRIX_TRANSPOSE, texmat );
1355                emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1356             }
1357          }
1358
1359          release_temps(p);
1360       }
1361       else {
1362          emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
1363       }
1364    }
1365 }
1366
1367
1368 /* Seems like it could be tighter:
1369  */
1370 static void build_pointsize( struct tnl_program *p )
1371 {
1372    struct ureg eye = get_eye_position(p);
1373    struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
1374    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1375    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1376    struct ureg ut = get_temp(p);
1377
1378    /* 1, -Z, Z * Z, 1 */
1379    emit_op1(p, OPCODE_MOV, ut, 0, swizzle1(get_identity_param(p), W));
1380    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_YZ, ut, ureg_negate(swizzle1(eye, Z)));
1381    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, ureg_negate(swizzle1(eye, Z)));
1382
1383
1384    /* p1 +  p2 * dist + p3 * dist * dist, 0 */
1385    emit_op2(p, OPCODE_DP3, ut, 0, ut, state_attenuation);
1386
1387    /* 1 / factor */
1388    emit_op1(p, OPCODE_RCP, ut, 0, ut );
1389
1390    /* out = pointSize / factor */
1391    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1392
1393    release_temp(p, ut);
1394 }
1395
1396 static void build_tnl_program( struct tnl_program *p )
1397 {
1398    /* Emit the program, starting with modelviewproject:
1399     */
1400    build_hpos(p);
1401
1402    /* Lighting calculations:
1403     */
1404    if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
1405       if (p->state->light_global_enabled)
1406          build_lighting(p);
1407       else {
1408          if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1409             emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
1410
1411          if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1412             emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
1413       }
1414    }
1415
1416    if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) ||
1417        p->state->fog_option != FOG_NONE)
1418       build_fog(p);
1419
1420    if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
1421       build_texture_transform(p);
1422
1423    if (p->state->point_attenuated)
1424       build_pointsize(p);
1425
1426    /* Finish up:
1427     */
1428    emit_op1(p, OPCODE_END, undef, 0, undef);
1429
1430    /* Disassemble:
1431     */
1432    if (DISASSEM) {
1433       _mesa_printf ("\n");
1434    }
1435 }
1436
1437
1438 static void build_new_tnl_program( const struct state_key *key,
1439                                    struct gl_vertex_program *program,
1440                                    GLuint max_temps)
1441 {
1442    struct tnl_program p;
1443
1444    _mesa_memset(&p, 0, sizeof(p));
1445    p.state = key;
1446    p.program = program;
1447    p.eye_position = undef;
1448    p.eye_position_normalized = undef;
1449    p.eye_normal = undef;
1450    p.identity = undef;
1451    p.temp_in_use = 0;
1452
1453    if (max_temps >= sizeof(int) * 8)
1454       p.temp_reserved = 0;
1455    else
1456       p.temp_reserved = ~((1<<max_temps)-1);
1457
1458    p.program->Base.Instructions = MALLOC(sizeof(struct prog_instruction) * BRW_TNL_MAX_INSN);
1459    p.program->Base.String = 0;
1460    p.program->Base.NumInstructions =
1461    p.program->Base.NumTemporaries =
1462    p.program->Base.NumParameters =
1463    p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
1464    p.program->Base.Parameters = _mesa_new_parameter_list();
1465    p.program->Base.InputsRead = 0;
1466    p.program->Base.OutputsWritten = 0;
1467
1468    build_tnl_program( &p );
1469 }
1470
1471 static void *search_cache( struct brw_tnl_cache *cache,
1472                            GLuint hash,
1473                            const void *key,
1474                            GLuint keysize)
1475 {
1476    struct brw_tnl_cache_item *c;
1477
1478    for (c = cache->items[hash % cache->size]; c; c = c->next) {
1479       if (c->hash == hash && memcmp(c->key, key, keysize) == 0)
1480          return c->data;
1481    }
1482
1483    return NULL;
1484 }
1485
1486 static void rehash( struct brw_tnl_cache *cache )
1487 {
1488    struct brw_tnl_cache_item **items;
1489    struct brw_tnl_cache_item *c, *next;
1490    GLuint size, i;
1491
1492    size = cache->size * 3;
1493    items = (struct brw_tnl_cache_item**) _mesa_malloc(size * sizeof(*items));
1494    _mesa_memset(items, 0, size * sizeof(*items));
1495
1496    for (i = 0; i < cache->size; i++)
1497       for (c = cache->items[i]; c; c = next) {
1498          next = c->next;
1499          c->next = items[c->hash % size];
1500          items[c->hash % size] = c;
1501       }
1502
1503    FREE(cache->items);
1504    cache->items = items;
1505    cache->size = size;
1506 }
1507
1508 static void cache_item( struct brw_tnl_cache *cache,
1509                         GLuint hash,
1510                         const struct state_key *key,
1511                         void *data )
1512 {
1513    struct brw_tnl_cache_item *c = MALLOC(sizeof(*c));
1514    c->hash = hash;
1515
1516    c->key = malloc(sizeof(*key));
1517    memcpy(c->key, key, sizeof(*key));
1518
1519    c->data = data;
1520
1521    if (++cache->n_items > cache->size * 1.5)
1522       rehash(cache);
1523
1524    c->next = cache->items[hash % cache->size];
1525    cache->items[hash % cache->size] = c;
1526 }
1527
1528
1529 static GLuint hash_key( struct state_key *key )
1530 {
1531    GLuint *ikey = (GLuint *)key;
1532    GLuint hash = 0, i;
1533
1534    /* I'm sure this can be improved on, but speed is important:
1535     */
1536    for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++)
1537       hash += ikey[i];
1538
1539    return hash;
1540 }
1541
1542 static void update_tnl_program( struct brw_context *brw )
1543 {
1544    GLcontext *ctx = &brw->intel.ctx;
1545    struct state_key key;
1546    GLuint hash;
1547    struct gl_vertex_program *old = brw->tnl_program;
1548
1549    /* _NEW_PROGRAM */
1550    if (brw->attribs.VertexProgram->_Enabled)
1551       return;
1552
1553    /* Grab all the relevent state and put it in a single structure:
1554     */
1555    make_state_key(ctx, &key);
1556    hash = hash_key(&key);
1557
1558    /* Look for an already-prepared program for this state:
1559     */
1560    brw->tnl_program = (struct gl_vertex_program *)
1561       search_cache( &brw->tnl_program_cache, hash, &key, sizeof(key) );
1562
1563    /* OK, we'll have to build a new one:
1564     */
1565    if (!brw->tnl_program) {
1566       brw->tnl_program = (struct gl_vertex_program *)
1567          ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
1568
1569       build_new_tnl_program( &key, brw->tnl_program,
1570 /*                           ctx->Const.MaxVertexProgramTemps  */
1571                              32
1572          );
1573
1574       if (ctx->Driver.ProgramStringNotify)
1575          ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
1576                                           &brw->tnl_program->Base );
1577
1578       cache_item( &brw->tnl_program_cache,
1579                   hash, &key, brw->tnl_program );
1580    }
1581
1582    if (old != brw->tnl_program)
1583       brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM;
1584 }
1585
1586 /* Note: See brw_draw.c - the vertex program must not rely on
1587  * brw->primitive or brw->reduced_prim.
1588  */
1589 const struct brw_tracked_state brw_tnl_vertprog = {
1590    .dirty = {
1591       .mesa = (_NEW_PROGRAM |
1592                _NEW_LIGHT |
1593                _NEW_TRANSFORM |
1594                _NEW_FOG |
1595                _NEW_HINT |
1596                _NEW_POINT |
1597                _NEW_TEXTURE),
1598       .brw = (BRW_NEW_FRAGMENT_PROGRAM |
1599               BRW_NEW_INPUT_VARYING),
1600       .cache = 0
1601    },
1602    .update = update_tnl_program
1603 };
1604
1605
1606
1607
1608 static void update_active_vertprog( struct brw_context *brw )
1609 {
1610    struct gl_vertex_program *prev = brw->vertex_program;
1611
1612    /* NEW_PROGRAM */
1613    if (brw->attribs.VertexProgram->_Enabled) {
1614       brw->vertex_program = brw->attribs.VertexProgram->Current;
1615    }
1616    else {
1617       /* BRW_NEW_TNL_PROGRAM */
1618       brw->vertex_program = brw->tnl_program;
1619    }
1620
1621    if (brw->vertex_program != prev)
1622       brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
1623 }
1624
1625
1626
1627 const struct brw_tracked_state brw_active_vertprog = {
1628    .dirty = {
1629       .mesa = _NEW_PROGRAM,
1630       .brw = BRW_NEW_TNL_PROGRAM,
1631       .cache = 0
1632    },
1633    .update = update_active_vertprog
1634 };
1635
1636
1637 void brw_ProgramCacheInit( GLcontext *ctx )
1638 {
1639    struct brw_context *brw = brw_context(ctx);
1640
1641    brw->tnl_program_cache.size = 17;
1642    brw->tnl_program_cache.n_items = 0;
1643    brw->tnl_program_cache.items = (struct brw_tnl_cache_item **)
1644       _mesa_calloc(brw->tnl_program_cache.size *
1645                    sizeof(struct brw_tnl_cache_item));
1646 }
1647
1648 void brw_ProgramCacheDestroy( GLcontext *ctx )
1649 {
1650    struct brw_context *brw = brw_context(ctx);
1651    struct brw_tnl_cache_item *c, *next;
1652    GLuint i;
1653
1654    for (i = 0; i < brw->tnl_program_cache.size; i++)
1655       for (c = brw->tnl_program_cache.items[i]; c; c = next) {
1656          next = c->next;
1657          FREE(c->key);
1658          FREE(c->data);
1659          FREE(c);
1660       }
1661
1662    FREE(brw->tnl_program_cache.items);
1663 }