src/mesa/tnl/t_vb_arbprogram.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.3
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
 * \file t_vb_arbprogram.c
  27  * Compile vertex programs to an intermediate representation.
  28  * Execute vertex programs over a buffer of vertices.
  29  * \author Keith Whitwell, Brian Paul
  30  */
  31
  32 #include "glheader.h"
  33 #include "context.h"
  34 #include "imports.h"
  35 #include "macros.h"
  36 #include "mtypes.h"
  37 #include "arbprogparse.h"
  38 #include "program.h"
  39 #include "math/m_matrix.h"
  40 #include "math/m_translate.h"
  41 #include "t_context.h"
  42 #include "t_pipeline.h"
  43
  44
  45
  46
/* New, internal instructions.  These micro-opcodes are numbered
 * consecutively, starting immediately after VP_OPCODE_XPD.
 */
#define IN1        (VP_OPCODE_XPD+1)    /* input-to-reg MOV */
#define IN2        (IN1+1)	/* input-to-reg MOV */
#define IN3        (IN1+2)      /* input-to-reg MOV */
#define IN4        (IN1+3)      /* input-to-reg MOV */
#define OUT        (IN1+4)	/* reg-to-output MOV */
#define OUM        (IN1+5)	/* reg-to-output MOV with mask */
#define RSW        (IN1+6)      /* reduced swizzle (xyzw selectors, single negate bit) */
#define MSK        (IN1+7)	/* reg-to-reg MOV with mask */
#define PAR        (IN1+8)	/* parameter-to-reg MOV */
#define PRL        (IN1+9)	/* parameter-to-reg MOV, index offset by the address register */
  59
  60
  61 /* Layout of register file:
  62
  63   0 -- Scratch (Arg0)
  64   1 -- Scratch (Arg1)
  65   2 -- Scratch (Arg2)
  66   3 -- Scratch (Result)
  67   4 -- Program Temporary 0
  68   ..
  69   31 -- Program Temporary 27
  70   32 -- State/Input/Const shadow 0
  71   ..
  72   63 -- State/Input/Const shadow 31
  73
  74 */
  75
  76
  77
/* Indices into the machine's register array. */
#define REG_ARG0  0             /* scratch: first argument */
#define REG_ARG1  1             /* scratch: second argument */
#define REG_ARG2  2             /* scratch: third argument */
#define REG_RES   3             /* scratch: result */
#define REG_TMP0  4             /* first program temporary */
#define REG_TMP_MAX 32          /* one past the last program temporary */
#define REG_TMP_NR (REG_TMP_MAX-REG_TMP0)
#define REG_PAR0  32            /* first state/input/const shadow slot */
#define REG_PAR_MAX 64          /* one past the last shadow slot */
#define REG_PAR_NR (REG_PAR_MAX-REG_PAR0)

#define REG_MAX 64              /* total number of registers */
#define REG_SWZDST_MAX 16       /* the swz.dst bitfield is 4 bits wide */
  91
  92 /* ARB_vp instructions are broken down into one or more of the
  93  * following micro-instructions, each representable in a 32 bit packed
  94  * structure.
  95  */
  96
  97
/* One packed micro-instruction.  Every view begins with the same 6-bit
 * opcode; the remaining bits are interpreted according to the kind of
 * instruction.  'dword' gives access to the whole encoding at once.
 */
union instruction {
   /* Scalar-result operations (dot products, RCP, ...). */
   struct {
      GLuint opcode:6;
      GLuint dst:5;
      GLuint arg0:6;
      GLuint arg1:6;
      GLuint elt:2;		/* x,y,z or w */
      GLuint pad:7;
   } scl;


   /* Component-wise operations with up to three register arguments. */
   struct {
      GLuint opcode:6;
      GLuint dst:5;
      GLuint arg0:6;
      GLuint arg1:6;
      GLuint arg2:6;
      GLuint pad:3;
   } vec;

   /* Full swizzle: a 3-bit selector and a negate bit per component. */
   struct {
      GLuint opcode:6;
      GLuint dst:4;		/* NOTE!  4-bit field: REG 0..15 only! */
      GLuint arg0:6;
      GLuint neg:4;
      GLuint swz:12;
   } swz;

   /* Reduced swizzle: a 2-bit selector per component, one negate bit. */
   struct {
      GLuint opcode:6;
      GLuint dst:6;
      GLuint arg0:6;
      GLuint neg:1;		/* 1 bit only */
      GLuint swz:8;		/* xyzw only */
      GLuint pad:5;
   } rsw;

   /* Load of an external value (input or parameter) into a register. */
   struct {
      GLuint opcode:6;
      GLuint reg:6;
      GLuint file:5;
      GLuint idx:8;		/* plenty? */
      GLuint rel:1;
      GLuint pad:6;
   } inr;


   /* Store of a register to an external (output) location. */
   struct {
      GLuint opcode:6;
      GLuint reg:6;
      GLuint file:5;
      GLuint idx:8;		/* plenty? */
      GLuint mask:4;
      GLuint pad:3;
   } out;

   /* Register-to-register move under a 4-bit writemask. */
   struct {
      GLuint opcode:6;
      GLuint dst:5;
      GLuint arg0:6;
      GLuint mask:4;
      GLuint pad:11;
   } msk;

   GLuint dword;
};
 164
 165
 166
/* State carried while translating a program into micro-instructions. */
struct compilation {
   /* What each shadow slot (REG_PAR0 + i) currently holds. */
   struct {
      GLuint file:5;
      GLuint idx:8;
   } reg[REG_PAR_NR];

   GLuint par_active;           /* bitmask: shadow slots holding a loaded value */
   GLuint par_protected;        /* bitmask: shadow slots that must not be evicted yet */
   GLuint tmp_active;

   union instruction *csr;      /* next micro-instruction to be written */

   struct vertex_buffer *VB;	/* for input sizes! */
};
 181
 182 /*--------------------------------------------------------------------------- */
 183
/*!
 * Private storage for the vertex program pipeline stage.
 */
struct arb_vp_machine {
   GLfloat reg[REG_MAX][4];	/* Program temporaries, shadowed parameters and inputs,
				   plus some internal values */

   GLfloat (*File[8])[4];	/* Src/Dest for PAR/PRL instructions. */
   GLint AddressReg;            /* written by ARL, read by PRL */

   union instruction store[1024];
/*    GLuint store_size; */

   union instruction *instructions;
   GLint nr_instructions;

   GLvector4f attribs[VERT_RESULT_MAX]; /**< result vectors. */
   GLvector4f ndcCoords;              /**< normalized device coords */
   GLubyte *clipmask;                 /**< clip flags */
   GLubyte ormask, andmask;           /**< for clipping */

   GLuint vtx_nr;		/**< loop counter */

   struct vertex_buffer *VB;
   GLcontext *ctx;
};
 210
 211
 212 /*--------------------------------------------------------------------------- */
 213
/* Per-opcode description: how to execute and how to print one
 * micro-instruction.
 */
struct opcode_info {
   GLuint type;                 /* instruction layout class (VEC, SCL, SWZ, ...) */
   GLuint nr_args;              /* number of register arguments */
   const char *string;          /* mnemonic used by the print functions */
   void (*func)( struct arb_vp_machine *, union instruction );
   void (*print)( union instruction , const struct opcode_info * );
};
 221
 222
 223 #define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))
 224
 225
 226
/**
 * Set x to positive or negative infinity.
 *
 * Three variants are selected by the preprocessor: a direct write of the
 * IEEE-754 single-precision bit pattern, a VMS variant based on
 * __MAXFLOAT, and a fallback based on HUGE_VAL.  Note that
 * IS_INF_OR_NAN is only defined by the VMS variant.
 *
 * XXX: FIXME - type punning.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  x = __MAXFLOAT
#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
#define IS_INF_OR_NAN(t)   ((t) == __MAXFLOAT)
#else
#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
#endif
 243
/* Split a float into mantissa and exponent (wrapper around frexpf). */
#define FREXPF(a,b) frexpf(a,b)

/* Replicate element 0 of a 4-vector into elements 1..3. */
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])

/* Store an integer bit pattern into a float object.
 * FIXME: more type punning (despite use of fi_type...)
 */
#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
 251
 252
 253 static GLfloat RoughApproxLog2(GLfloat t)
 254 {
 255    return LOG2(t);
 256 }
 257
 258 static GLfloat RoughApproxPow2(GLfloat t)
 259 {
 260    GLfloat q;
 261 #ifdef USE_IEEE
 262    GLint ii = (GLint) t;
 263    ii = (ii < 23) + 0x3f800000;
 264    SET_FLOAT_BITS(q, ii);
 265    q = *((GLfloat *) (void *)&ii);
 266 #else
 267    q = (GLfloat) pow(2.0, floor_t0);
 268 #endif
 269    return q;
 270 }
 271
 272 static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
 273 {
 274 #if 0
 275    return (GLfloat) exp(y * log(x));
 276 #else
 277    return (GLfloat) _mesa_pow(x, y);
 278 #endif
 279 }
 280
 281
/* Constant all-zero 4-vector. */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
 283
 284
 285
 286
 287 /**
 288  * This is probably the least-optimal part of the process, have to
 289  * multiply out the stride to access each incoming input value.
 290  */
 291 static GLfloat *get_input( struct arb_vp_machine *m, GLuint index )
 292 {
 293    return VEC_ELT(m->VB->AttribPtr[index], GLfloat, m->vtx_nr);
 294 }
 295
 296
 297 /**
 298  * Fetch a 4-element float vector from the given source register.
 299  * Deal with the possibility that not all elements are present.
 300  */
 301 static void do_IN1( struct arb_vp_machine *m, union instruction op )
 302 {
 303    GLfloat *result = m->reg[op.inr.reg];
 304    const GLfloat *src = get_input(m, op.inr.idx);
 305
 306    result[0] = src[0];
 307    result[1] = 0;
 308    result[2] = 0;
 309    result[3] = 1;
 310 }
 311
 312 static void do_IN2( struct arb_vp_machine *m, union instruction op )
 313 {
 314    GLfloat *result = m->reg[op.inr.reg];
 315    const GLfloat *src = get_input(m, op.inr.idx);
 316
 317    result[0] = src[0];
 318    result[1] = src[1];
 319    result[2] = 0;
 320    result[3] = 1;
 321 }
 322
 323 static void do_IN3( struct arb_vp_machine *m, union instruction op )
 324 {
 325    GLfloat *result = m->reg[op.inr.reg];
 326    const GLfloat *src = get_input(m, op.inr.idx);
 327
 328    result[0] = src[0];
 329    result[1] = src[1];
 330    result[2] = src[2];
 331    result[3] = 1;
 332 }
 333
 334 static void do_IN4( struct arb_vp_machine *m, union instruction op )
 335 {
 336    GLfloat *result = m->reg[op.inr.reg];
 337    const GLfloat *src = get_input(m, op.inr.idx);
 338
 339    result[0] = src[0];
 340    result[1] = src[1];
 341    result[2] = src[2];
 342    result[3] = src[3];
 343 }
 344
 345 /**
 346  * Perform a reduced swizzle:
 347  */
 348 static void do_RSW( struct arb_vp_machine *m, union instruction op )
 349 {
 350    GLfloat *result = m->reg[op.rsw.dst];
 351    const GLfloat *arg0 = m->reg[op.rsw.arg0];
 352    GLuint swz = op.rsw.swz;
 353    GLuint neg = op.rsw.neg;
 354    GLuint i;
 355
 356    if (neg)
 357       for (i = 0; i < 4; i++, swz >>= 2)
 358          result[i] = -arg0[swz & 0x3];
 359    else
 360       for (i = 0; i < 4; i++, swz >>= 2)
 361          result[i] = arg0[swz & 0x3];
 362 }
 363
 364
 365
 366 /**
 367  * Store 4 floats into an external address.
 368  */
 369 static void do_OUM( struct arb_vp_machine *m, union instruction op )
 370 {
 371    GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr];
 372    const GLfloat *value = m->reg[op.out.reg];
 373
 374    if (op.out.mask & 0x1) dst[0] = value[0];
 375    if (op.out.mask & 0x2) dst[1] = value[1];
 376    if (op.out.mask & 0x4) dst[2] = value[2];
 377    if (op.out.mask & 0x8) dst[3] = value[3];
 378 }
 379
 380 static void do_OUT( struct arb_vp_machine *m, union instruction op )
 381 {
 382    GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr];
 383    const GLfloat *value = m->reg[op.out.reg];
 384
 385    dst[0] = value[0];
 386    dst[1] = value[1];
 387    dst[2] = value[2];
 388    dst[3] = value[3];
 389 }
 390
 391 /* Register-to-register MOV with writemask.
 392  */
 393 static void do_MSK( struct arb_vp_machine *m, union instruction op )
 394 {
 395    GLfloat *dst = m->reg[op.msk.dst];
 396    const GLfloat *arg0 = m->reg[op.msk.arg0];
 397
 398    if (op.msk.mask & 0x1) dst[0] = arg0[0];
 399    if (op.msk.mask & 0x2) dst[1] = arg0[1];
 400    if (op.msk.mask & 0x4) dst[2] = arg0[2];
 401    if (op.msk.mask & 0x8) dst[3] = arg0[3];
 402 }
 403
 404
 405 /* Retreive parameters and other constant values:
 406  */
 407 static void do_PAR( struct arb_vp_machine *m, union instruction op )
 408 {
 409    GLfloat *result = m->reg[op.inr.reg];
 410    const GLfloat *src = m->File[op.inr.file][op.inr.idx];
 411
 412    result[0] = src[0];
 413    result[1] = src[1];
 414    result[2] = src[2];
 415    result[3] = src[3];
 416 }
 417
 418
 419 #define RELADDR_MASK MAX_NV_VERTEX_PROGRAM_PARAMS
 420
 421 static void do_PRL( struct arb_vp_machine *m, union instruction op )
 422 {
 423    GLfloat *result = m->reg[op.inr.reg];
 424    GLuint index = (op.inr.idx + m->AddressReg) & RELADDR_MASK;
 425    const GLfloat *src = m->File[op.inr.file][index];
 426
 427    result[0] = src[0];
 428    result[1] = src[1];
 429    result[2] = src[2];
 430    result[3] = src[3];
 431 }
 432
 433 static void do_PRT( struct arb_vp_machine *m, union instruction op )
 434 {
 435    const GLfloat *arg0 = m->reg[op.vec.arg0];
 436
 437    _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr,
 438                 arg0[0], arg0[1], arg0[2], arg0[3]);
 439 }
 440
 441
 442 /**
 443  * The traditional ALU and texturing instructions.  All operate on
 444  * internal registers and ignore write masks and swizzling issues.
 445  */
 446
 447 static void do_ABS( struct arb_vp_machine *m, union instruction op )
 448 {
 449    GLfloat *result = m->reg[op.vec.dst];
 450    const GLfloat *arg0 = m->reg[op.vec.arg0];
 451
 452    result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
 453    result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
 454    result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];
 455    result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];
 456 }
 457
 458 static void do_ADD( struct arb_vp_machine *m, union instruction op )
 459 {
 460    GLfloat *result = m->reg[op.vec.dst];
 461    const GLfloat *arg0 = m->reg[op.vec.arg0];
 462    const GLfloat *arg1 = m->reg[op.vec.arg1];
 463
 464    result[0] = arg0[0] + arg1[0];
 465    result[1] = arg0[1] + arg1[1];
 466    result[2] = arg0[2] + arg1[2];
 467    result[3] = arg0[3] + arg1[3];
 468 }
 469
 470
 471 static void do_ARL( struct arb_vp_machine *m, union instruction op )
 472 {
 473    const GLfloat *arg0 = m->reg[op.out.reg];
 474    m->AddressReg = (GLint) floor(arg0[0]);
 475 }
 476
 477
 478 static void do_DP3( struct arb_vp_machine *m, union instruction op )
 479 {
 480    GLfloat *result = m->reg[op.scl.dst];
 481    const GLfloat *arg0 = m->reg[op.scl.arg0];
 482    const GLfloat *arg1 = m->reg[op.scl.arg1];
 483
 484    result[0] = (arg0[0] * arg1[0] +
 485                 arg0[1] * arg1[1] +
 486                 arg0[2] * arg1[2]);
 487
 488    PUFF(result);
 489 }
 490
 491 static void do_DP4( struct arb_vp_machine *m, union instruction op )
 492 {
 493    GLfloat *result = m->reg[op.scl.dst];
 494    const GLfloat *arg0 = m->reg[op.scl.arg0];
 495    const GLfloat *arg1 = m->reg[op.scl.arg1];
 496
 497    result[0] = (arg0[0] * arg1[0] +
 498                 arg0[1] * arg1[1] +
 499                 arg0[2] * arg1[2] +
 500                 arg0[3] * arg1[3]);
 501
 502    PUFF(result);
 503 }
 504
 505 static void do_DPH( struct arb_vp_machine *m, union instruction op )
 506 {
 507    GLfloat *result = m->reg[op.scl.dst];
 508    const GLfloat *arg0 = m->reg[op.scl.arg0];
 509    const GLfloat *arg1 = m->reg[op.scl.arg1];
 510
 511    result[0] = (arg0[0] * arg1[0] +
 512                 arg0[1] * arg1[1] +
 513                 arg0[2] * arg1[2] +
 514                 1.0     * arg1[3]);
 515
 516    PUFF(result);
 517 }
 518
 519 static void do_DST( struct arb_vp_machine *m, union instruction op )
 520 {
 521    GLfloat *result = m->reg[op.vec.dst];
 522    const GLfloat *arg0 = m->reg[op.vec.arg0];
 523    const GLfloat *arg1 = m->reg[op.vec.arg1];
 524
 525    result[0] = 1.0F;
 526    result[1] = arg0[1] * arg1[1];
 527    result[2] = arg0[2];
 528    result[3] = arg1[3];
 529 }
 530
 531
 532 static void do_EX2( struct arb_vp_machine *m, union instruction op )
 533 {
 534    GLfloat *result = m->reg[op.scl.dst];
 535    const GLfloat *arg0 = m->reg[op.scl.arg0];
 536
 537    result[0] = (GLfloat)RoughApproxPow2(arg0[0]);
 538    PUFF(result);
 539 }
 540
 541 static void do_EXP( struct arb_vp_machine *m, union instruction op )
 542 {
 543    GLfloat *result = m->reg[op.vec.dst];
 544    const GLfloat *arg0 = m->reg[op.vec.arg0];
 545    GLfloat tmp = arg0[0];
 546    GLfloat flr_tmp = FLOORF(tmp);
 547
 548    /* KW: previous definition of this instruction was really messed
 549     * up...  Maybe the nv instruction is quite different?
 550     */
 551    result[0] = (GLfloat) (1 << (int)flr_tmp);
 552    result[1] = tmp - flr_tmp;
 553    result[2] = RoughApproxPow2(tmp);
 554    result[3] = 1.0F;
 555 }
 556
 557 static void do_FLR( struct arb_vp_machine *m, union instruction op )
 558 {
 559    GLfloat *result = m->reg[op.vec.dst];
 560    const GLfloat *arg0 = m->reg[op.vec.arg0];
 561
 562    result[0] = FLOORF(arg0[0]);
 563    result[1] = FLOORF(arg0[1]);
 564    result[2] = FLOORF(arg0[2]);
 565    result[3] = FLOORF(arg0[3]);
 566 }
 567
 568 static void do_FRC( struct arb_vp_machine *m, union instruction op )
 569 {
 570    GLfloat *result = m->reg[op.vec.dst];
 571    const GLfloat *arg0 = m->reg[op.vec.arg0];
 572
 573    result[0] = arg0[0] - FLOORF(arg0[0]);
 574    result[1] = arg0[1] - FLOORF(arg0[1]);
 575    result[2] = arg0[2] - FLOORF(arg0[2]);
 576    result[3] = arg0[3] - FLOORF(arg0[3]);
 577 }
 578
 579 static void do_LG2( struct arb_vp_machine *m, union instruction op )
 580 {
 581    GLfloat *result = m->reg[op.scl.dst];
 582    const GLfloat *arg0 = m->reg[op.scl.arg0];
 583
 584    result[0] = RoughApproxLog2(arg0[0]);
 585    PUFF(result);
 586 }
 587
 588
 589
 590 static void do_LIT( struct arb_vp_machine *m, union instruction op )
 591 {
 592    GLfloat *result = m->reg[op.vec.dst];
 593    const GLfloat *arg0 = m->reg[op.vec.arg0];
 594
 595    const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
 596    GLfloat tmp[4];
 597
 598    tmp[0] = MAX2(arg0[0], 0.0F);
 599    tmp[1] = MAX2(arg0[1], 0.0F);
 600    tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon));
 601
 602    result[0] = 1.0;
 603    result[1] = tmp[0];
 604    result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
 605    result[3] = 1.0;
 606 }
 607
 608
 609 static void do_LOG( struct arb_vp_machine *m, union instruction op )
 610 {
 611    GLfloat *result = m->reg[op.vec.dst];
 612    const GLfloat *arg0 = m->reg[op.vec.arg0];
 613    GLfloat tmp = FABSF(arg0[0]);
 614    int exponent;
 615    GLfloat mantissa = FREXPF(tmp, &exponent);
 616
 617    result[0] = (GLfloat) (exponent - 1);
 618    result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
 619    result[2] = result[0] + LOG2(result[1]);
 620    result[3] = 1.0;
 621 }
 622
 623
 624 static void do_MAD( struct arb_vp_machine *m, union instruction op )
 625 {
 626    GLfloat *result = m->reg[op.vec.dst];
 627    const GLfloat *arg0 = m->reg[op.vec.arg0];
 628    const GLfloat *arg1 = m->reg[op.vec.arg1];
 629    const GLfloat *arg2 = m->reg[op.vec.arg2];
 630
 631    result[0] = arg0[0] * arg1[0] + arg2[0];
 632    result[1] = arg0[1] * arg1[1] + arg2[1];
 633    result[2] = arg0[2] * arg1[2] + arg2[2];
 634    result[3] = arg0[3] * arg1[3] + arg2[3];
 635 }
 636
 637 static void do_MAX( struct arb_vp_machine *m, union instruction op )
 638 {
 639    GLfloat *result = m->reg[op.vec.dst];
 640    const GLfloat *arg0 = m->reg[op.vec.arg0];
 641    const GLfloat *arg1 = m->reg[op.vec.arg1];
 642
 643    result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
 644    result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
 645    result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];
 646    result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];
 647 }
 648
 649
 650 static void do_MIN( struct arb_vp_machine *m, union instruction op )
 651 {
 652    GLfloat *result = m->reg[op.vec.dst];
 653    const GLfloat *arg0 = m->reg[op.vec.arg0];
 654    const GLfloat *arg1 = m->reg[op.vec.arg1];
 655
 656    result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
 657    result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
 658    result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];
 659    result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];
 660 }
 661
 662 static void do_MOV( struct arb_vp_machine *m, union instruction op )
 663 {
 664    GLfloat *result = m->reg[op.vec.dst];
 665    const GLfloat *arg0 = m->reg[op.vec.arg0];
 666
 667    result[0] = arg0[0];
 668    result[1] = arg0[1];
 669    result[2] = arg0[2];
 670    result[3] = arg0[3];
 671 }
 672
 673 static void do_MUL( struct arb_vp_machine *m, union instruction op )
 674 {
 675    GLfloat *result = m->reg[op.vec.dst];
 676    const GLfloat *arg0 = m->reg[op.vec.arg0];
 677    const GLfloat *arg1 = m->reg[op.vec.arg1];
 678
 679    result[0] = arg0[0] * arg1[0];
 680    result[1] = arg0[1] * arg1[1];
 681    result[2] = arg0[2] * arg1[2];
 682    result[3] = arg0[3] * arg1[3];
 683 }
 684
 685
 686 static void do_POW( struct arb_vp_machine *m, union instruction op )
 687 {
 688    GLfloat *result = m->reg[op.scl.dst];
 689    const GLfloat *arg0 = m->reg[op.scl.arg0];
 690    const GLfloat *arg1 = m->reg[op.scl.arg1];
 691
 692    result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]);
 693    PUFF(result);
 694 }
 695
 696 static void do_RCP( struct arb_vp_machine *m, union instruction op )
 697 {
 698    GLfloat *result = m->reg[op.scl.dst];
 699    const GLfloat *arg0 = m->reg[op.scl.arg0];
 700
 701    result[0] = 1.0F / arg0[0];
 702    PUFF(result);
 703 }
 704
 705 static void do_RSQ( struct arb_vp_machine *m, union instruction op )
 706 {
 707    GLfloat *result = m->reg[op.scl.dst];
 708    const GLfloat *arg0 = m->reg[op.scl.arg0];
 709
 710    result[0] = INV_SQRTF(FABSF(arg0[0]));
 711    PUFF(result);
 712 }
 713
 714
 715 static void do_SGE( struct arb_vp_machine *m, union instruction op )
 716 {
 717    GLfloat *result = m->reg[op.vec.dst];
 718    const GLfloat *arg0 = m->reg[op.vec.arg0];
 719    const GLfloat *arg1 = m->reg[op.vec.arg1];
 720
 721    result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
 722    result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
 723    result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;
 724    result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;
 725 }
 726
 727
 728 static void do_SLT( struct arb_vp_machine *m, union instruction op )
 729 {
 730    GLfloat *result = m->reg[op.vec.dst];
 731    const GLfloat *arg0 = m->reg[op.vec.arg0];
 732    const GLfloat *arg1 = m->reg[op.vec.arg1];
 733
 734    result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
 735    result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
 736    result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;
 737    result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
 738 }
 739
 740 static void do_SWZ( struct arb_vp_machine *m, union instruction op )
 741 {
 742    GLfloat *result = m->reg[op.swz.dst];
 743    const GLfloat *arg0 = m->reg[op.swz.arg0];
 744    GLuint swz = op.swz.swz;
 745    GLuint neg = op.swz.neg;
 746    GLuint i;
 747
 748    for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) {
 749       switch (swz & 0x7) {
 750       case SWIZZLE_ZERO: result[i] = 0.0; break;
 751       case SWIZZLE_ONE:  result[i] = 1.0; break;
 752       default:           result[i] = arg0[swz & 0x7]; break;
 753       }
 754       if (neg & 0x1)     result[i] = -result[i];
 755    }
 756 }
 757
 758 static void do_SUB( struct arb_vp_machine *m, union instruction op )
 759 {
 760    GLfloat *result = m->reg[op.vec.dst];
 761    const GLfloat *arg0 = m->reg[op.vec.arg0];
 762    const GLfloat *arg1 = m->reg[op.vec.arg1];
 763
 764    result[0] = arg0[0] - arg1[0];
 765    result[1] = arg0[1] - arg1[1];
 766    result[2] = arg0[2] - arg1[2];
 767    result[3] = arg0[3] - arg1[3];
 768 }
 769
 770
 771 static void do_XPD( struct arb_vp_machine *m, union instruction op )
 772 {
 773    GLfloat *result = m->reg[op.vec.dst];
 774    const GLfloat *arg0 = m->reg[op.vec.arg0];
 775    const GLfloat *arg1 = m->reg[op.vec.arg1];
 776
 777    result[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1];
 778    result[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2];
 779    result[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0];
 780 }
 781
 782 static void do_NOP( struct arb_vp_machine *m, union instruction op )
 783 {
 784 }
 785
 786 /* Some useful debugging functions:
 787  */
 788 static void print_reg( GLuint reg )
 789 {
 790    if (reg == REG_RES)
 791       _mesa_printf("RES");
 792    else if (reg >= REG_ARG0 && reg <= REG_ARG2)
 793       _mesa_printf("ARG%d", reg - REG_ARG0);
 794    else if (reg >= REG_TMP0 && reg < REG_TMP_MAX)
 795       _mesa_printf("TMP%d", reg - REG_TMP0);
 796    else if (reg >= REG_PAR0 && reg < REG_PAR_MAX)
 797       _mesa_printf("PAR%d", reg - REG_PAR0);
 798    else
 799       _mesa_printf("???");
 800 }
 801
 802 static void print_mask( GLuint mask )
 803 {
 804    _mesa_printf(".");
 805    if (mask&0x1) _mesa_printf("x");
 806    if (mask&0x2) _mesa_printf("y");
 807    if (mask&0x4) _mesa_printf("z");
 808    if (mask&0x8) _mesa_printf("w");
 809 }
 810
 811 static void print_extern( GLuint file, GLuint idx )
 812 {
 813    static const char *reg_file[] = {
 814       "TEMPORARY",
 815       "INPUT",
 816       "OUTPUT",
 817       "LOCAL_PARAM",
 818       "ENV_PARAM",
 819       "NAMED_PARAM",
 820       "STATE_VAR",
 821       "WRITE_ONLY",
 822       "ADDRESS"
 823    };
 824
 825    _mesa_printf("%s:%d", reg_file[file], idx);
 826 }
 827
 828
 829
 830 static void print_SWZ( union instruction op, const struct opcode_info *info )
 831 {
 832    GLuint swz = op.swz.swz;
 833    GLuint neg = op.swz.neg;
 834    GLuint i;
 835
 836    _mesa_printf("%s ", info->string);
 837    print_reg(op.swz.dst);
 838    _mesa_printf(", ");
 839    print_reg(op.swz.arg0);
 840    _mesa_printf(".");
 841    for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) {
 842       const char *cswz = "xyzw01??";
 843       if (neg & 0x1)
 844          _mesa_printf("-");
 845       _mesa_printf("%c", cswz[swz&0x7]);
 846    }
 847    _mesa_printf("\n");
 848 }
 849
 850 static void print_RSW( union instruction op, const struct opcode_info *info )
 851 {
 852    GLuint swz = op.rsw.swz;
 853    GLuint neg = op.rsw.neg;
 854    GLuint i;
 855
 856    _mesa_printf("%s ", info->string);
 857    print_reg(op.rsw.dst);
 858    _mesa_printf(", ");
 859    print_reg(op.rsw.arg0);
 860    _mesa_printf(".");
 861    for (i = 0; i < 4; i++, swz >>= 2) {
 862       const char *cswz = "xyzw";
 863       if (neg)
 864          _mesa_printf("-");
 865       _mesa_printf("%c", cswz[swz&0x3]);
 866    }
 867    _mesa_printf("\n");
 868 }
 869
 870
 871 static void print_SCL( union instruction op, const struct opcode_info *info )
 872 {
 873    _mesa_printf("%s ", info->string);
 874    print_reg(op.scl.dst);
 875    _mesa_printf(", ");
 876    print_reg(op.scl.arg0);
 877    if (info->nr_args > 1) {
 878       _mesa_printf(", ");
 879       print_reg(op.scl.arg1);
 880    }
 881    _mesa_printf("\n");
 882 }
 883
 884
 885 static void print_VEC( union instruction op, const struct opcode_info *info )
 886 {
 887    _mesa_printf("%s ", info->string);
 888    print_reg(op.vec.dst);
 889    _mesa_printf(", ");
 890    print_reg(op.vec.arg0);
 891    if (info->nr_args > 1) {
 892       _mesa_printf(", ");
 893       print_reg(op.vec.arg1);
 894    }
 895    if (info->nr_args > 2) {
 896       _mesa_printf(", ");
 897       print_reg(op.vec.arg2);
 898    }
 899    _mesa_printf("\n");
 900 }
 901
 902 static void print_MSK( union instruction op, const struct opcode_info *info )
 903 {
 904    _mesa_printf("%s ", info->string);
 905    print_reg(op.msk.dst);
 906    print_mask(op.msk.mask);
 907    _mesa_printf(", ");
 908    print_reg(op.msk.arg0);
 909    _mesa_printf("\n");
 910 }
 911
 912 static void print_IN( union instruction op, const struct opcode_info *info )
 913 {
 914    _mesa_printf("%s ", info->string);
 915    print_reg(op.inr.reg);
 916    _mesa_printf(", ");
 917    print_extern(op.inr.file, op.inr.idx);
 918    _mesa_printf("\n");
 919 }
 920
 921 static void print_OUT( union instruction op, const struct opcode_info *info )
 922 {
 923    _mesa_printf("%s ", info->string);
 924    print_extern(op.out.file, op.out.idx);
 925    if (op.out.opcode == OUM)
 926       print_mask(op.out.mask);
 927    _mesa_printf(", ");
 928    print_reg(op.out.reg);
 929    _mesa_printf("\n");
 930 }
 931
 932 static void print_NOP( union instruction op, const struct opcode_info *info )
 933 {
 934 }
 935
/* Instruction layout classes used in the 'type' column below.  The
 * internal opcodes (IN4, OUT, MSK, ...) double as class codes for the
 * layouts that have no dedicated constant here.
 */
#define NOP 0
#define VEC 1
#define SCL 2
#define SWZ 3

/* One entry per micro-opcode: layout class, argument count, mnemonic,
 * execute handler and print handler.
 *
 * NOTE(review): presumably indexed by opcode, so the first rows must
 * follow the VP_OPCODE_* numbering (ending with XPD) -- confirm against
 * the enum.  The trailing "Internals" rows follow the IN1..PRL defines
 * in order.
 */
static const struct opcode_info opcode_info[] =
{
   { VEC, 1, "ABS", do_ABS, print_VEC },
   { VEC, 2, "ADD", do_ADD, print_VEC },
   { OUT, 1, "ARL", do_ARL, print_OUT },
   { SCL, 2, "DP3", do_DP3, print_SCL },
   { SCL, 2, "DP4", do_DP4, print_SCL },
   { SCL, 2, "DPH", do_DPH, print_SCL },
   { VEC, 2, "DST", do_DST, print_VEC },
   { NOP, 0, "END", do_NOP, print_NOP },
   { SCL, 1, "EX2", do_EX2, print_VEC },
   { VEC, 1, "EXP", do_EXP, print_VEC },
   { VEC, 1, "FLR", do_FLR, print_VEC },
   { VEC, 1, "FRC", do_FRC, print_VEC },
   { SCL, 1, "LG2", do_LG2, print_VEC },
   { VEC, 1, "LIT", do_LIT, print_VEC },
   { VEC, 1, "LOG", do_LOG, print_VEC },
   { VEC, 3, "MAD", do_MAD, print_VEC },
   { VEC, 2, "MAX", do_MAX, print_VEC },
   { VEC, 2, "MIN", do_MIN, print_VEC },
   { VEC, 1, "MOV", do_MOV, print_VEC },
   { VEC, 2, "MUL", do_MUL, print_VEC },
   { SCL, 2, "POW", do_POW, print_VEC },
   { VEC, 1, "PRT", do_PRT, print_VEC }, /* PRINT */
   { NOP, 1, "RCC", do_NOP, print_NOP }, /* not implemented: executes as a no-op */
   { SCL, 1, "RCP", do_RCP, print_VEC },
   { SCL, 1, "RSQ", do_RSQ, print_VEC },
   { VEC, 2, "SGE", do_SGE, print_VEC },
   { VEC, 2, "SLT", do_SLT, print_VEC },
   { VEC, 2, "SUB", do_SUB, print_VEC },
   { SWZ, 1, "SWZ", do_SWZ, print_SWZ },
   { VEC, 2, "XPD", do_XPD, print_VEC },
   { IN4, 1, "IN1", do_IN1, print_IN }, /* Internals */
   { IN4, 1, "IN2", do_IN2, print_IN },
   { IN4, 1, "IN3", do_IN3, print_IN },
   { IN4, 1, "IN4", do_IN4, print_IN },
   { OUT, 1, "OUT", do_OUT, print_OUT },
   { OUT, 1, "OUM", do_OUM, print_OUT },
   { SWZ, 1, "RSW", do_RSW, print_RSW },
   { MSK, 1, "MSK", do_MSK, print_MSK },
   { IN4, 1, "PAR", do_PAR, print_IN },
   { IN4, 1, "PRL", do_PRL, print_IN },
};
 984
 985
 986 static GLuint cvp_load_reg( struct compilation *cp,
 987                             GLuint file,
 988                             GLuint index,
 989                             GLuint rel )
 990 {
 991    GLuint i, op;
 992
 993    if (file == PROGRAM_TEMPORARY)
 994       return index + REG_TMP0;
 995
 996    /* Don't try to cache relatively addressed values yet:
 997     */
 998    if (!rel) {
 999       for (i = 0; i < REG_PAR_NR; i++) {
1000          if ((cp->par_active & (1<<i)) &&
1001              cp->reg[i].file == file &&
1002              cp->reg[i].idx == index) {
1003             cp->par_protected |= (1<<i);
1004             return i + REG_PAR0;
1005          }
1006       }
1007    }
1008
1009    /* Not already loaded, so identify a slot and load it.
1010     * TODO: preload these values once only!
1011     * TODO: better eviction strategy!
1012     */
1013    if (cp->par_active == ~0) {
1014       assert(cp->par_protected != ~0);
1015       cp->par_active = cp->par_protected;
1016    }
1017
1018    i = ffs(~cp->par_active);
1019    assert(i);
1020    i--;
1021
1022
1023    if (file == PROGRAM_INPUT)
1024       op = IN1 + cp->VB->AttribPtr[index]->size - 1;
1025    else if (rel)
1026       op = PRL;
1027    else
1028       op = PAR;
1029
1030    cp->csr->dword = 0;
1031    cp->csr->inr.opcode = op;
1032    cp->csr->inr.reg = i + REG_PAR0;
1033    cp->csr->inr.file = file;
1034    cp->csr->inr.idx = index;
1035    cp->csr++;
1036
1037    cp->reg[i].file = file;
1038    cp->reg[i].idx = index;
1039    cp->par_protected |= (1<<i);
1040    cp->par_active |= (1<<i);
1041    return i + REG_PAR0;
1042 }
1043
1044 static void cvp_release_regs( struct compilation *cp )
1045 {
1046    cp->par_protected = 0;
1047 }
1048
1049
1050
1051 static GLuint cvp_emit_arg( struct compilation *cp,
1052                             const struct vp_src_register *src,
1053                             GLuint arg )
1054 {
1055    GLuint reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr );
1056    union instruction rsw, noop;
1057
1058    /* Emit any necessary swizzling.
1059     */
1060    rsw.dword = 0;
1061    rsw.rsw.neg = src->Negate ? 1 : 0;
1062    rsw.rsw.swz = ((GET_SWZ(src->Swizzle, 0) << 0) |
1063                   (GET_SWZ(src->Swizzle, 1) << 2) |
1064                   (GET_SWZ(src->Swizzle, 2) << 4) |
1065                   (GET_SWZ(src->Swizzle, 3) << 6));
1066
1067    noop.dword = 0;
1068    noop.rsw.neg = 0;
1069    noop.rsw.swz = ((0<<0) |
1070                    (1<<2) |
1071                    (2<<4) |
1072                    (3<<6));
1073
1074    if (rsw.dword != noop.dword) {
1075       GLuint rsw_reg = arg;
1076       cp->csr->dword = rsw.dword;
1077       cp->csr->rsw.opcode = RSW;
1078       cp->csr->rsw.arg0 = reg;
1079       cp->csr->rsw.dst = rsw_reg;
1080       cp->csr++;
1081       return rsw_reg;
1082    }
1083    else
1084       return reg;
1085 }
1086
1087 static GLuint cvp_choose_result( struct compilation *cp,
1088                                  const struct vp_dst_register *dst,
1089                                  union instruction *fixup,
1090                                  GLuint maxreg)
1091 {
1092    GLuint mask = dst->WriteMask;
1093
1094    if (dst->File == PROGRAM_TEMPORARY) {
1095
1096       /* Optimization: When writing (with a writemask) to an undefined
1097        * value for the first time, the writemask may be ignored.  In
1098        * practise this means that the MSK instruction to implement the
1099        * writemask can be dropped.
1100        */
1101       if (dst->Index < maxreg &&
1102           (mask == 0xf || !(cp->tmp_active & (1<<dst->Index)))) {
1103          fixup->dword = 0;
1104          cp->tmp_active |= (1<<dst->Index);
1105          return REG_TMP0 + dst->Index;
1106       }
1107       else if (mask != 0xf) {
1108          fixup->msk.opcode = MSK;
1109          fixup->msk.arg0 = REG_RES;
1110          fixup->msk.dst = REG_TMP0 + dst->Index;
1111          fixup->msk.mask = mask;
1112          cp->tmp_active |= (1<<dst->Index);
1113          return REG_RES;
1114       }
1115       else {
1116          fixup->vec.opcode = VP_OPCODE_MOV;
1117          fixup->vec.arg0 = REG_RES;
1118          fixup->vec.dst = REG_TMP0 + dst->Index;
1119          cp->tmp_active |= (1<<dst->Index);
1120          return REG_RES;
1121       }
1122    }
1123    else {
1124       assert(dst->File == PROGRAM_OUTPUT);
1125       fixup->out.opcode = (mask == 0xf) ? OUT : OUM;
1126       fixup->out.reg = REG_RES;
1127       fixup->out.file = dst->File;
1128       fixup->out.idx = dst->Index;
1129       fixup->out.mask = mask;
1130       return REG_RES;
1131    }
1132 }
1133
1134
1135 static void cvp_emit_inst( struct compilation *cp,
1136                            const struct vp_instruction *inst )
1137 {
1138    const struct opcode_info *info = &opcode_info[inst->Opcode];
1139    union instruction fixup;
1140    GLuint reg[3];
1141    GLuint result, i;
1142
1143    /* Need to handle SWZ, ARL specially.
1144     */
1145    switch (info->type) {
1146    case OUT:
1147       assert(inst->Opcode == VP_OPCODE_ARL);
1148       reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
1149
1150       cp->csr->dword = 0;
1151       cp->csr->out.opcode = inst->Opcode;
1152       cp->csr->out.reg = reg[0];
1153       cp->csr->out.file = PROGRAM_ADDRESS;
1154       cp->csr->out.idx = 0;
1155       break;
1156    case SWZ:
1157       assert(inst->Opcode == VP_OPCODE_SWZ);
1158       result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_SWZDST_MAX );
1159
1160       reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
1161
1162       cp->csr->dword = 0;
1163       cp->csr->swz.opcode = VP_OPCODE_SWZ;
1164       cp->csr->swz.arg0 = reg[0];
1165       cp->csr->swz.dst = result;
1166       cp->csr->swz.neg = inst->SrcReg[0].Negate;
1167       cp->csr->swz.swz = inst->SrcReg[0].Swizzle;
1168       cp->csr++;
1169
1170       if (result == REG_RES) {
1171          cp->csr->dword = fixup.dword;
1172          cp->csr++;
1173       }
1174       break;
1175
1176    case VEC:
1177    case SCL:                    /* for now */
1178       result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_MAX );
1179
1180       reg[0] = reg[1] = reg[2] = 0;
1181
1182       for (i = 0; i < info->nr_args; i++)
1183          reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i );
1184
1185       cp->csr->dword = 0;
1186       cp->csr->vec.opcode = inst->Opcode;
1187       cp->csr->vec.arg0 = reg[0];
1188       cp->csr->vec.arg1 = reg[1];
1189       cp->csr->vec.arg2 = reg[2];
1190       cp->csr->vec.dst = result;
1191       cp->csr++;
1192
1193       if (result == REG_RES) {
1194          cp->csr->dword = fixup.dword;
1195          cp->csr++;
1196       }
1197       break;
1198
1199
1200    case NOP:
1201       break;
1202
1203    default:
1204       assert(0);
1205       break;
1206    }
1207
1208    cvp_release_regs( cp );
1209 }
1210
1211
1212 static void compile_vertex_program( struct arb_vp_machine *m,
1213                                     const struct vertex_program *program )
1214 {
1215    struct compilation cp;
1216    GLuint i;
1217
1218    /* Initialize cp:
1219     */
1220    memset(&cp, 0, sizeof(cp));
1221    cp.VB = m->VB;
1222    cp.csr = m->store;
1223
1224    /* Compile instructions:
1225     */
1226    for (i = 0; i < program->Base.NumInstructions; i++) {
1227       cvp_emit_inst(&cp, &program->Instructions[i]);
1228    }
1229
1230    /* Finish up:
1231     */
1232    m->instructions = m->store;
1233    m->nr_instructions = cp.csr - m->store;
1234
1235
1236    /* Print/disassemble:
1237     */
1238    if (0) {
1239       for (i = 0; i < m->nr_instructions; i++) {
1240          union instruction insn = m->instructions[i];
1241          const struct opcode_info *info = &opcode_info[insn.vec.opcode];
1242          info->print( insn, info );
1243       }
1244       _mesa_printf("\n\n");
1245    }
1246 }
1247
1248
1249
1250
1251 /* ----------------------------------------------------------------------
1252  * Execution
1253  */
1254 static void userclip( GLcontext *ctx,
1255                       GLvector4f *clip,
1256                       GLubyte *clipmask,
1257                       GLubyte *clipormask,
1258                       GLubyte *clipandmask )
1259 {
1260    GLuint p;
1261
1262    for (p = 0; p < ctx->Const.MaxClipPlanes; p++)
1263       if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
1264          GLuint nr, i;
1265          const GLfloat a = ctx->Transform._ClipUserPlane[p][0];
1266          const GLfloat b = ctx->Transform._ClipUserPlane[p][1];
1267          const GLfloat c = ctx->Transform._ClipUserPlane[p][2];
1268          const GLfloat d = ctx->Transform._ClipUserPlane[p][3];
1269          GLfloat *coord = (GLfloat *)clip->data;
1270          GLuint stride = clip->stride;
1271          GLuint count = clip->count;
1272
1273          for (nr = 0, i = 0 ; i < count ; i++) {
1274             GLfloat dp = (coord[0] * a +
1275                           coord[1] * b +
1276                           coord[2] * c +
1277                           coord[3] * d);
1278
1279             if (dp < 0) {
1280                nr++;
1281                clipmask[i] |= CLIP_USER_BIT;
1282             }
1283
1284             STRIDE_F(coord, stride);
1285          }
1286
1287          if (nr > 0) {
1288             *clipormask |= CLIP_USER_BIT;
1289             if (nr == count) {
1290                *clipandmask |= CLIP_USER_BIT;
1291                return;
1292             }
1293          }
1294       }
1295 }
1296
1297
1298 static GLboolean do_ndc_cliptest( struct arb_vp_machine *m )
1299 {
1300    GLcontext *ctx = m->ctx;
1301    TNLcontext *tnl = TNL_CONTEXT(ctx);
1302    struct vertex_buffer *VB = m->VB;
1303
1304    /* Cliptest and perspective divide.  Clip functions must clear
1305     * the clipmask.
1306     */
1307    m->ormask = 0;
1308    m->andmask = CLIP_ALL_BITS;
1309
1310    if (tnl->NeedNdcCoords) {
1311       VB->NdcPtr =
1312          _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
1313                                             &m->ndcCoords,
1314                                             m->clipmask,
1315                                             &m->ormask,
1316                                             &m->andmask );
1317    }
1318    else {
1319       VB->NdcPtr = NULL;
1320       _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
1321                                             NULL,
1322                                             m->clipmask,
1323                                             &m->ormask,
1324                                             &m->andmask );
1325    }
1326
1327    if (m->andmask) {
1328       /* All vertices are outside the frustum */
1329       return GL_FALSE;
1330    }
1331
1332    /* Test userclip planes.  This contributes to VB->ClipMask.
1333     */
1334    if (ctx->Transform.ClipPlanesEnabled && !ctx->VertexProgram._Enabled) {
1335       userclip( ctx,
1336                 VB->ClipPtr,
1337                 m->clipmask,
1338                 &m->ormask,
1339                 &m->andmask );
1340
1341       if (m->andmask) {
1342          return GL_FALSE;
1343       }
1344    }
1345
1346    VB->ClipAndMask = m->andmask;
1347    VB->ClipOrMask = m->ormask;
1348    VB->ClipMask = m->clipmask;
1349
1350    return GL_TRUE;
1351 }
1352
1353
1354
1355
1356 /**
1357  * Execute the given vertex program.
1358  *
1359  * TODO: Integrate the t_vertex.c code here, to build machine vertices
1360  * directly at this point.
1361  *
1362  * TODO: Eliminate the VB struct entirely and just use
1363  * struct arb_vertex_machine.
1364  */
1365 static GLboolean
1366 run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
1367 {
1368    struct vertex_program *program = (ctx->VertexProgram._Enabled ?
1369                                      ctx->VertexProgram.Current :
1370                                      &ctx->_TnlProgram);
1371    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
1372    struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1373    GLuint i, j, outputs = program->OutputsWritten;
1374
1375    if (program->Parameters) {
1376       _mesa_load_state_parameters(ctx, program->Parameters);
1377       m->File[PROGRAM_STATE_VAR] = program->Parameters->ParameterValues;
1378    }
1379
1380    /* Run the actual program:
1381     */
1382    for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) {
1383       for (j = 0; j < m->nr_instructions; j++) {
1384          union instruction inst = m->instructions[j];
1385          opcode_info[inst.vec.opcode].func( m, inst );
1386       }
1387    }
1388
1389    /* Setup the VB pointers so that the next pipeline stages get
1390     * their data from the right place (the program output arrays).
1391     *
1392     * TODO: 1) Have tnl use these RESULT values for outputs rather
1393     * than trying to shoe-horn inputs and outputs into one set of
1394     * values.
1395     *
1396     * TODO: 2) Integrate t_vertex.c so that we just go straight ahead
1397     * and build machine vertices here.
1398     */
1399    VB->ClipPtr = &m->attribs[VERT_RESULT_HPOS];
1400    VB->ClipPtr->count = VB->Count;
1401
1402    if (outputs & (1<<VERT_RESULT_COL0)) {
1403       VB->ColorPtr[0] = &m->attribs[VERT_RESULT_COL0];
1404       VB->AttribPtr[VERT_ATTRIB_COLOR0] = VB->ColorPtr[0];
1405    }
1406
1407    if (outputs & (1<<VERT_RESULT_BFC0)) {
1408       VB->ColorPtr[1] = &m->attribs[VERT_RESULT_BFC0];
1409    }
1410
1411    if (outputs & (1<<VERT_RESULT_COL1)) {
1412       VB->SecondaryColorPtr[0] = &m->attribs[VERT_RESULT_COL1];
1413       VB->AttribPtr[VERT_ATTRIB_COLOR1] = VB->SecondaryColorPtr[0];
1414    }
1415
1416    if (outputs & (1<<VERT_RESULT_BFC1)) {
1417       VB->SecondaryColorPtr[1] = &m->attribs[VERT_RESULT_BFC1];
1418    }
1419
1420    if (outputs & (1<<VERT_RESULT_FOGC)) {
1421       VB->FogCoordPtr = &m->attribs[VERT_RESULT_FOGC];
1422       VB->AttribPtr[VERT_ATTRIB_FOG] = VB->FogCoordPtr;
1423    }
1424
1425    if (outputs & (1<<VERT_RESULT_PSIZ)) {
1426       VB->PointSizePtr = &m->attribs[VERT_RESULT_PSIZ];
1427       VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &m->attribs[VERT_RESULT_PSIZ];
1428    }
1429
1430    for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1431       if (outputs & (1<<(VERT_RESULT_TEX0+i))) {
1432          VB->TexCoordPtr[i] = &m->attribs[VERT_RESULT_TEX0 + i];
1433          VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i];
1434       }
1435    }
1436
1437 #if 0
1438    for (i = 0; i < VB->Count; i++) {
1439       printf("Out %d: %f %f %f %f %f %f %f %f\n", i,
1440              VEC_ELT(VB->ClipPtr, GLfloat, i)[0],
1441              VEC_ELT(VB->ClipPtr, GLfloat, i)[1],
1442              VEC_ELT(VB->ClipPtr, GLfloat, i)[2],
1443              VEC_ELT(VB->ClipPtr, GLfloat, i)[3],
1444              VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0],
1445              VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1],
1446              VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2],
1447              VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3]);
1448    }
1449 #endif
1450
1451    /* Perform NDC and cliptest operations:
1452     */
1453    return do_ndc_cliptest(m);
1454 }
1455
1456
1457 static void
1458 validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage )
1459 {
1460    struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1461    struct vertex_program *program = (ctx->VertexProgram._Enabled ?
1462                                      ctx->VertexProgram.Current :
1463                                      &ctx->_TnlProgram);
1464
1465    compile_vertex_program( m, program );
1466
1467    /* Grab the state GL state and put into registers:
1468     */
1469    m->File[PROGRAM_LOCAL_PARAM] = program->Base.LocalParams;
1470    m->File[PROGRAM_ENV_PARAM] = ctx->VertexProgram.Parameters;
1471    m->File[PROGRAM_STATE_VAR] = 0;
1472 }
1473
1474
1475
1476
1477
1478
1479
1480 /**
1481  * Called the first time stage->run is called.  In effect, don't
1482  * allocate data until the first time the stage is run.
1483  */
1484 static void init_vertex_program( GLcontext *ctx,
1485                                  struct tnl_pipeline_stage *stage )
1486 {
1487    TNLcontext *tnl = TNL_CONTEXT(ctx);
1488    struct vertex_buffer *VB = &(tnl->vb);
1489    struct arb_vp_machine *m;
1490    const GLuint size = VB->Size;
1491    GLuint i;
1492
1493    stage->privatePtr = MALLOC(sizeof(*m));
1494    m = ARB_VP_MACHINE(stage);
1495    if (!m)
1496       return;
1497
1498    /* arb_vertex_machine struct should subsume the VB:
1499     */
1500    m->VB = VB;
1501    m->ctx = ctx;
1502
1503    /* Allocate arrays of vertex output values */
1504    for (i = 0; i < VERT_RESULT_MAX; i++) {
1505       _mesa_vector4f_alloc( &m->attribs[i], 0, size, 32 );
1506       m->attribs[i].size = 4;
1507    }
1508
1509    /* a few other misc allocations */
1510    _mesa_vector4f_alloc( &m->ndcCoords, 0, size, 32 );
1511    m->clipmask = (GLubyte *) ALIGN_MALLOC(sizeof(GLubyte)*size, 32 );
1512 }
1513
1514
1515
1516
1517 /**
1518  * Destructor for this pipeline stage.
1519  */
1520 static void dtr( struct tnl_pipeline_stage *stage )
1521 {
1522    struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1523
1524    if (m) {
1525       GLuint i;
1526
1527       /* free the vertex program result arrays */
1528       for (i = 0; i < VERT_RESULT_MAX; i++)
1529          _mesa_vector4f_free( &m->attribs[i] );
1530
1531       /* free misc arrays */
1532       _mesa_vector4f_free( &m->ndcCoords );
1533       ALIGN_FREE( m->clipmask );
1534
1535       FREE( m );
1536       stage->privatePtr = NULL;
1537    }
1538 }
1539
1540 /**
1541  * Public description of this pipeline stage.
1542  */
1543 const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage =
1544 {
1545    "vertex-program",
1546    NULL,                        /* private_data */
1547    init_vertex_program,         /* create */
1548    dtr,                         /* destroy */
1549    validate_vertex_program,     /* validate */
1550    run_arb_vertex_program       /* run */
1551 };