src/mesa/tnl/t_vb_arbprogram.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.3
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
 * \file t_vb_arbprogram.c
  27  * Compile vertex programs to an intermediate representation.
  28  * Execute vertex programs over a buffer of vertices.
  29  * \author Keith Whitwell, Brian Paul
  30  */
  31
  32 #include "glheader.h"
  33 #include "context.h"
  34 #include "imports.h"
  35 #include "macros.h"
  36 #include "mtypes.h"
  37 #include "arbprogparse.h"
  38 #include "program.h"
  39 #include "math/m_matrix.h"
  40 #include "math/m_translate.h"
  41 #include "t_context.h"
  42 #include "t_pipeline.h"
  43 #include "t_vp_build.h"
  44 #include "t_vb_arbprogram.h"
  45
  46 #define DISASSEM 0
  47
  48 /*--------------------------------------------------------------------------- */
  49
/**
 * Static description of one opcode.  One entry per opcode lives in the
 * opcode_info[] table in this file.
 */
struct opcode_info {
   GLuint nr_args;              /* number of source operands the opcode takes */
   const char *string;          /* mnemonic printed by the disassembler */
   void (*print)( union instruction , const struct opcode_info * );  /* disassembly callback */
};
  55
/**
 * Transient state used while translating a vertex program into the
 * internal instruction stream.
 */
struct compilation {
   GLuint reg_active;           /* bitmask of destination registers written so far (see cvp_choose_result) */
   union instruction *csr;      /* cursor: next free instruction slot (see cvp_next_instruction) */
   struct vertex_buffer *VB;    /* for input sizes! */
};
  61
  62
/* Fetch the arb_vp_machine stored in a pipeline stage's privatePtr. */
#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))
  64
  65
  66
/**
 * Set x to positive or negative infinity.
 *
 * The IEEE/Win32 variant stores the single-precision bit patterns
 * directly; the VMS variant substitutes +/-__MAXFLOAT and the fallback
 * uses HUGE_VAL.  Note that IS_INF_OR_NAN is only defined by the VMS
 * branch here.
 *
 * XXX: FIXME - type punning.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  x = __MAXFLOAT
#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
#define IS_INF_OR_NAN(t)   ((t) == __MAXFLOAT)
#else
#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
#endif

/* Split a float into mantissa and exponent (thin wrapper over frexpf). */
#define FREXPF(a,b) frexpf(a,b)

/* Replicate component 0 of a 4-vector into components 1, 2 and 3. */
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])

/* Overwrite the bits of float lvalue x with the integer 'bits'.
 * 'bits' is not parenthesised in the expansion, so pass a simple
 * expression.
 *
 * FIXME: more type punning (despite use of fi_type...)
 */
#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  91
  92
  93 static GLfloat RoughApproxLog2(GLfloat t)
  94 {
  95    return LOG2(t);
  96 }
  97
  98 static GLfloat RoughApproxPow2(GLfloat t)
  99 {
 100    GLfloat q;
 101 #ifdef USE_IEEE
 102    GLint ii = (GLint) t;
 103    ii = (ii < 23) + 0x3f800000;
 104    SET_FLOAT_BITS(q, ii);
 105    q = *((GLfloat *) (void *)&ii);
 106 #else
 107    q = (GLfloat) pow(2.0, floor_t0);
 108 #endif
 109    return q;
 110 }
 111
 112 static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
 113 {
 114    return (GLfloat) _mesa_pow(x, y);
 115 }
 116
 117
 118
 119
 120
 121 /**
 122  * Perform a reduced swizzle:
 123  */
 124 static void do_RSW( struct arb_vp_machine *m, union instruction op )
 125 {
 126    GLfloat *result = m->File[0][op.rsw.dst];
 127    const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];
 128    GLuint swz = op.rsw.swz;
 129    GLuint neg = op.rsw.neg;
 130
 131    result[0] = arg0[GET_RSW(swz, 0)];
 132    result[1] = arg0[GET_RSW(swz, 1)];
 133    result[2] = arg0[GET_RSW(swz, 2)];
 134    result[3] = arg0[GET_RSW(swz, 3)];
 135
 136    if (neg) {
 137       if (neg & 0x1) result[0] = -result[0];
 138       if (neg & 0x2) result[1] = -result[1];
 139       if (neg & 0x4) result[2] = -result[2];
 140       if (neg & 0x8) result[3] = -result[3];
 141    }
 142 }
 143
 144 /* Used to implement write masking.  To make things easier for the sse
 145  * generator I've gone back to a 1 argument version of this function
 146  * (dst.msk = arg), rather than the semantically cleaner (dst = SEL
 147  * arg0, arg1, msk)
 148  *
 149  * That means this is the only instruction which doesn't write a full
 150  * 4 dwords out.  This would make such a program harder to analyse,
 151  * but it looks like analysis is going to take place on a higher level
 152  * anyway.
 153  */
 154 static void do_MSK( struct arb_vp_machine *m, union instruction op )
 155 {
 156    GLfloat *dst = m->File[0][op.msk.dst];
 157    const GLfloat *arg = m->File[op.msk.file][op.msk.idx];
 158
 159    if (op.msk.mask & 0x1) dst[0] = arg[0];
 160    if (op.msk.mask & 0x2) dst[1] = arg[1];
 161    if (op.msk.mask & 0x4) dst[2] = arg[2];
 162    if (op.msk.mask & 0x8) dst[3] = arg[3];
 163 }
 164
 165
 166 static void do_PRT( struct arb_vp_machine *m, union instruction op )
 167 {
 168    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 169
 170    _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr,
 171                 arg0[0], arg0[1], arg0[2], arg0[3]);
 172 }
 173
 174
 175 /**
 176  * The traditional ALU and texturing instructions.  All operate on
 177  * internal registers and ignore write masks and swizzling issues.
 178  */
 179
 180 static void do_ABS( struct arb_vp_machine *m, union instruction op )
 181 {
 182    GLfloat *result = m->File[0][op.alu.dst];
 183    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 184
 185    result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
 186    result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
 187    result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];
 188    result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];
 189 }
 190
 191 static void do_ADD( struct arb_vp_machine *m, union instruction op )
 192 {
 193    GLfloat *result = m->File[0][op.alu.dst];
 194    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 195    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 196
 197    result[0] = arg0[0] + arg1[0];
 198    result[1] = arg0[1] + arg1[1];
 199    result[2] = arg0[2] + arg1[2];
 200    result[3] = arg0[3] + arg1[3];
 201 }
 202
 203
 204 static void do_DP3( struct arb_vp_machine *m, union instruction op )
 205 {
 206    GLfloat *result = m->File[0][op.alu.dst];
 207    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 208    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 209
 210    result[0] = (arg0[0] * arg1[0] +
 211                 arg0[1] * arg1[1] +
 212                 arg0[2] * arg1[2]);
 213
 214    PUFF(result);
 215 }
 216
 217
 218
 219 static void do_DP4( struct arb_vp_machine *m, union instruction op )
 220 {
 221    GLfloat *result = m->File[0][op.alu.dst];
 222    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 223    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 224
 225    result[0] = (arg0[0] * arg1[0] +
 226                 arg0[1] * arg1[1] +
 227                 arg0[2] * arg1[2] +
 228                 arg0[3] * arg1[3]);
 229
 230    PUFF(result);
 231 }
 232
 233 static void do_DPH( struct arb_vp_machine *m, union instruction op )
 234 {
 235    GLfloat *result = m->File[0][op.alu.dst];
 236    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 237    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 238
 239    result[0] = (arg0[0] * arg1[0] +
 240                 arg0[1] * arg1[1] +
 241                 arg0[2] * arg1[2] +
 242                 1.0     * arg1[3]);
 243
 244    PUFF(result);
 245 }
 246
 247 static void do_DST( struct arb_vp_machine *m, union instruction op )
 248 {
 249    GLfloat *result = m->File[0][op.alu.dst];
 250    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 251    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 252
 253    result[0] = 1.0F;
 254    result[1] = arg0[1] * arg1[1];
 255    result[2] = arg0[2];
 256    result[3] = arg1[3];
 257 }
 258
 259
 260 static void do_EX2( struct arb_vp_machine *m, union instruction op )
 261 {
 262    GLfloat *result = m->File[0][op.alu.dst];
 263    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 264
 265    result[0] = (GLfloat)RoughApproxPow2(arg0[0]);
 266    PUFF(result);
 267 }
 268
 269 static void do_EXP( struct arb_vp_machine *m, union instruction op )
 270 {
 271    GLfloat *result = m->File[0][op.alu.dst];
 272    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 273    GLfloat tmp = arg0[0];
 274    GLfloat flr_tmp = FLOORF(tmp);
 275
 276    /* KW: nvvertexec has an optimized version of this which is pretty
 277     * hard to understand/validate, but avoids the RoughApproxPow2.
 278     */
 279    result[0] = (GLfloat) (1 << (int)flr_tmp);
 280    result[1] = tmp - flr_tmp;
 281    result[2] = RoughApproxPow2(tmp);
 282    result[3] = 1.0F;
 283 }
 284
 285 static void do_FLR( struct arb_vp_machine *m, union instruction op )
 286 {
 287    GLfloat *result = m->File[0][op.alu.dst];
 288    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 289
 290    result[0] = FLOORF(arg0[0]);
 291    result[1] = FLOORF(arg0[1]);
 292    result[2] = FLOORF(arg0[2]);
 293    result[3] = FLOORF(arg0[3]);
 294 }
 295
 296 static void do_FRC( struct arb_vp_machine *m, union instruction op )
 297 {
 298    GLfloat *result = m->File[0][op.alu.dst];
 299    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 300
 301    result[0] = arg0[0] - FLOORF(arg0[0]);
 302    result[1] = arg0[1] - FLOORF(arg0[1]);
 303    result[2] = arg0[2] - FLOORF(arg0[2]);
 304    result[3] = arg0[3] - FLOORF(arg0[3]);
 305 }
 306
 307 static void do_LG2( struct arb_vp_machine *m, union instruction op )
 308 {
 309    GLfloat *result = m->File[0][op.alu.dst];
 310    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 311
 312    result[0] = RoughApproxLog2(arg0[0]);
 313    PUFF(result);
 314 }
 315
 316
 317
 318 static void do_LIT( struct arb_vp_machine *m, union instruction op )
 319 {
 320    GLfloat *result = m->File[0][op.alu.dst];
 321    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 322
 323    const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
 324    GLfloat tmp[4];
 325
 326    tmp[0] = MAX2(arg0[0], 0.0F);
 327    tmp[1] = MAX2(arg0[1], 0.0F);
 328    tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon));
 329
 330    result[0] = 1.0;
 331    result[1] = tmp[0];
 332    result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
 333    result[3] = 1.0;
 334 }
 335
 336
 337 static void do_LOG( struct arb_vp_machine *m, union instruction op )
 338 {
 339    GLfloat *result = m->File[0][op.alu.dst];
 340    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 341    GLfloat tmp = FABSF(arg0[0]);
 342    int exponent;
 343    GLfloat mantissa = FREXPF(tmp, &exponent);
 344
 345    result[0] = (GLfloat) (exponent - 1);
 346    result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
 347    result[2] = result[0] + LOG2(result[1]);
 348    result[3] = 1.0;
 349 }
 350
 351 static void do_MAX( struct arb_vp_machine *m, union instruction op )
 352 {
 353    GLfloat *result = m->File[0][op.alu.dst];
 354    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 355    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 356
 357    result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
 358    result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
 359    result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];
 360    result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];
 361 }
 362
 363
 364 static void do_MIN( struct arb_vp_machine *m, union instruction op )
 365 {
 366    GLfloat *result = m->File[0][op.alu.dst];
 367    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 368    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 369
 370    result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
 371    result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
 372    result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];
 373    result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];
 374 }
 375
 376 static void do_MOV( struct arb_vp_machine *m, union instruction op )
 377 {
 378    GLfloat *result = m->File[0][op.alu.dst];
 379    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 380
 381    result[0] = arg0[0];
 382    result[1] = arg0[1];
 383    result[2] = arg0[2];
 384    result[3] = arg0[3];
 385 }
 386
 387 static void do_MUL( struct arb_vp_machine *m, union instruction op )
 388 {
 389    GLfloat *result = m->File[0][op.alu.dst];
 390    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 391    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 392
 393    result[0] = arg0[0] * arg1[0];
 394    result[1] = arg0[1] * arg1[1];
 395    result[2] = arg0[2] * arg1[2];
 396    result[3] = arg0[3] * arg1[3];
 397 }
 398
 399
 400 static void do_POW( struct arb_vp_machine *m, union instruction op )
 401 {
 402    GLfloat *result = m->File[0][op.alu.dst];
 403    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 404    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 405
 406    result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]);
 407    PUFF(result);
 408 }
 409
 410 static void do_REL( struct arb_vp_machine *m, union instruction op )
 411 {
 412    GLfloat *result = m->File[0][op.alu.dst];
 413    GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1);
 414    const GLfloat *arg0 = m->File[op.alu.file0][idx];
 415
 416    result[0] = arg0[0];
 417    result[1] = arg0[1];
 418    result[2] = arg0[2];
 419    result[3] = arg0[3];
 420 }
 421
 422 static void do_RCP( struct arb_vp_machine *m, union instruction op )
 423 {
 424    GLfloat *result = m->File[0][op.alu.dst];
 425    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 426
 427    result[0] = 1.0F / arg0[0];
 428    PUFF(result);
 429 }
 430
 431 static void do_RSQ( struct arb_vp_machine *m, union instruction op )
 432 {
 433    GLfloat *result = m->File[0][op.alu.dst];
 434    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 435
 436    result[0] = INV_SQRTF(FABSF(arg0[0]));
 437    PUFF(result);
 438 }
 439
 440
 441 static void do_SGE( struct arb_vp_machine *m, union instruction op )
 442 {
 443    GLfloat *result = m->File[0][op.alu.dst];
 444    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 445    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 446
 447    result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
 448    result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
 449    result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;
 450    result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;
 451 }
 452
 453
 454 static void do_SLT( struct arb_vp_machine *m, union instruction op )
 455 {
 456    GLfloat *result = m->File[0][op.alu.dst];
 457    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 458    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 459
 460    result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
 461    result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
 462    result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;
 463    result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
 464 }
 465
 466 static void do_SUB( struct arb_vp_machine *m, union instruction op )
 467 {
 468    GLfloat *result = m->File[0][op.alu.dst];
 469    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 470    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 471
 472    result[0] = arg0[0] - arg1[0];
 473    result[1] = arg0[1] - arg1[1];
 474    result[2] = arg0[2] - arg1[2];
 475    result[3] = arg0[3] - arg1[3];
 476 }
 477
 478
 479 static void do_XPD( struct arb_vp_machine *m, union instruction op )
 480 {
 481    GLfloat *result = m->File[0][op.alu.dst];
 482    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 483    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 484
 485    result[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1];
 486    result[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2];
 487    result[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0];
 488 }
 489
 490 static void do_NOP( struct arb_vp_machine *m, union instruction op )
 491 {
 492 }
 493
 494 /* Some useful debugging functions:
 495  */
 496 static void print_mask( GLuint mask )
 497 {
 498    _mesa_printf(".");
 499    if (mask&0x1) _mesa_printf("x");
 500    if (mask&0x2) _mesa_printf("y");
 501    if (mask&0x4) _mesa_printf("z");
 502    if (mask&0x8) _mesa_printf("w");
 503 }
 504
 505 static void print_reg( GLuint file, GLuint reg )
 506 {
 507    static const char *reg_file[] = {
 508       "REG",
 509       "LOCAL_PARAM",
 510       "ENV_PARAM",
 511       "STATE_VAR",
 512    };
 513
 514    if (file == 0) {
 515       if (reg == REG_RES)
 516          _mesa_printf("RES");
 517       else if (reg >= REG_ARG0 && reg <= REG_ARG1)
 518          _mesa_printf("ARG%d", reg - REG_ARG0);
 519       else if (reg >= REG_TMP0 && reg <= REG_TMP11)
 520          _mesa_printf("TMP%d", reg - REG_TMP0);
 521       else if (reg >= REG_IN0 && reg <= REG_IN15)
 522          _mesa_printf("IN%d", reg - REG_IN0);
 523       else if (reg >= REG_OUT0 && reg <= REG_OUT14)
 524          _mesa_printf("OUT%d", reg - REG_OUT0);
 525       else if (reg == REG_ADDR)
 526          _mesa_printf("ADDR");
 527       else if (reg == REG_ID)
 528          _mesa_printf("ID");
 529       else
 530          _mesa_printf("REG%d", reg);
 531    }
 532    else
 533       _mesa_printf("%s:%d", reg_file[file], reg);
 534 }
 535
 536
 537 static void print_RSW( union instruction op, const struct opcode_info *info )
 538 {
 539    GLuint swz = op.rsw.swz;
 540    GLuint neg = op.rsw.neg;
 541    GLuint i;
 542
 543    _mesa_printf("%s ", info->string);
 544    print_reg(0, op.rsw.dst);
 545    _mesa_printf(", ");
 546    print_reg(op.rsw.file0, op.rsw.idx0);
 547    _mesa_printf(".");
 548    for (i = 0; i < 4; i++, swz >>= 2) {
 549       const char *cswz = "xyzw";
 550       if (neg & (1<<i))
 551          _mesa_printf("-");
 552       _mesa_printf("%c", cswz[swz&0x3]);
 553    }
 554    _mesa_printf("\n");
 555 }
 556
 557
 558 static void print_ALU( union instruction op, const struct opcode_info *info )
 559 {
 560    _mesa_printf("%s ", info->string);
 561    print_reg(0, op.alu.dst);
 562    _mesa_printf(", ");
 563    print_reg(op.alu.file0, op.alu.idx0);
 564    if (info->nr_args > 1) {
 565       _mesa_printf(", ");
 566       print_reg(op.alu.file1, op.alu.idx1);
 567    }
 568    _mesa_printf("\n");
 569 }
 570
 571 static void print_MSK( union instruction op, const struct opcode_info *info )
 572 {
 573    _mesa_printf("%s ", info->string);
 574    print_reg(0, op.msk.dst);
 575    print_mask(op.msk.mask);
 576    _mesa_printf(", ");
 577    print_reg(op.msk.file, op.msk.idx);
 578    _mesa_printf("\n");
 579 }
 580
 581
 582 static void print_NOP( union instruction op, const struct opcode_info *info )
 583 {
 584 }
 585
/* NOTE(review): these three macros are not referenced anywhere in the
 * visible part of this file - confirm whether they are still needed.
 */
#define NOP 0
#define ALU 1
#define SWZ 2

/* Per-opcode description table: { number of source arguments,
 * mnemonic, disassembly callback }.  It is indexed by opcode value
 * (op.alu.opcode in _tnl_disassem_vba_insn, inst->Opcode in
 * cvp_emit_inst), so the entry order must match the opcode numbering.
 * Entries using print_NOP produce no disassembly output.
 */
static const struct opcode_info opcode_info[] =
{
   { 1, "ABS", print_ALU },
   { 2, "ADD", print_ALU },
   { 1, "ARL", print_NOP },
   { 2, "DP3", print_ALU },
   { 2, "DP4", print_ALU },
   { 2, "DPH", print_ALU },
   { 2, "DST", print_ALU },
   { 0, "END", print_NOP },
   { 1, "EX2", print_ALU },
   { 1, "EXP", print_ALU },
   { 1, "FLR", print_ALU },
   { 1, "FRC", print_ALU },
   { 1, "LG2", print_ALU },
   { 1, "LIT", print_ALU },
   { 1, "LOG", print_ALU },
   { 3, "MAD", print_NOP },
   { 2, "MAX", print_ALU },
   { 2, "MIN", print_ALU },
   { 1, "MOV", print_ALU },
   { 2, "MUL", print_ALU },
   { 2, "POW", print_ALU },
   { 1, "PRT", print_ALU }, /* PRINT */
   { 1, "RCC", print_NOP },
   { 1, "RCP", print_ALU },
   { 1, "RSQ", print_ALU },
   { 2, "SGE", print_ALU },
   { 2, "SLT", print_ALU },
   { 2, "SUB", print_ALU },
   { 1, "SWZ", print_NOP },
   { 2, "XPD", print_ALU },
   { 1, "RSW", print_RSW },
   { 2, "MSK", print_MSK },
   { 1, "REL", print_ALU },
};
 626
 627 void _tnl_disassem_vba_insn( union instruction op )
 628 {
 629    const struct opcode_info *info = &opcode_info[op.alu.opcode];
 630    info->print( op, info );
 631 }
 632
 633
/* Interpreter dispatch table: one handler per opcode.
 *
 * NOTE(review): the dispatch site is not visible here; the per-entry
 * labels below assume this table is indexed by the same opcode values,
 * in the same order, as opcode_info[] - confirm.  Under that
 * assumption the do_NOP slots correspond to ARL, END, MAD and RCC, and
 * the SWZ slot is served by do_RSW.
 */
static void (* const opcode_func[])(struct arb_vp_machine *, union instruction) =
{
   do_ABS,   /* ABS */
   do_ADD,   /* ADD */
   do_NOP,   /* ARL */
   do_DP3,   /* DP3 */
   do_DP4,   /* DP4 */
   do_DPH,   /* DPH */
   do_DST,   /* DST */
   do_NOP,   /* END */
   do_EX2,   /* EX2 */
   do_EXP,   /* EXP */
   do_FLR,   /* FLR */
   do_FRC,   /* FRC */
   do_LG2,   /* LG2 */
   do_LIT,   /* LIT */
   do_LOG,   /* LOG */
   do_NOP,   /* MAD */
   do_MAX,   /* MAX */
   do_MIN,   /* MIN */
   do_MOV,   /* MOV */
   do_MUL,   /* MUL */
   do_POW,   /* POW */
   do_PRT,   /* PRT */
   do_NOP,   /* RCC */
   do_RCP,   /* RCP */
   do_RSQ,   /* RSQ */
   do_SGE,   /* SGE */
   do_SLT,   /* SLT */
   do_SUB,   /* SUB */
   do_RSW,   /* SWZ */
   do_XPD,   /* XPD */
   do_RSW,   /* RSW */
   do_MSK,   /* MSK */
   do_REL,   /* REL */
};
 670
 671 static union instruction *cvp_next_instruction( struct compilation *cp )
 672 {
 673    union instruction *op = cp->csr++;
 674    op->dword = 0;
 675    return op;
 676 }
 677
 678 static struct reg cvp_make_reg( GLuint file, GLuint idx )
 679 {
 680    struct reg reg;
 681    reg.file = file;
 682    reg.idx = idx;
 683    return reg;
 684 }
 685
 686 static struct reg cvp_emit_rel( struct compilation *cp,
 687                                 struct reg reg,
 688                                 struct reg tmpreg )
 689 {
 690    union instruction *op = cvp_next_instruction(cp);
 691    op->alu.opcode = REL;
 692    op->alu.file0 = reg.file;
 693    op->alu.idx0 = reg.idx;
 694    op->alu.dst = tmpreg.idx;
 695    return tmpreg;
 696 }
 697
 698
 699 static struct reg cvp_load_reg( struct compilation *cp,
 700                                 GLuint file,
 701                                 GLuint index,
 702                                 GLuint rel,
 703                                 GLuint tmpidx )
 704 {
 705    struct reg tmpreg = cvp_make_reg(FILE_REG, tmpidx);
 706    struct reg reg;
 707
 708    switch (file) {
 709    case PROGRAM_TEMPORARY:
 710       return cvp_make_reg(FILE_REG, REG_TMP0 + index);
 711
 712    case PROGRAM_INPUT:
 713       return cvp_make_reg(FILE_REG, REG_IN0 + index);
 714
 715    case PROGRAM_OUTPUT:
 716       return cvp_make_reg(FILE_REG, REG_OUT0 + index);
 717
 718       /* These two aren't populated by the parser?
 719        */
 720    case PROGRAM_LOCAL_PARAM:
 721       reg = cvp_make_reg(FILE_LOCAL_PARAM, index);
 722       if (rel)
 723          return cvp_emit_rel(cp, reg, tmpreg);
 724       else
 725          return reg;
 726
 727    case PROGRAM_ENV_PARAM:
 728       reg = cvp_make_reg(FILE_ENV_PARAM, index);
 729       if (rel)
 730          return cvp_emit_rel(cp, reg, tmpreg);
 731       else
 732          return reg;
 733
 734    case PROGRAM_STATE_VAR:
 735       reg = cvp_make_reg(FILE_STATE_PARAM, index);
 736       if (rel)
 737          return cvp_emit_rel(cp, reg, tmpreg);
 738       else
 739          return reg;
 740
 741       /* Invalid values:
 742        */
 743    case PROGRAM_WRITE_ONLY:
 744    case PROGRAM_ADDRESS:
 745    default:
 746       assert(0);
 747       return tmpreg;            /* can't happen */
 748    }
 749 }
 750
 751 static struct reg cvp_emit_arg( struct compilation *cp,
 752                                 const struct vp_src_register *src,
 753                                 GLuint arg )
 754 {
 755    struct reg reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr, arg );
 756    union instruction rsw, noop;
 757
 758    /* Emit any necessary swizzling.
 759     */
 760    rsw.dword = 0;
 761    rsw.rsw.neg = src->Negate ? WRITEMASK_XYZW : 0;
 762    rsw.rsw.swz = ((GET_SWZ(src->Swizzle, 0) << 0) |
 763                   (GET_SWZ(src->Swizzle, 1) << 2) |
 764                   (GET_SWZ(src->Swizzle, 2) << 4) |
 765                   (GET_SWZ(src->Swizzle, 3) << 6));
 766
 767    noop.dword = 0;
 768    noop.rsw.neg = 0;
 769    noop.rsw.swz = RSW_NOOP;
 770
 771    if (rsw.dword != noop.dword) {
 772       union instruction *op = cvp_next_instruction(cp);
 773       struct reg rsw_reg = cvp_make_reg(FILE_REG, REG_ARG0 + arg);
 774       op->dword = rsw.dword;
 775       op->rsw.opcode = RSW;
 776       op->rsw.file0 = reg.file;
 777       op->rsw.idx0 = reg.idx;
 778       op->rsw.dst = rsw_reg.idx;
 779       return rsw_reg;
 780    }
 781    else
 782       return reg;
 783 }
 784
 785 static GLuint cvp_choose_result( struct compilation *cp,
 786                                  const struct vp_dst_register *dst,
 787                                  union instruction *fixup )
 788 {
 789    GLuint mask = dst->WriteMask;
 790    GLuint idx;
 791
 792    switch (dst->File) {
 793    case PROGRAM_TEMPORARY:
 794       idx = REG_TMP0 + dst->Index;
 795       break;
 796    case PROGRAM_OUTPUT:
 797       idx = REG_OUT0 + dst->Index;
 798       break;
 799    default:
 800       assert(0);
 801       return REG_RES;           /* can't happen */
 802    }
 803
 804    /* Optimization: When writing (with a writemask) to an undefined
 805     * value for the first time, the writemask may be ignored.
 806     */
 807    if (mask != WRITEMASK_XYZW && (cp->reg_active & (1 << idx))) {
 808       fixup->msk.opcode = MSK;
 809       fixup->msk.dst = idx;
 810       fixup->msk.file = FILE_REG;
 811       fixup->msk.idx = REG_RES;
 812       fixup->msk.mask = mask;
 813       cp->reg_active |= 1 << idx;
 814       return REG_RES;
 815    }
 816    else {
 817       fixup->dword = 0;
 818       cp->reg_active |= 1 << idx;
 819       return idx;
 820    }
 821 }
 822
 823 static struct reg cvp_emit_rsw( struct compilation *cp,
 824                                 GLuint dst,
 825                                 struct reg src,
 826                                 GLuint neg,
 827                                 GLuint swz,
 828                                 GLboolean force)
 829 {
 830    struct reg retval;
 831
 832    if (swz != RSW_NOOP || neg != 0) {
 833       union instruction *op = cvp_next_instruction(cp);
 834       op->rsw.opcode = RSW;
 835       op->rsw.dst = dst;
 836       op->rsw.file0 = src.file;
 837       op->rsw.idx0 = src.idx;
 838       op->rsw.neg = neg;
 839       op->rsw.swz = swz;
 840
 841       retval.file = FILE_REG;
 842       retval.idx = dst;
 843       return retval;
 844    }
 845    else if (force) {
 846       /* Oops.  Degenerate case:
 847        */
 848       union instruction *op = cvp_next_instruction(cp);
 849       op->alu.opcode = VP_OPCODE_MOV;
 850       op->alu.dst = dst;
 851       op->alu.file0 = src.file;
 852       op->alu.idx0 = src.idx;
 853
 854       retval.file = FILE_REG;
 855       retval.idx = dst;
 856       return retval;
 857    }
 858    else {
 859       return src;
 860    }
 861 }
 862
 863
 864 static void cvp_emit_inst( struct compilation *cp,
 865                            const struct vp_instruction *inst )
 866 {
 867    const struct opcode_info *info = &opcode_info[inst->Opcode];
 868    union instruction *op;
 869    union instruction fixup;
 870    struct reg reg[3];
 871    GLuint result, i;
 872
 873    assert(sizeof(*op) == sizeof(GLuint));
 874
 875    /* Need to handle SWZ, ARL specially.
 876     */
 877    switch (inst->Opcode) {
 878       /* Split into mul and add:
 879        */
 880    case VP_OPCODE_MAD:
 881       result = cvp_choose_result( cp, &inst->DstReg, &fixup );
 882       for (i = 0; i < 3; i++)
 883          reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0+i );
 884
 885       op = cvp_next_instruction(cp);
 886       op->alu.opcode = VP_OPCODE_MUL;
 887       op->alu.file0 = reg[0].file;
 888       op->alu.idx0 = reg[0].idx;
 889       op->alu.file1 = reg[1].file;
 890       op->alu.idx1 = reg[1].idx;
 891       op->alu.dst = REG_ARG0;
 892
 893       op = cvp_next_instruction(cp);
 894       op->alu.opcode = VP_OPCODE_ADD;
 895       op->alu.file0 = FILE_REG;
 896       op->alu.idx0 = REG_ARG0;
 897       op->alu.file1 = reg[2].file;
 898       op->alu.idx1 = reg[2].idx;
 899       op->alu.dst = result;
 900       break;
 901
 902    case VP_OPCODE_ARL:
 903       reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
 904
 905       op = cvp_next_instruction(cp);
 906       op->alu.opcode = VP_OPCODE_FLR;
 907       op->alu.dst = REG_ADDR;
 908       op->alu.file0 = reg[0].file;
 909       op->alu.idx0 = reg[0].idx;
 910       break;
 911
 912    case VP_OPCODE_SWZ: {
 913       GLuint swz0 = 0, swz1 = 0;
 914       GLuint neg0 = 0, neg1 = 0;
 915       GLuint mask = 0;
 916
 917       /* Translate 3-bit-per-element swizzle into two 2-bit swizzles,
 918        * one from the source register the other from a constant
 919        * {0,0,0,1}.
 920        */
 921       for (i = 0; i < 4; i++) {
 922          GLuint swzelt = GET_SWZ(inst->SrcReg[0].Swizzle, i);
 923          if (swzelt >= SWIZZLE_ZERO) {
 924             neg0 |= inst->SrcReg[0].Negate & (1<<i);
 925             if (swzelt == SWIZZLE_ONE)
 926                swz0 |= SWIZZLE_W << (i*2);
 927             else if (i < SWIZZLE_W)
 928                swz0 |= i << (i*2);
 929          }
 930          else {
 931             mask |= 1<<i;
 932             neg1 |= inst->SrcReg[0].Negate & (1<<i);
 933             swz1 |= swzelt << (i*2);
 934          }
 935       }
 936
 937       result = cvp_choose_result( cp, &inst->DstReg, &fixup );
 938       reg[0].file = FILE_REG;
 939       reg[0].idx = REG_ID;
 940       reg[1] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
 941
 942       if (mask == WRITEMASK_XYZW) {
 943          cvp_emit_rsw(cp, result, reg[0], neg0, swz0, GL_TRUE);
 944
 945       }
 946       else if (mask == 0) {
 947          cvp_emit_rsw(cp, result, reg[1], neg1, swz1, GL_TRUE);
 948       }
 949       else {
 950          cvp_emit_rsw(cp, result, reg[0], neg0, swz0, GL_TRUE);
 951          reg[1] = cvp_emit_rsw(cp, REG_ARG0, reg[1], neg1, swz1, GL_FALSE);
 952
 953          op = cvp_next_instruction(cp);
 954          op->msk.opcode = MSK;
 955          op->msk.dst = result;
 956          op->msk.file = reg[1].file;
 957          op->msk.idx = reg[1].idx;
 958          op->msk.mask = mask;
 959       }
 960
 961       if (result == REG_RES) {
 962          op = cvp_next_instruction(cp);
 963          op->dword = fixup.dword;
 964       }
 965       break;
 966    }
 967    case VP_OPCODE_PRINT:
 968    case VP_OPCODE_END:
 969       break;
 970
 971    default:
 972       result = cvp_choose_result( cp, &inst->DstReg, &fixup );
 973       for (i = 0; i < info->nr_args; i++)
 974          reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i );
 975
 976       op = cvp_next_instruction(cp);
 977       op->alu.opcode = inst->Opcode;
 978       op->alu.file0 = reg[0].file;
 979       op->alu.idx0 = reg[0].idx;
 980       op->alu.file1 = reg[1].file;
 981       op->alu.idx1 = reg[1].idx;
 982       op->alu.dst = result;
 983
 984       if (result == REG_RES) {
 985          op = cvp_next_instruction(cp);
 986          op->dword = fixup.dword;
 987       }
 988       break;
 989    }
 990 }
 991
 992
 993 static void compile_vertex_program( struct arb_vp_machine *m,
 994                                     const struct vertex_program *program )
 995 {
 996    struct compilation cp;
 997    GLuint i;
 998
 999    /* Initialize cp:
1000     */
1001    memset(&cp, 0, sizeof(cp));
1002    cp.VB = m->VB;
1003    cp.csr = m->store;
1004
1005    /* Compile instructions:
1006     */
1007    for (i = 0; i < program->Base.NumInstructions; i++) {
1008       cvp_emit_inst(&cp, &program->Instructions[i]);
1009    }
1010
1011    /* Finish up:
1012     */
1013    m->instructions = m->store;
1014    m->nr_instructions = cp.csr - m->store;
1015
1016
1017    /* Print/disassemble:
1018     */
1019    if (DISASSEM) {
1020       for (i = 0; i < m->nr_instructions; i++) {
1021          _tnl_disassem_vba_insn(m->instructions[i]);
1022       }
1023       _mesa_printf("\n\n");
1024    }
1025
1026 #ifdef USE_SSE_ASM
1027    /* TODO: check if anything changed...
1028     */
1029    if (m->try_codegen)
1030       _tnl_sse_codegen_vertex_program(m);
1031 #endif
1032
1033 }
1034
1035
1036
1037
1038 /* ----------------------------------------------------------------------
1039  * Execution
1040  */
1041 static void userclip( GLcontext *ctx,
1042                       GLvector4f *clip,
1043                       GLubyte *clipmask,
1044                       GLubyte *clipormask,
1045                       GLubyte *clipandmask )
1046 {
1047    GLuint p;
1048
1049    for (p = 0; p < ctx->Const.MaxClipPlanes; p++)
1050       if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
1051          GLuint nr, i;
1052          const GLfloat a = ctx->Transform._ClipUserPlane[p][0];
1053          const GLfloat b = ctx->Transform._ClipUserPlane[p][1];
1054          const GLfloat c = ctx->Transform._ClipUserPlane[p][2];
1055          const GLfloat d = ctx->Transform._ClipUserPlane[p][3];
1056          GLfloat *coord = (GLfloat *)clip->data;
1057          GLuint stride = clip->stride;
1058          GLuint count = clip->count;
1059
1060          for (nr = 0, i = 0 ; i < count ; i++) {
1061             GLfloat dp = (coord[0] * a +
1062                           coord[1] * b +
1063                           coord[2] * c +
1064                           coord[3] * d);
1065
1066             if (dp < 0) {
1067                nr++;
1068                clipmask[i] |= CLIP_USER_BIT;
1069             }
1070
1071             STRIDE_F(coord, stride);
1072          }
1073
1074          if (nr > 0) {
1075             *clipormask |= CLIP_USER_BIT;
1076             if (nr == count) {
1077                *clipandmask |= CLIP_USER_BIT;
1078                return;
1079             }
1080          }
1081       }
1082 }
1083
1084
1085 static GLboolean do_ndc_cliptest( struct arb_vp_machine *m )
1086 {
1087    GLcontext *ctx = m->ctx;
1088    TNLcontext *tnl = TNL_CONTEXT(ctx);
1089    struct vertex_buffer *VB = m->VB;
1090
1091    /* Cliptest and perspective divide.  Clip functions must clear
1092     * the clipmask.
1093     */
1094    m->ormask = 0;
1095    m->andmask = CLIP_ALL_BITS;
1096
1097    if (tnl->NeedNdcCoords) {
1098       VB->NdcPtr =
1099          _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
1100                                             &m->ndcCoords,
1101                                             m->clipmask,
1102                                             &m->ormask,
1103                                             &m->andmask );
1104    }
1105    else {
1106       VB->NdcPtr = NULL;
1107       _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
1108                                             NULL,
1109                                             m->clipmask,
1110                                             &m->ormask,
1111                                             &m->andmask );
1112    }
1113
1114    if (m->andmask) {
1115       /* All vertices are outside the frustum */
1116       return GL_FALSE;
1117    }
1118
1119    /* Test userclip planes.  This contributes to VB->ClipMask.
1120     */
1121    if (ctx->Transform.ClipPlanesEnabled && !ctx->VertexProgram._Enabled) {
1122       userclip( ctx,
1123                 VB->ClipPtr,
1124                 m->clipmask,
1125                 &m->ormask,
1126                 &m->andmask );
1127
1128       if (m->andmask) {
1129          return GL_FALSE;
1130       }
1131    }
1132
1133    VB->ClipAndMask = m->andmask;
1134    VB->ClipOrMask = m->ormask;
1135    VB->ClipMask = m->clipmask;
1136
1137    return GL_TRUE;
1138 }
1139
1140
1141 static void call_func( struct arb_vp_machine *m )
1142 {
1143    m->func(m);
1144 }
1145
1146 /**
1147  * Execute the given vertex program.
1148  *
1149  * TODO: Integrate the t_vertex.c code here, to build machine vertices
1150  * directly at this point.
1151  *
1152  * TODO: Eliminate the VB struct entirely and just use
1153  * struct arb_vertex_machine.
1154  */
1155 static GLboolean
1156 run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
1157 {
1158    struct vertex_program *program = (ctx->VertexProgram._Enabled ?
1159                                      ctx->VertexProgram.Current :
1160                                      ctx->_TnlProgram);
1161    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
1162    struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1163    GLuint i, j, outputs = program->OutputsWritten;
1164
1165    if (program->Parameters) {
1166       _mesa_load_state_parameters(ctx, program->Parameters);
1167    }
1168
1169
1170    /* Initialize regs where necessary:
1171     */
1172    ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
1173
1174    m->nr_inputs = m->nr_outputs = 0;
1175
1176    for (i = 0; i < 16; i++) {
1177       if (program->InputsRead & (1<<i)) {
1178          GLuint j = m->nr_inputs++;
1179          m->input[j].idx = i;
1180          m->input[j].data = m->VB->AttribPtr[i]->data;
1181          m->input[j].stride = m->VB->AttribPtr[i]->stride;
1182          m->input[j].size = m->VB->AttribPtr[i]->size;
1183          ASSIGN_4V(m->File[0][REG_IN0 + i], 0, 0, 0, 1);
1184       }
1185    }
1186
1187    for (i = 0; i < 15; i++) {
1188       if (program->OutputsWritten & (1<<i)) {
1189          GLuint j = m->nr_outputs++;
1190          m->output[j].idx = i;
1191          m->output[j].data = m->attribs[i].data;
1192       }
1193    }
1194
1195
1196    /* Run the actual program:
1197     */
1198    for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) {
1199       for (j = 0; j < m->nr_inputs; j++) {
1200          GLuint idx = REG_IN0 + m->input[j].idx;
1201          switch (m->input[j].size) {
1202          case 4: m->File[0][idx][3] = m->input[j].data[3];
1203          case 3: m->File[0][idx][2] = m->input[j].data[2];
1204          case 2: m->File[0][idx][1] = m->input[j].data[1];
1205          case 1: m->File[0][idx][0] = m->input[j].data[0];
1206          }
1207
1208          STRIDE_F(m->input[j].data, m->input[j].stride);
1209       }
1210
1211       if (m->func) {
1212          call_func( m );
1213       }
1214       else {
1215          for (j = 0; j < m->nr_instructions; j++) {
1216             union instruction inst = m->instructions[j];
1217             opcode_func[inst.alu.opcode]( m, inst );
1218          }
1219       }
1220
1221       for (j = 0; j < m->nr_outputs; j++) {
1222          GLuint idx = REG_OUT0 + m->output[j].idx;
1223          m->output[j].data[0] = m->File[0][idx][0];
1224          m->output[j].data[1] = m->File[0][idx][1];
1225          m->output[j].data[2] = m->File[0][idx][2];
1226          m->output[j].data[3] = m->File[0][idx][3];
1227          m->output[j].data += 4;
1228       }
1229    }
1230
1231    /* Setup the VB pointers so that the next pipeline stages get
1232     * their data from the right place (the program output arrays).
1233     *
1234     * TODO: 1) Have tnl use these RESULT values for outputs rather
1235     * than trying to shoe-horn inputs and outputs into one set of
1236     * values.
1237     *
1238     * TODO: 2) Integrate t_vertex.c so that we just go straight ahead
1239     * and build machine vertices here.
1240     */
1241    VB->ClipPtr = &m->attribs[VERT_RESULT_HPOS];
1242    VB->ClipPtr->count = VB->Count;
1243
1244    if (outputs & (1<<VERT_RESULT_COL0)) {
1245       VB->ColorPtr[0] = &m->attribs[VERT_RESULT_COL0];
1246       VB->AttribPtr[VERT_ATTRIB_COLOR0] = VB->ColorPtr[0];
1247    }
1248
1249    if (outputs & (1<<VERT_RESULT_BFC0)) {
1250       VB->ColorPtr[1] = &m->attribs[VERT_RESULT_BFC0];
1251    }
1252
1253    if (outputs & (1<<VERT_RESULT_COL1)) {
1254       VB->SecondaryColorPtr[0] = &m->attribs[VERT_RESULT_COL1];
1255       VB->AttribPtr[VERT_ATTRIB_COLOR1] = VB->SecondaryColorPtr[0];
1256    }
1257
1258    if (outputs & (1<<VERT_RESULT_BFC1)) {
1259       VB->SecondaryColorPtr[1] = &m->attribs[VERT_RESULT_BFC1];
1260    }
1261
1262    if (outputs & (1<<VERT_RESULT_FOGC)) {
1263       VB->FogCoordPtr = &m->attribs[VERT_RESULT_FOGC];
1264       VB->AttribPtr[VERT_ATTRIB_FOG] = VB->FogCoordPtr;
1265    }
1266
1267    if (outputs & (1<<VERT_RESULT_PSIZ)) {
1268       VB->PointSizePtr = &m->attribs[VERT_RESULT_PSIZ];
1269       VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &m->attribs[VERT_RESULT_PSIZ];
1270    }
1271
1272    for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1273       if (outputs & (1<<(VERT_RESULT_TEX0+i))) {
1274          VB->TexCoordPtr[i] = &m->attribs[VERT_RESULT_TEX0 + i];
1275          VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i];
1276       }
1277    }
1278
1279 #if 0
1280    for (i = 0; i < VB->Count; i++) {
1281       printf("Out %d: %f %f %f %f %f %f %f %f\n", i,
1282              VEC_ELT(VB->ClipPtr, GLfloat, i)[0],
1283              VEC_ELT(VB->ClipPtr, GLfloat, i)[1],
1284              VEC_ELT(VB->ClipPtr, GLfloat, i)[2],
1285              VEC_ELT(VB->ClipPtr, GLfloat, i)[3],
1286              VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[0],
1287              VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[1],
1288              VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[2],
1289              VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[3]);
1290    }
1291 #endif
1292
1293    /* Perform NDC and cliptest operations:
1294     */
1295    return do_ndc_cliptest(m);
1296 }
1297
1298
1299 static void
1300 validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage )
1301 {
1302    struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1303    struct vertex_program *program =
1304       (ctx->VertexProgram._Enabled ? ctx->VertexProgram.Current : 0);
1305
1306 #if TNL_FIXED_FUNCTION_PROGRAM
1307    if (!program) {
1308       program = ctx->_TnlProgram;
1309    }
1310 #endif
1311
1312    if (program) {
1313       compile_vertex_program( m, program );
1314
1315       /* Grab the state GL state and put into registers:
1316        */
1317       m->File[FILE_LOCAL_PARAM] = program->Base.LocalParams;
1318       m->File[FILE_ENV_PARAM] = ctx->VertexProgram.Parameters;
1319       m->File[FILE_STATE_PARAM] = program->Parameters->ParameterValues;
1320    }
1321 }
1322
1323
1324
1325
1326
1327
1328
1329 /**
1330  * Called the first time stage->run is called.  In effect, don't
1331  * allocate data until the first time the stage is run.
1332  */
1333 static GLboolean init_vertex_program( GLcontext *ctx,
1334                                       struct tnl_pipeline_stage *stage )
1335 {
1336    TNLcontext *tnl = TNL_CONTEXT(ctx);
1337    struct vertex_buffer *VB = &(tnl->vb);
1338    struct arb_vp_machine *m;
1339    const GLuint size = VB->Size;
1340    GLuint i;
1341
1342    stage->privatePtr = MALLOC(sizeof(*m));
1343    m = ARB_VP_MACHINE(stage);
1344    if (!m)
1345       return GL_FALSE;
1346
1347    /* arb_vertex_machine struct should subsume the VB:
1348     */
1349    m->VB = VB;
1350    m->ctx = ctx;
1351
1352    m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16);
1353
1354    if (_mesa_getenv("MESA_EXPERIMENTAL"))
1355       m->try_codegen = 1;
1356
1357    _mesa_printf("try_codegen %d\n", m->try_codegen);
1358
1359    /* Allocate arrays of vertex output values */
1360    for (i = 0; i < VERT_RESULT_MAX; i++) {
1361       _mesa_vector4f_alloc( &m->attribs[i], 0, size, 32 );
1362       m->attribs[i].size = 4;
1363    }
1364
1365    /* a few other misc allocations */
1366    _mesa_vector4f_alloc( &m->ndcCoords, 0, size, 32 );
1367    m->clipmask = (GLubyte *) ALIGN_MALLOC(sizeof(GLubyte)*size, 32 );
1368
1369
1370 #if TNL_FIXED_FUNCTION_PROGRAM
1371    _mesa_allow_light_in_model( ctx, GL_FALSE );
1372 #endif
1373
1374
1375    return GL_TRUE;
1376 }
1377
1378
1379
1380
1381 /**
1382  * Destructor for this pipeline stage.
1383  */
1384 static void dtr( struct tnl_pipeline_stage *stage )
1385 {
1386    struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1387
1388    if (m) {
1389       GLuint i;
1390
1391       /* free the vertex program result arrays */
1392       for (i = 0; i < VERT_RESULT_MAX; i++)
1393          _mesa_vector4f_free( &m->attribs[i] );
1394
1395       /* free misc arrays */
1396       _mesa_vector4f_free( &m->ndcCoords );
1397       ALIGN_FREE( m->clipmask );
1398       ALIGN_FREE( m->File[0] );
1399
1400       FREE( m );
1401       stage->privatePtr = NULL;
1402    }
1403 }
1404
1405 /**
1406  * Public description of this pipeline stage.
1407  */
1408 const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage =
1409 {
1410    "vertex-program",
1411    NULL,                        /* private_data */
1412    init_vertex_program,         /* create */
1413    dtr,                         /* destroy */
1414    validate_vertex_program,     /* validate */
1415    run_arb_vertex_program       /* run */
1416 };