src/gallium/docs/source/tgsi.rst

   1 TGSI
   2 ====
   3
   4 TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language
   5 for describing shaders. Since Gallium is inherently shaderful, shaders are
   6 an important part of the API. TGSI is the only intermediate representation
   7 used by all drivers.
   8
   9 From GL_NV_vertex_program
  10 -------------------------
  11
  12
  13 ARL - Address Register Load
  14
  15 .. math::
  16
  17   dst.x = \lfloor src.x\rfloor
  18
  19   dst.y = \lfloor src.y\rfloor
  20
  21   dst.z = \lfloor src.z\rfloor
  22
  23   dst.w = \lfloor src.w\rfloor
  24
  25
  26 MOV - Move
  27
  28 .. math::
  29
  30   dst.x = src.x
  31
  32   dst.y = src.y
  33
  34   dst.z = src.z
  35
  36   dst.w = src.w
  37
  38
  39 LIT - Light Coefficients
  40
  41 .. math::
  42
  43   dst.x = 1.0
  44
  45   dst.y = max(src.x, 0.0)
  46
  47   dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
  48
  49   dst.w = 1.0
  50
  51
  52 RCP - Reciprocal
  53
  54 .. math::
  55
  56   dst.x = 1.0 / src.x
  57
  58   dst.y = 1.0 / src.x
  59
  60   dst.z = 1.0 / src.x
  61
  62   dst.w = 1.0 / src.x
  63
  64
  65 RSQ - Reciprocal Square Root
  66
  67 .. math::
  68
  69   dst.x = 1.0 / sqrt(abs(src.x))
  70
  71   dst.y = 1.0 / sqrt(abs(src.x))
  72
  73   dst.z = 1.0 / sqrt(abs(src.x))
  74
  75   dst.w = 1.0 / sqrt(abs(src.x))
  76
  77
  78 EXP - Approximate Exponential Base 2
  79
  80 .. math::
  81
  82   dst.x = pow(2.0, \lfloor src.x\rfloor)
  83
  84   dst.y = src.x - \lfloor src.x\rfloor
  85
  86   dst.z = pow(2.0, src.x)
  87
  88   dst.w = 1.0
  89
  90
  91 LOG - Approximate Logarithm Base 2
  92
  93 .. math::
  94
  95   dst.x = \lfloor lg2(abs(src.x))\rfloor
  96
  97   dst.y = abs(src.x) / pow(2.0, \lfloor lg2(abs(src.x))\rfloor )
  98
  99   dst.z = lg2(abs(src.x))
 100
 101   dst.w = 1.0
 102
 103
 104 MUL - Multiply
 105
 106 .. math::
 107
 108   dst.x = src0.x * src1.x
 109
 110   dst.y = src0.y * src1.y
 111
 112   dst.z = src0.z * src1.z
 113
 114   dst.w = src0.w * src1.w
 115
 116
 117 ADD - Add
 118
 119 .. math::
 120
 121   dst.x = src0.x + src1.x
 122
 123   dst.y = src0.y + src1.y
 124
 125   dst.z = src0.z + src1.z
 126
 127   dst.w = src0.w + src1.w
 128
 129
 130 DP3 - 3-component Dot Product
 131
 132 .. math::
 133
 134   dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 135
 136   dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 137
 138   dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 139
 140   dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 141
 142
 143 DP4 - 4-component Dot Product
 144
 145 .. math::
 146
 147   dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 148
 149   dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 150
 151   dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 152
 153   dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 154
 155
 156 DST - Distance Vector
 157
 158 .. math::
 159
 160   dst.x = 1.0
 161
 162   dst.y = src0.y * src1.y
 163
 164   dst.z = src0.z
 165
 166   dst.w = src1.w
 167
 168
 169 MIN - Minimum
 170
 171 .. math::
 172
 173   dst.x = min(src0.x, src1.x)
 174
 175   dst.y = min(src0.y, src1.y)
 176
 177   dst.z = min(src0.z, src1.z)
 178
 179   dst.w = min(src0.w, src1.w)
 180
 181
 182 MAX - Maximum
 183
 184 .. math::
 185
 186   dst.x = max(src0.x, src1.x)
 187
 188   dst.y = max(src0.y, src1.y)
 189
 190   dst.z = max(src0.z, src1.z)
 191
 192   dst.w = max(src0.w, src1.w)
 193
 194
 195 SLT - Set On Less Than
 196
 197 .. math::
 198
 199   dst.x = (src0.x < src1.x) ? 1.0 : 0.0
 200
 201   dst.y = (src0.y < src1.y) ? 1.0 : 0.0
 202
 203   dst.z = (src0.z < src1.z) ? 1.0 : 0.0
 204
 205   dst.w = (src0.w < src1.w) ? 1.0 : 0.0
 206
 207
 208 SGE - Set On Greater Equal Than
 209
 210 .. math::
 211
 212   dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
 213
 214   dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
 215
 216   dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
 217
 218   dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
 219
 220
 221 MAD - Multiply And Add
 222
 223 .. math::
 224
 225   dst.x = src0.x * src1.x + src2.x
 226
 227   dst.y = src0.y * src1.y + src2.y
 228
 229   dst.z = src0.z * src1.z + src2.z
 230
 231   dst.w = src0.w * src1.w + src2.w
 232
 233
 234 SUB - Subtract
 235
 236 .. math::
 237
 238   dst.x = src0.x - src1.x
 239
 240   dst.y = src0.y - src1.y
 241
 242   dst.z = src0.z - src1.z
 243
 244   dst.w = src0.w - src1.w
 245
 246
 247 LRP - Linear Interpolate
 248
 249 .. math::
 250
 251   dst.x = src0.x * (src1.x - src2.x) + src2.x
 252
 253   dst.y = src0.y * (src1.y - src2.y) + src2.y
 254
 255   dst.z = src0.z * (src1.z - src2.z) + src2.z
 256
 257   dst.w = src0.w * (src1.w - src2.w) + src2.w
 258
 259
 260 CND - Condition
 261
 262 .. math::
 263
 264   dst.x = (src2.x > 0.5) ? src0.x : src1.x
 265
 266   dst.y = (src2.y > 0.5) ? src0.y : src1.y
 267
 268   dst.z = (src2.z > 0.5) ? src0.z : src1.z
 269
 270   dst.w = (src2.w > 0.5) ? src0.w : src1.w
 271
 272
 273 DP2A - 2-component Dot Product And Add
 274
 275 .. math::
 276
 277   dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
 278
 279   dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
 280
 281   dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
 282
 283   dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
 284
 285
 286 FRAC - Fraction
 287
 288 .. math::
 289
 290   dst.x = src.x - \lfloor src.x\rfloor
 291
 292   dst.y = src.y - \lfloor src.y\rfloor
 293
 294   dst.z = src.z - \lfloor src.z\rfloor
 295
 296   dst.w = src.w - \lfloor src.w\rfloor
 297
 298
 299 CLAMP - Clamp
 300
 301 .. math::
 302
 303   dst.x = clamp(src0.x, src1.x, src2.x)
 304   dst.y = clamp(src0.y, src1.y, src2.y)
 305   dst.z = clamp(src0.z, src1.z, src2.z)
 306   dst.w = clamp(src0.w, src1.w, src2.w)
 307
 308
 309 FLR - Floor
 310
 311 This is identical to ARL.
 312
 313 .. math::
 314
 315   dst.x = \lfloor src.x\rfloor
 316
 317   dst.y = \lfloor src.y\rfloor
 318
 319   dst.z = \lfloor src.z\rfloor
 320
 321   dst.w = \lfloor src.w\rfloor
 322
 323
 324 1.3.9  ROUND - Round
 325
 326 .. math::
 327
 328   dst.x = round(src.x)
 329   dst.y = round(src.y)
 330   dst.z = round(src.z)
 331   dst.w = round(src.w)
 332
 333
 334 1.3.10  EX2 - Exponential Base 2
 335
 336 .. math::
 337
 338   dst.x = pow(2.0, src.x)
 339   dst.y = pow(2.0, src.x)
 340   dst.z = pow(2.0, src.x)
 341   dst.w = pow(2.0, src.x)
 342
 343
 344 1.3.11  LG2 - Logarithm Base 2
 345
 346 .. math::
 347
 348   dst.x = lg2(src.x)
 349   dst.y = lg2(src.x)
 350   dst.z = lg2(src.x)
 351   dst.w = lg2(src.x)
 352
 353
 354 1.3.12  POW - Power
 355
 356 .. math::
 357
 358   dst.x = pow(src0.x, src1.x)
 359   dst.y = pow(src0.x, src1.x)
 360   dst.z = pow(src0.x, src1.x)
 361   dst.w = pow(src0.x, src1.x)
 362
 363 1.3.15  XPD - Cross Product
 364
 365 .. math::
 366
 367   dst.x = src0.y * src1.z - src1.y * src0.z
 368   dst.y = src0.z * src1.x - src1.z * src0.x
 369   dst.z = src0.x * src1.y - src1.x * src0.y
 370   dst.w = 1.0
 371
 372
 373 1.4.1  ABS - Absolute
 374
 375 .. math::
 376
 377   dst.x = abs(src.x)
 378   dst.y = abs(src.y)
 379   dst.z = abs(src.z)
 380   dst.w = abs(src.w)
 381
 382
 383 1.4.2  RCC - Reciprocal Clamped
 384
 385 .. math::
 386
 387   dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
 388   dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
 389   dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
 390   dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
 391
 392
 393 1.4.3  DPH - Homogeneous Dot Product
 394
 395 .. math::
 396
 397   dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 398   dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 399   dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 400   dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 401
 402
 403 COS - Cosine
 404
 405 .. math::
 406
 407   dst.x = \cos{src.x}
 408
 409   dst.y = \cos{src.x}
 410
 411   dst.z = \cos{src.x}
 412
 413   dst.w = \cos{src.x}
 414
 415
 416 1.5.2  DDX - Derivative Relative To X
 417
 418 .. math::
 419
 420   dst.x = partialx(src.x)
 421   dst.y = partialx(src.y)
 422   dst.z = partialx(src.z)
 423   dst.w = partialx(src.w)
 424
 425
 426 1.5.3  DDY - Derivative Relative To Y
 427
 428 .. math::
 429
 430   dst.x = partialy(src.x)
 431   dst.y = partialy(src.y)
 432   dst.z = partialy(src.z)
 433   dst.w = partialy(src.w)
 434
 435
 436 1.5.7  KILP - Predicated Discard
 437
 438 .. math::
 439
 440   discard
 441
 442
 443 1.5.10  PK2H - Pack Two 16-bit Floats
 444
 445   TBD
 446
 447
 448 1.5.11  PK2US - Pack Two Unsigned 16-bit Scalars
 449
 450   TBD
 451
 452
 453 1.5.12  PK4B - Pack Four Signed 8-bit Scalars
 454
 455   TBD
 456
 457
 458 1.5.13  PK4UB - Pack Four Unsigned 8-bit Scalars
 459
 460   TBD
 461
 462
 463 1.5.15  RFL - Reflection Vector
 464
 465 .. math::
 466
 467   dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
 468   dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
 469   dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
 470   dst.w = 1.0
 471
 472 Considered for removal.
 473
 474
 475 1.5.16  SEQ - Set On Equal
 476
 477 .. math::
 478
 479   dst.x = (src0.x == src1.x) ? 1.0 : 0.0
 480   dst.y = (src0.y == src1.y) ? 1.0 : 0.0
 481   dst.z = (src0.z == src1.z) ? 1.0 : 0.0
 482   dst.w = (src0.w == src1.w) ? 1.0 : 0.0
 483
 484
 485 1.5.17  SFL - Set On False
 486
 487 .. math::
 488
 489   dst.x = 0.0
 490   dst.y = 0.0
 491   dst.z = 0.0
 492   dst.w = 0.0
 493
 494 Considered for removal.
 495
 496 1.5.18  SGT - Set On Greater Than
 497
 498 .. math::
 499
 500   dst.x = (src0.x > src1.x) ? 1.0 : 0.0
 501   dst.y = (src0.y > src1.y) ? 1.0 : 0.0
 502   dst.z = (src0.z > src1.z) ? 1.0 : 0.0
 503   dst.w = (src0.w > src1.w) ? 1.0 : 0.0
 504
 505
 506 SIN - Sine
 507
 508 .. math::
 509
 510   dst.x = \sin{src.x}
 511
 512   dst.y = \sin{src.x}
 513
 514   dst.z = \sin{src.x}
 515
 516   dst.w = \sin{src.x}
 517
 518
 519 1.5.20  SLE - Set On Less Equal Than
 520
 521 .. math::
 522
 523   dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
 524   dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
 525   dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
 526   dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
 527
 528
 529 1.5.21  SNE - Set On Not Equal
 530
 531 .. math::
 532
 533   dst.x = (src0.x != src1.x) ? 1.0 : 0.0
 534   dst.y = (src0.y != src1.y) ? 1.0 : 0.0
 535   dst.z = (src0.z != src1.z) ? 1.0 : 0.0
 536   dst.w = (src0.w != src1.w) ? 1.0 : 0.0
 537
 538
 539 1.5.22  STR - Set On True
 540
 541 .. math::
 542
 543   dst.x = 1.0
 544   dst.y = 1.0
 545   dst.z = 1.0
 546   dst.w = 1.0
 547
 548
 549 1.5.23  TEX - Texture Lookup
 550
 551   TBD
 552
 553
 554 1.5.24  TXD - Texture Lookup with Derivatives
 555
 556   TBD
 557
 558
 559 1.5.25  TXP - Projective Texture Lookup
 560
 561   TBD
 562
 563
 564 1.5.26  UP2H - Unpack Two 16-Bit Floats
 565
 566   TBD
 567
 568   Considered for removal.
 569
 570 1.5.27  UP2US - Unpack Two Unsigned 16-Bit Scalars
 571
 572   TBD
 573
 574   Considered for removal.
 575
 576 1.5.28  UP4B - Unpack Four Signed 8-Bit Values
 577
 578   TBD
 579
 580   Considered for removal.
 581
 582 1.5.29  UP4UB - Unpack Four Unsigned 8-Bit Scalars
 583
 584   TBD
 585
 586   Considered for removal.
 587
 588 1.5.30  X2D - 2D Coordinate Transformation
 589
 590 .. math::
 591
 592   dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
 593   dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
 594   dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
 595   dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
 596
 597 Considered for removal.
 598
 599
 600 1.6  GL_NV_vertex_program2
 601 --------------------------
 602
 603
 604 1.6.1  ARA - Address Register Add
 605
 606   TBD
 607
 608   Considered for removal.
 609
 610 1.6.2  ARR - Address Register Load With Round
 611
 612 .. math::
 613
 614   dst.x = round(src.x)
 615   dst.y = round(src.y)
 616   dst.z = round(src.z)
 617   dst.w = round(src.w)
 618
 619
 620 1.6.3  BRA - Branch
 621
 622   pc = target
 623
 624   Considered for removal.
 625
 626 1.6.4  CAL - Subroutine Call
 627
 628   push(pc)
 629   pc = target
 630
 631
 632 1.6.5  RET - Subroutine Call Return
 633
 634   pc = pop()
 635
 636   Potential restrictions:
 637   * Only occurs at end of function.
 638
 639 1.6.6  SSG - Set Sign
 640
 641 .. math::
 642
 643   dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
 644   dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
 645   dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
 646   dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
 647
 648
 649 1.8.1  CMP - Compare
 650
 651 .. math::
 652
 653   dst.x = (src0.x < 0.0) ? src1.x : src2.x
 654   dst.y = (src0.y < 0.0) ? src1.y : src2.y
 655   dst.z = (src0.z < 0.0) ? src1.z : src2.z
 656   dst.w = (src0.w < 0.0) ? src1.w : src2.w
 657
 658
 659 1.8.2  KIL - Conditional Discard
 660
 661 .. math::
 662
 663   if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
 664     discard
 665   endif
 666
 667
 668 SCS - Sine Cosine
 669
 670 .. math::
 671
 672   dst.x = \cos{src.x}
 673
 674   dst.y = \sin{src.x}
 675
 676   dst.z = 0.0
 677
 678   dst.w = 1.0
 679
 680
 681 1.8.4  TXB - Texture Lookup With Bias
 682
 683   TBD
 684
 685
 686 1.9.1  NRM - 3-component Vector Normalise
 687
 688 .. math::
 689
 690   dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
 691   dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
 692   dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
 693   dst.w = 1.0
 694
 695
 696 1.9.2  DIV - Divide
 697
 698 .. math::
 699
 700   dst.x = src0.x / src1.x
 701   dst.y = src0.y / src1.y
 702   dst.z = src0.z / src1.z
 703   dst.w = src0.w / src1.w
 704
 705
 706 1.9.3  DP2 - 2-component Dot Product
 707
 708 .. math::
 709
 710   dst.x = src0.x * src1.x + src0.y * src1.y
 711   dst.y = src0.x * src1.x + src0.y * src1.y
 712   dst.z = src0.x * src1.x + src0.y * src1.y
 713   dst.w = src0.x * src1.x + src0.y * src1.y
 714
 715
 716 1.9.5  TXL - Texture Lookup With LOD
 717
 718   TBD
 719
 720
 721 1.9.6  BRK - Break
 722
 723   TBD
 724
 725
 726 1.9.7  IF - If
 727
 728   TBD
 729
 730
 731 1.9.8  BGNFOR - Begin a For-Loop
 732
 733   dst.x = floor(src.x)
 734   dst.y = floor(src.y)
 735   dst.z = floor(src.z)
 736
 737   if (dst.y <= 0)
 738     pc = [matching ENDFOR] + 1
 739   endif
 740
 741   Note: The destination must be a loop register.
 742         The source must be a constant register.
 743
 744   Considered for cleanup / removal.
 745
 746
 747 1.9.9  REP - Repeat
 748
 749   TBD
 750
 751
 752 1.9.10  ELSE - Else
 753
 754   TBD
 755
 756
 757 1.9.11  ENDIF - End If
 758
 759   TBD
 760
 761
 762 1.9.12  ENDFOR - End a For-Loop
 763
 764   dst.x = dst.x + dst.z
 765   dst.y = dst.y - 1.0
 766
 767   if (dst.y > 0)
 768     pc = [matching BGNFOR instruction] + 1
 769   endif
 770
 771   Note: The destination must be a loop register.
 772
 773   Considered for cleanup / removal.
 774
 775 1.9.13  ENDREP - End Repeat
 776
 777   TBD
 778
 779
 780 1.10.1  PUSHA - Push Address Register On Stack
 781
 782   push(src.x)
 783   push(src.y)
 784   push(src.z)
 785   push(src.w)
 786
 787   Considered for cleanup / removal.
 788
 789 1.10.2  POPA - Pop Address Register From Stack
 790
 791   dst.w = pop()
 792   dst.z = pop()
 793   dst.y = pop()
 794   dst.x = pop()
 795
 796   Considered for cleanup / removal.
 797
 798
 799 1.11  GL_NV_gpu_program4
 800 ------------------------
 801
 802 Support for these opcodes indicated by a special pipe capability bit (TBD).
 803
 804 1.11.1  CEIL - Ceiling
 805
 806 .. math::
 807
 808   dst.x = ceil(src.x)
 809   dst.y = ceil(src.y)
 810   dst.z = ceil(src.z)
 811   dst.w = ceil(src.w)
 812
 813
 814 1.11.2  I2F - Integer To Float
 815
 816 .. math::
 817
 818   dst.x = (float) src.x
 819   dst.y = (float) src.y
 820   dst.z = (float) src.z
 821   dst.w = (float) src.w
 822
 823
 824 1.11.3  NOT - Bitwise Not
 825
 826 .. math::
 827
 828   dst.x = ~src.x
 829   dst.y = ~src.y
 830   dst.z = ~src.z
 831   dst.w = ~src.w
 832
 833
 834 1.11.4  TRUNC - Truncate
 835
 836 .. math::
 837
 838   dst.x = trunc(src.x)
 839   dst.y = trunc(src.y)
 840   dst.z = trunc(src.z)
 841   dst.w = trunc(src.w)
 842
 843
 844 1.11.5  SHL - Shift Left
 845
 846 .. math::
 847
 848   dst.x = src0.x << src1.x
 849   dst.y = src0.y << src1.x
 850   dst.z = src0.z << src1.x
 851   dst.w = src0.w << src1.x
 852
 853
 854 1.11.6  SHR - Shift Right
 855
 856 .. math::
 857
 858   dst.x = src0.x >> src1.x
 859   dst.y = src0.y >> src1.x
 860   dst.z = src0.z >> src1.x
 861   dst.w = src0.w >> src1.x
 862
 863
 864 1.11.7  AND - Bitwise And
 865
 866 .. math::
 867
 868   dst.x = src0.x & src1.x
 869   dst.y = src0.y & src1.y
 870   dst.z = src0.z & src1.z
 871   dst.w = src0.w & src1.w
 872
 873
 874 1.11.8  OR - Bitwise Or
 875
 876 .. math::
 877
 878   dst.x = src0.x | src1.x
 879   dst.y = src0.y | src1.y
 880   dst.z = src0.z | src1.z
 881   dst.w = src0.w | src1.w
 882
 883
 884 1.11.9  MOD - Modulus
 885
 886 .. math::
 887
 888   dst.x = src0.x % src1.x
 889   dst.y = src0.y % src1.y
 890   dst.z = src0.z % src1.z
 891   dst.w = src0.w % src1.w
 892
 893
 894 1.11.10  XOR - Bitwise Xor
 895
 896 .. math::
 897
 898   dst.x = src0.x ^ src1.x
 899   dst.y = src0.y ^ src1.y
 900   dst.z = src0.z ^ src1.z
 901   dst.w = src0.w ^ src1.w
 902
 903
 904 1.11.11  SAD - Sum Of Absolute Differences
 905
 906 .. math::
 907
 908   dst.x = abs(src0.x - src1.x) + src2.x
 909   dst.y = abs(src0.y - src1.y) + src2.y
 910   dst.z = abs(src0.z - src1.z) + src2.z
 911   dst.w = abs(src0.w - src1.w) + src2.w
 912
 913
 914 1.11.12  TXF - Texel Fetch
 915
 916   TBD
 917
 918
 919 1.11.13  TXQ - Texture Size Query
 920
 921   TBD
 922
 923
 924 1.11.14  CONT - Continue
 925
 926   TBD
 927
 928
 929 1.12  GL_NV_geometry_program4
 930 -----------------------------
 931
 932
 933 1.12.1  EMIT - Emit
 934
 935   TBD
 936
 937
 938 1.12.2  ENDPRIM - End Primitive
 939
 940   TBD
 941
 942
 943 1.13  GLSL
 944 ----------
 945
 946
 947 1.13.1  BGNLOOP - Begin a Loop
 948
 949   TBD
 950
 951
 952 1.13.2  BGNSUB - Begin Subroutine
 953
 954   TBD
 955
 956
 957 1.13.3  ENDLOOP - End a Loop
 958
 959   TBD
 960
 961
 962 1.13.4  ENDSUB - End Subroutine
 963
 964   TBD
 965
 966
 967
 968 1.13.10  NOP - No Operation
 969
 970   Do nothing.
 971
 972
 973
 974 1.16.7  NRM4 - 4-component Vector Normalise
 975
 976 .. math::
 977
 978   dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
 979   dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
 980   dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
 981   dst.w = src.w / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
 982
 983
 984 1.17  ps_2_x
 985 ------------
 986
 987
 988 1.17.2  CALLNZ - Subroutine Call If Not Zero
 989
 990   TBD
 991
 992
 993 1.17.3  IFC - If
 994
 995   TBD
 996
 997
 998 1.17.5  BREAKC - Break Conditional
 999
1000   TBD
1001
1002
1003 2  Explanation of symbols used
1004 ==============================
1005
1006
1007 2.1  Functions
1008 --------------
1009
1010
1011   abs(x)            Absolute value of x.
1012                     (x < 0.0) ? -x : x
1013
1014   ceil(x)           Ceiling of x.
1015
1016   clamp(x,y,z)      Clamp x between y and z.
1017                     (x < y) ? y : (x > z) ? z : x
1018
1019   :math:`\lfloor x\rfloor` Floor of x.
1020
1021   lg2(x)            Logarithm base 2 of x.
1022
1023   max(x,y)          Maximum of x and y.
1024                     (x > y) ? x : y
1025
1026   min(x,y)          Minimum of x and y.
1027                     (x < y) ? x : y
1028
1029   partialx(x)       Derivative of x relative to fragment's X.
1030
1031   partialy(x)       Derivative of x relative to fragment's Y.
1032
1033   pop()             Pop from stack.
1034
1035   pow(x,y)          Raise x to power of y.
1036
1037   push(x)           Push x on stack.
1038
1039   round(x)          Round x.
1040
1041   sqrt(x)           Square root of x.
1042
1043   trunc(x)          Truncate x.
1044
1045
1046 2.2  Keywords
1047 -------------
1048
1049
1050   discard           Discard fragment.
1051
1052   dst               First destination register.
1053
1054   dst0              First destination register.
1055
1056   pc                Program counter.
1057
1058   src               First source register.
1059
1060   src0              First source register.
1061
1062   src1              Second source register.
1063
1064   src2              Third source register.
1065
1066   target            Label of target instruction.
1067
1068
1069 3  Other tokens
1070 ===============
1071
1072
1073 3.1  Declaration Semantic
1074 -------------------------
1075
1076
1077   Follows Declaration token if Semantic bit is set.
1078
1079   Since its purpose is to link a shader with other stages of the pipeline,
1080   it is valid to follow only those Declaration tokens that declare a register
1081   either in INPUT or OUTPUT file.
1082
1083   SemanticName field contains the semantic name of the register being declared.
1084   There is no default value.
1085
1086   SemanticIndex is an optional subscript that can be used to distinguish
1087   different register declarations with the same semantic name. The default value
1088   is 0.
1089
1090   The meanings of the individual semantic names are explained in the following
1091   sections.
1092
1093
1094 3.1.1  FACE
1095
1096   Valid only in a fragment shader INPUT declaration.
1097
1098   FACE.x is negative when the primitive is back facing. FACE.x is positive
1099   when the primitive is front facing.