src/gallium/docs/source/tgsi.rst

   1 TGSI
   2 ====
   3
   4 TGSI, Tungsten Graphics Shader Instructions, is an intermediate language
   5 for describing shaders. Since Gallium is inherently shaderful, shaders are
   6 an important part of the API. TGSI is the only intermediate representation
   7 used by all drivers.
   8
   9 From GL_NV_vertex_program
  10 -------------------------
  11
  12
  13 ARL - Address Register Load
  14
  15 .. math::
  16
  17   dst.x = \lfloor src.x\rfloor
  18
  19   dst.y = \lfloor src.y\rfloor
  20
  21   dst.z = \lfloor src.z\rfloor
  22
  23   dst.w = \lfloor src.w\rfloor
  24
  25
  26 MOV - Move
  27
  28 .. math::
  29
  30   dst.x = src.x
  31
  32   dst.y = src.y
  33
  34   dst.z = src.z
  35
  36   dst.w = src.w
  37
  38
  39 LIT - Light Coefficients
  40
  41 .. math::
  42
  43   dst.x = 1.0
  44
  45   dst.y = max(src.x, 0.0)
  46
  47   dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0.0
  48
  49   dst.w = 1.0
  50
  51
  52 RCP - Reciprocal
  53
  54 .. math::
  55
  56   dst.x = \frac{1}{src.x}
  57
  58   dst.y = \frac{1}{src.x}
  59
  60   dst.z = \frac{1}{src.x}
  61
  62   dst.w = \frac{1}{src.x}
  63
  64
  65 RSQ - Reciprocal Square Root
  66
  67 .. math::
  68
  69   dst.x = \frac{1}{\sqrt{|src.x|}}
  70
  71   dst.y = \frac{1}{\sqrt{|src.x|}}
  72
  73   dst.z = \frac{1}{\sqrt{|src.x|}}
  74
  75   dst.w = \frac{1}{\sqrt{|src.x|}}
  76
  77
  78 EXP - Approximate Exponential Base 2
  79
  80 .. math::
  81
  82   dst.x = 2^{\lfloor src.x\rfloor}
  83
  84   dst.y = src.x - \lfloor src.x\rfloor
  85
  86   dst.z = 2^{src.x}
  87
  88   dst.w = 1.0
  89
  90
  91 LOG - Approximate Logarithm Base 2
  92
  93 .. math::
  94
  95   dst.x = \lfloor\log_2{|src.x|}\rfloor
  96
  97   dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
  98
  99   dst.z = \log_2{|src.x|}
 100
 101   dst.w = 1
 102
 103
 104 MUL - Multiply
 105
 106 .. math::
 107
 108   dst.x = src0.x * src1.x
 109
 110   dst.y = src0.y * src1.y
 111
 112   dst.z = src0.z * src1.z
 113
 114   dst.w = src0.w * src1.w
 115
 116
 117 ADD - Add
 118
 119 .. math::
 120
 121   dst.x = src0.x + src1.x
 122
 123   dst.y = src0.y + src1.y
 124
 125   dst.z = src0.z + src1.z
 126
 127   dst.w = src0.w + src1.w
 128
 129
 130 DP3 - 3-component Dot Product
 131
 132 .. math::
 133
 134   dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 135
 136   dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 137
 138   dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 139
 140   dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 141
 142
 143 DP4 - 4-component Dot Product
 144
 145 .. math::
 146
 147   dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 148
 149   dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 150
 151   dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 152
 153   dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 154
 155
 156 DST - Distance Vector
 157
 158 .. math::
 159
 160   dst.x = 1.0
 161
 162   dst.y = src0.y * src1.y
 163
 164   dst.z = src0.z
 165
 166   dst.w = src1.w
 167
 168
 169 MIN - Minimum
 170
 171 .. math::
 172
 173   dst.x = min(src0.x, src1.x)
 174
 175   dst.y = min(src0.y, src1.y)
 176
 177   dst.z = min(src0.z, src1.z)
 178
 179   dst.w = min(src0.w, src1.w)
 180
 181
 182 MAX - Maximum
 183
 184 .. math::
 185
 186   dst.x = max(src0.x, src1.x)
 187
 188   dst.y = max(src0.y, src1.y)
 189
 190   dst.z = max(src0.z, src1.z)
 191
 192   dst.w = max(src0.w, src1.w)
 193
 194
 195 SLT - Set On Less Than
 196
 197 .. math::
 198
 199   dst.x = (src0.x < src1.x) ? 1.0 : 0.0
 200
 201   dst.y = (src0.y < src1.y) ? 1.0 : 0.0
 202
 203   dst.z = (src0.z < src1.z) ? 1.0 : 0.0
 204
 205   dst.w = (src0.w < src1.w) ? 1.0 : 0.0
 206
 207
 208 SGE - Set On Greater Equal Than
 209
 210 .. math::
 211
 212   dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
 213
 214   dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
 215
 216   dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
 217
 218   dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
 219
 220
 221 MAD - Multiply And Add
 222
 223 .. math::
 224
 225   dst.x = src0.x * src1.x + src2.x
 226
 227   dst.y = src0.y * src1.y + src2.y
 228
 229   dst.z = src0.z * src1.z + src2.z
 230
 231   dst.w = src0.w * src1.w + src2.w
 232
 233
 234 SUB - Subtract
 235
 236 .. math::
 237
 238   dst.x = src0.x - src1.x
 239
 240   dst.y = src0.y - src1.y
 241
 242   dst.z = src0.z - src1.z
 243
 244   dst.w = src0.w - src1.w
 245
 246
 247 LRP - Linear Interpolate
 248
 249 .. math::
 250
 251   dst.x = src0.x * (src1.x - src2.x) + src2.x
 252
 253   dst.y = src0.y * (src1.y - src2.y) + src2.y
 254
 255   dst.z = src0.z * (src1.z - src2.z) + src2.z
 256
 257   dst.w = src0.w * (src1.w - src2.w) + src2.w
 258
 259
 260 CND - Condition
 261
 262 .. math::
 263
 264   dst.x = (src2.x > 0.5) ? src0.x : src1.x
 265
 266   dst.y = (src2.y > 0.5) ? src0.y : src1.y
 267
 268   dst.z = (src2.z > 0.5) ? src0.z : src1.z
 269
 270   dst.w = (src2.w > 0.5) ? src0.w : src1.w
 271
 272
 273 DP2A - 2-component Dot Product And Add
 274
 275 .. math::
 276
 277   dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
 278
 279   dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
 280
 281   dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
 282
 283   dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
 284
 285
 286 FRAC - Fraction
 287
 288 .. math::
 289
 290   dst.x = src.x - \lfloor src.x\rfloor
 291
 292   dst.y = src.y - \lfloor src.y\rfloor
 293
 294   dst.z = src.z - \lfloor src.z\rfloor
 295
 296   dst.w = src.w - \lfloor src.w\rfloor
 297
 298
 299 CLAMP - Clamp
 300
 301 .. math::
 302
 303   dst.x = clamp(src0.x, src1.x, src2.x)
 304   dst.y = clamp(src0.y, src1.y, src2.y)
 305   dst.z = clamp(src0.z, src1.z, src2.z)
 306   dst.w = clamp(src0.w, src1.w, src2.w)
 307
 308
 309 FLR - Floor
 310
 311 This is identical to ARL.
 312
 313 .. math::
 314
 315   dst.x = \lfloor src.x\rfloor
 316
 317   dst.y = \lfloor src.y\rfloor
 318
 319   dst.z = \lfloor src.z\rfloor
 320
 321   dst.w = \lfloor src.w\rfloor
 322
 323
 324 1.3.9  ROUND - Round
 325
 326 .. math::
 327
 328   dst.x = round(src.x)
 329   dst.y = round(src.y)
 330   dst.z = round(src.z)
 331   dst.w = round(src.w)
 332
 333
 334 EX2 - Exponential Base 2
 335
 336 .. math::
 337
 338   dst.x = 2^{src.x}
 339
 340   dst.y = 2^{src.x}
 341
 342   dst.z = 2^{src.x}
 343
 344   dst.w = 2^{src.x}
 345
 346
 347 LG2 - Logarithm Base 2
 348
 349 .. math::
 350
 351   dst.x = \log_2{src.x}
 352
 353   dst.y = \log_2{src.x}
 354
 355   dst.z = \log_2{src.x}
 356
 357   dst.w = \log_2{src.x}
 358
 359
 360 POW - Power
 361
 362 .. math::
 363
 364   dst.x = src0.x^{src1.x}
 365
 366   dst.y = src0.x^{src1.x}
 367
 368   dst.z = src0.x^{src1.x}
 369
 370   dst.w = src0.x^{src1.x}
 371
 372 1.3.15  XPD - Cross Product
 373
 374 .. math::
 375
 376   dst.x = src0.y * src1.z - src1.y * src0.z
 377   dst.y = src0.z * src1.x - src1.z * src0.x
 378   dst.z = src0.x * src1.y - src1.x * src0.y
 379   dst.w = 1.0
 380
 381
 382 ABS - Absolute
 383
 384 .. math::
 385
 386   dst.x = |src.x|
 387
 388   dst.y = |src.y|
 389
 390   dst.z = |src.z|
 391
 392   dst.w = |src.w|
 393
 394
 395 1.4.2  RCC - Reciprocal Clamped
 396
 397 .. math::
 398
 399   dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
 400   dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
 401   dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
 402   dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
 403
 404
 405 1.4.3  DPH - Homogeneous Dot Product
 406
 407 .. math::
 408
 409   dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 410   dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 411   dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 412   dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 413
 414
 415 COS - Cosine
 416
 417 .. math::
 418
 419   dst.x = \cos{src.x}
 420
 421   dst.y = \cos{src.x}
 422
 423   dst.z = \cos{src.x}
 424
 425   dst.w = \cos{src.x}
 426
 427
 428 1.5.2  DDX - Derivative Relative To X
 429
 430 .. math::
 431
 432   dst.x = partialx(src.x)
 433   dst.y = partialx(src.y)
 434   dst.z = partialx(src.z)
 435   dst.w = partialx(src.w)
 436
 437
 438 1.5.3  DDY - Derivative Relative To Y
 439
 440 .. math::
 441
 442   dst.x = partialy(src.x)
 443   dst.y = partialy(src.y)
 444   dst.z = partialy(src.z)
 445   dst.w = partialy(src.w)
 446
 447
 448 1.5.7  KILP - Predicated Discard
 449
 450 .. math::
 451
 452   discard
 453
 454
 455 1.5.10  PK2H - Pack Two 16-bit Floats
 456
 457   TBD
 458
 459
 460 1.5.11  PK2US - Pack Two Unsigned 16-bit Scalars
 461
 462   TBD
 463
 464
 465 1.5.12  PK4B - Pack Four Signed 8-bit Scalars
 466
 467   TBD
 468
 469
 470 1.5.13  PK4UB - Pack Four Unsigned 8-bit Scalars
 471
 472   TBD
 473
 474
 475 1.5.15  RFL - Reflection Vector
 476
 477 .. math::
 478
 479   dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
 480   dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
 481   dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
 482   dst.w = 1.0
 483
 484 Considered for removal.
 485
 486
 487 1.5.16  SEQ - Set On Equal
 488
 489 .. math::
 490
 491   dst.x = (src0.x == src1.x) ? 1.0 : 0.0
 492   dst.y = (src0.y == src1.y) ? 1.0 : 0.0
 493   dst.z = (src0.z == src1.z) ? 1.0 : 0.0
 494   dst.w = (src0.w == src1.w) ? 1.0 : 0.0
 495
 496
 497 1.5.17  SFL - Set On False
 498
 499 .. math::
 500
 501   dst.x = 0.0
 502   dst.y = 0.0
 503   dst.z = 0.0
 504   dst.w = 0.0
 505
 506 Considered for removal.
 507
 508 1.5.18  SGT - Set On Greater Than
 509
 510 .. math::
 511
 512   dst.x = (src0.x > src1.x) ? 1.0 : 0.0
 513   dst.y = (src0.y > src1.y) ? 1.0 : 0.0
 514   dst.z = (src0.z > src1.z) ? 1.0 : 0.0
 515   dst.w = (src0.w > src1.w) ? 1.0 : 0.0
 516
 517
 518 SIN - Sine
 519
 520 .. math::
 521
 522   dst.x = \sin{src.x}
 523
 524   dst.y = \sin{src.x}
 525
 526   dst.z = \sin{src.x}
 527
 528   dst.w = \sin{src.x}
 529
 530
 531 1.5.20  SLE - Set On Less Equal Than
 532
 533 .. math::
 534
 535   dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
 536   dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
 537   dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
 538   dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
 539
 540
 541 1.5.21  SNE - Set On Not Equal
 542
 543 .. math::
 544
 545   dst.x = (src0.x != src1.x) ? 1.0 : 0.0
 546   dst.y = (src0.y != src1.y) ? 1.0 : 0.0
 547   dst.z = (src0.z != src1.z) ? 1.0 : 0.0
 548   dst.w = (src0.w != src1.w) ? 1.0 : 0.0
 549
 550
 551 1.5.22  STR - Set On True
 552
 553 .. math::
 554
 555   dst.x = 1.0
 556   dst.y = 1.0
 557   dst.z = 1.0
 558   dst.w = 1.0
 559
 560
 561 1.5.23  TEX - Texture Lookup
 562
 563   TBD
 564
 565
 566 1.5.24  TXD - Texture Lookup with Derivatives
 567
 568   TBD
 569
 570
 571 1.5.25  TXP - Projective Texture Lookup
 572
 573   TBD
 574
 575
 576 1.5.26  UP2H - Unpack Two 16-Bit Floats
 577
 578   TBD
 579
 580   Considered for removal.
 581
 582 1.5.27  UP2US - Unpack Two Unsigned 16-Bit Scalars
 583
 584   TBD
 585
 586   Considered for removal.
 587
 588 1.5.28  UP4B - Unpack Four Signed 8-Bit Values
 589
 590   TBD
 591
 592   Considered for removal.
 593
 594 1.5.29  UP4UB - Unpack Four Unsigned 8-Bit Scalars
 595
 596   TBD
 597
 598   Considered for removal.
 599
 600 1.5.30  X2D - 2D Coordinate Transformation
 601
 602 .. math::
 603
 604   dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
 605   dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
 606   dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
 607   dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
 608
 609 Considered for removal.
 610
 611
 612 1.6  GL_NV_vertex_program2
 613 --------------------------
 614
 615
 616 1.6.1  ARA - Address Register Add
 617
 618   TBD
 619
 620   Considered for removal.
 621
 622 1.6.2  ARR - Address Register Load With Round
 623
 624 .. math::
 625
 626   dst.x = round(src.x)
 627   dst.y = round(src.y)
 628   dst.z = round(src.z)
 629   dst.w = round(src.w)
 630
 631
 632 1.6.3  BRA - Branch
 633
 634   pc = target
 635
 636   Considered for removal.
 637
 638 1.6.4  CAL - Subroutine Call
 639
 640   push(pc)
 641   pc = target
 642
 643
 644 1.6.5  RET - Subroutine Call Return
 645
 646   pc = pop()
 647
 648   Potential restrictions:
 649   * Only occurs at end of function.
 650
 651 1.6.6  SSG - Set Sign
 652
 653 .. math::
 654
 655   dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
 656   dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
 657   dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
 658   dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
 659
 660
 661 1.8.1  CMP - Compare
 662
 663 .. math::
 664
 665   dst.x = (src0.x < 0.0) ? src1.x : src2.x
 666   dst.y = (src0.y < 0.0) ? src1.y : src2.y
 667   dst.z = (src0.z < 0.0) ? src1.z : src2.z
 668   dst.w = (src0.w < 0.0) ? src1.w : src2.w
 669
 670
 671 1.8.2  KIL - Conditional Discard
 672
 673 .. math::
 674
 675   if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
 676     discard
 677   endif
 678
 679
 680 SCS - Sine Cosine
 681
 682 .. math::
 683
 684   dst.x = \cos{src.x}
 685
 686   dst.y = \sin{src.x}
 687
 688   dst.z = 0.0
 689
 690   dst.w = 1.0
 691
 692
 693 1.8.4  TXB - Texture Lookup With Bias
 694
 695   TBD
 696
 697
 698 1.9.1  NRM - 3-component Vector Normalise
 699
 700 .. math::
 701
 702   dst.x = src.x / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z}
 703   dst.y = src.y / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z}
 704   dst.z = src.z / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z}
 705   dst.w = 1.0
 706
 707
 708 1.9.2  DIV - Divide
 709
 710 .. math::
 711
 712   dst.x = src0.x / src1.x
 713   dst.y = src0.y / src1.y
 714   dst.z = src0.z / src1.z
 715   dst.w = src0.w / src1.w
 716
 717
 718 1.9.3  DP2 - 2-component Dot Product
 719
 720 .. math::
 721
 722   dst.x = src0.x * src1.x + src0.y * src1.y
 723   dst.y = src0.x * src1.x + src0.y * src1.y
 724   dst.z = src0.x * src1.x + src0.y * src1.y
 725   dst.w = src0.x * src1.x + src0.y * src1.y
 726
 727
 728 1.9.5  TXL - Texture Lookup With LOD
 729
 730   TBD
 731
 732
 733 1.9.6  BRK - Break
 734
 735   TBD
 736
 737
 738 1.9.7  IF - If
 739
 740   TBD
 741
 742
 743 1.9.8  BGNFOR - Begin a For-Loop
 744
 745   dst.x = floor(src.x)
 746   dst.y = floor(src.y)
 747   dst.z = floor(src.z)
 748
 749   if (dst.y <= 0)
 750     pc = [matching ENDFOR] + 1
 751   endif
 752
 753   Note: The destination must be a loop register.
 754         The source must be a constant register.
 755
 756   Considered for cleanup / removal.
 757
 758
 759 1.9.9  REP - Repeat
 760
 761   TBD
 762
 763
 764 1.9.10  ELSE - Else
 765
 766   TBD
 767
 768
 769 1.9.11  ENDIF - End If
 770
 771   TBD
 772
 773
 774 1.9.12  ENDFOR - End a For-Loop
 775
 776   dst.x = dst.x + dst.z
 777   dst.y = dst.y - 1.0
 778
 779   if (dst.y > 0)
 780     pc = [matching BGNFOR instruction] + 1
 781   endif
 782
 783   Note: The destination must be a loop register.
 784
 785   Considered for cleanup / removal.
 786
 787 1.9.13  ENDREP - End Repeat
 788
 789   TBD
 790
 791
 792 1.10.1  PUSHA - Push Address Register On Stack
 793
 794   push(src.x)
 795   push(src.y)
 796   push(src.z)
 797   push(src.w)
 798
 799   Considered for cleanup / removal.
 800
 801 1.10.2  POPA - Pop Address Register From Stack
 802
 803   dst.w = pop()
 804   dst.z = pop()
 805   dst.y = pop()
 806   dst.x = pop()
 807
 808   Considered for cleanup / removal.
 809
 810
 811 1.11  GL_NV_gpu_program4
 812 ------------------------
 813
 814 Support for these opcodes is indicated by a special pipe capability bit (TBD).
 815
 816 CEIL - Ceiling
 817
 818 .. math::
 819
 820   dst.x = \lceil src.x\rceil
 821
 822   dst.y = \lceil src.y\rceil
 823
 824   dst.z = \lceil src.z\rceil
 825
 826   dst.w = \lceil src.w\rceil
 827
 828
 829 1.11.2  I2F - Integer To Float
 830
 831 .. math::
 832
 833   dst.x = (float) src.x
 834   dst.y = (float) src.y
 835   dst.z = (float) src.z
 836   dst.w = (float) src.w
 837
 838
 839 1.11.3  NOT - Bitwise Not
 840
 841 .. math::
 842
 843   dst.x = \sim src.x
 844   dst.y = \sim src.y
 845   dst.z = \sim src.z
 846   dst.w = \sim src.w
 847
 848
 849 1.11.4  TRUNC - Truncate
 850
 851 .. math::
 852
 853   dst.x = trunc(src.x)
 854   dst.y = trunc(src.y)
 855   dst.z = trunc(src.z)
 856   dst.w = trunc(src.w)
 857
 858
 859 1.11.5  SHL - Shift Left
 860
 861 .. math::
 862
 863   dst.x = src0.x << src1.x
 864   dst.y = src0.y << src1.x
 865   dst.z = src0.z << src1.x
 866   dst.w = src0.w << src1.x
 867
 868
 869 1.11.6  SHR - Shift Right
 870
 871 .. math::
 872
 873   dst.x = src0.x >> src1.x
 874   dst.y = src0.y >> src1.x
 875   dst.z = src0.z >> src1.x
 876   dst.w = src0.w >> src1.x
 877
 878
 879 1.11.7  AND - Bitwise And
 880
 881 .. math::
 882
 883   dst.x = src0.x \& src1.x
 884   dst.y = src0.y \& src1.y
 885   dst.z = src0.z \& src1.z
 886   dst.w = src0.w \& src1.w
 887
 888
 889 1.11.8  OR - Bitwise Or
 890
 891 .. math::
 892
 893   dst.x = src0.x | src1.x
 894   dst.y = src0.y | src1.y
 895   dst.z = src0.z | src1.z
 896   dst.w = src0.w | src1.w
 897
 898
 899 1.11.9  MOD - Modulus
 900
 901 .. math::
 902
 903   dst.x = src0.x \bmod src1.x
 904   dst.y = src0.y \bmod src1.y
 905   dst.z = src0.z \bmod src1.z
 906   dst.w = src0.w \bmod src1.w
 907
 908
 909 1.11.10  XOR - Bitwise Xor
 910
 911 .. math::
 912
 913   dst.x = src0.x \oplus src1.x
 914   dst.y = src0.y \oplus src1.y
 915   dst.z = src0.z \oplus src1.z
 916   dst.w = src0.w \oplus src1.w
 917
 918
 919 SAD - Sum Of Absolute Differences
 920
 921 .. math::
 922
 923   dst.x = |src0.x - src1.x| + src2.x
 924
 925   dst.y = |src0.y - src1.y| + src2.y
 926
 927   dst.z = |src0.z - src1.z| + src2.z
 928
 929   dst.w = |src0.w - src1.w| + src2.w
 930
 931
 932 1.11.12  TXF - Texel Fetch
 933
 934   TBD
 935
 936
 937 1.11.13  TXQ - Texture Size Query
 938
 939   TBD
 940
 941
 942 1.11.14  CONT - Continue
 943
 944   TBD
 945
 946
 947 1.12  GL_NV_geometry_program4
 948 -----------------------------
 949
 950
 951 1.12.1  EMIT - Emit
 952
 953   TBD
 954
 955
 956 1.12.2  ENDPRIM - End Primitive
 957
 958   TBD
 959
 960
 961 1.13  GLSL
 962 ----------
 963
 964
 965 1.13.1  BGNLOOP - Begin a Loop
 966
 967   TBD
 968
 969
 970 1.13.2  BGNSUB - Begin Subroutine
 971
 972   TBD
 973
 974
 975 1.13.3  ENDLOOP - End a Loop
 976
 977   TBD
 978
 979
 980 1.13.4  ENDSUB - End Subroutine
 981
 982   TBD
 983
 984
 985
 986 1.13.10  NOP - No Operation
 987
 988   Do nothing.
 989
 990
 991
 992 1.16.7  NRM4 - 4-component Vector Normalise
 993
 994 .. math::
 995
 996   dst.x = src.x / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w}
 997   dst.y = src.y / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w}
 998   dst.z = src.z / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w}
 999   dst.w = src.w / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w}
1000
1001
1002 1.17  ps_2_x
1003 ------------
1004
1005
1006 1.17.2  CALLNZ - Subroutine Call If Not Zero
1007
1008   TBD
1009
1010
1011 1.17.3  IFC - If
1012
1013   TBD
1014
1015
1016 1.17.5  BREAKC - Break Conditional
1017
1018   TBD
1019
1020
1021 2  Explanation of symbols used
1022 ==============================
1023
1024
1025 2.1  Functions
1026 --------------
1027
1028
1029   :math:`|x|`       Absolute value of `x`.
1030
1031   :math:`\lceil x \rceil` Ceiling of `x`.
1032
1033   clamp(x,y,z)      Clamp x between y and z.
1034                     (x < y) ? y : (x > z) ? z : x
1035
1036   :math:`\lfloor x\rfloor` Floor of `x`.
1037
1038   :math:`\log_2{x}` Logarithm of `x`, base 2.
1039
1040   max(x,y)          Maximum of x and y.
1041                     (x > y) ? x : y
1042
1043   min(x,y)          Minimum of x and y.
1044                     (x < y) ? x : y
1045
1046   partialx(x)       Derivative of x relative to fragment's X.
1047
1048   partialy(x)       Derivative of x relative to fragment's Y.
1049
1050   pop()             Pop from stack.
1051
1052   :math:`x^y`       `x` to the power `y`.
1053
1054   push(x)           Push x on stack.
1055
1056   round(x)          Round x.
1057
1058   trunc(x)          Truncate x.
1059
1060
1061 2.2  Keywords
1062 -------------
1063
1064
1065   discard           Discard fragment.
1066
1067   dst               First destination register.
1068
1069   dst0              First destination register.
1070
1071   pc                Program counter.
1072
1073   src               First source register.
1074
1075   src0              First source register.
1076
1077   src1              Second source register.
1078
1079   src2              Third source register.
1080
1081   target            Label of target instruction.
1082
1083
1084 3  Other tokens
1085 ===============
1086
1087
1088 3.1  Declaration Semantic
1089 -------------------------
1090
1091
1092   Follows Declaration token if Semantic bit is set.
1093
1094   Since its purpose is to link a shader with other stages of the pipeline,
1095   it is valid to follow only those Declaration tokens that declare a register
1096   either in INPUT or OUTPUT file.
1097
1098   SemanticName field contains the semantic name of the register being declared.
1099   There is no default value.
1100
1101   SemanticIndex is an optional subscript that can be used to distinguish
1102   different register declarations with the same semantic name. The default value
1103   is 0.
1104
1105   The meanings of the individual semantic names are explained in the following
1106   sections.
1107
1108
1109 3.1.1  FACE
1110
1111   Valid only in a fragment shader INPUT declaration.
1112
1113   FACE.x is negative when the primitive is back facing. FACE.x is positive
1114   when the primitive is front facing.