src/gallium/docs/source/tgsi.rst

   1 TGSI
   2 ====
   3
   4 TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language
   5 for describing shaders. Since Gallium is inherently shaderful, shaders are
   6 an important part of the API. TGSI is the only intermediate representation
   7 used by all drivers.
   8
   9 Basics
  10 ------
  11
  12 All TGSI instructions, known as *opcodes*, operate on arbitrary-precision
  13 floating-point four-component vectors. An opcode may have up to one
  14 destination register, known as *dst*, and between zero and three source
  15 registers, called *src0* through *src2*, or simply *src* if there is only
  16 one.
  17
  18 Some instructions, like :opcode:`I2F`, permit re-interpretation of vector
  19 components as integers. Other instructions permit using registers as
  20 two-component vectors with double precision; see :ref:`doubleopcodes`.
  21
  22 Instruction Set
  23 ---------------
  24
  25 From GL_NV_vertex_program
  26 ^^^^^^^^^^^^^^^^^^^^^^^^^
  27
  28
  29 .. opcode:: ARL - Address Register Load
  30
  31 .. math::
  32
  33   dst.x = \lfloor src.x\rfloor
  34
  35   dst.y = \lfloor src.y\rfloor
  36
  37   dst.z = \lfloor src.z\rfloor
  38
  39   dst.w = \lfloor src.w\rfloor
  40
  41
  42 .. opcode:: MOV - Move
  43
  44 .. math::
  45
  46   dst.x = src.x
  47
  48   dst.y = src.y
  49
  50   dst.z = src.z
  51
  52   dst.w = src.w
  53
  54
  55 .. opcode:: LIT - Light Coefficients
  56
  57 .. math::
  58
  59   dst.x = 1
  60
  61   dst.y = max(src.x, 0)
  62
  63   dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
  64
  65   dst.w = 1
  66
  67
  68 .. opcode:: RCP - Reciprocal
  69
  70 .. math::
  71
  72   dst.x = \frac{1}{src.x}
  73
  74   dst.y = \frac{1}{src.x}
  75
  76   dst.z = \frac{1}{src.x}
  77
  78   dst.w = \frac{1}{src.x}
  79
  80
  81 .. opcode:: RSQ - Reciprocal Square Root
  82
  83 .. math::
  84
  85   dst.x = \frac{1}{\sqrt{|src.x|}}
  86
  87   dst.y = \frac{1}{\sqrt{|src.x|}}
  88
  89   dst.z = \frac{1}{\sqrt{|src.x|}}
  90
  91   dst.w = \frac{1}{\sqrt{|src.x|}}
  92
  93
  94 .. opcode:: EXP - Approximate Exponential Base 2
  95
  96 .. math::
  97
  98   dst.x = 2^{\lfloor src.x\rfloor}
  99
 100   dst.y = src.x - \lfloor src.x\rfloor
 101
 102   dst.z = 2^{src.x}
 103
 104   dst.w = 1
 105
 106
 107 .. opcode:: LOG - Approximate Logarithm Base 2
 108
 109 .. math::
 110
 111   dst.x = \lfloor\log_2{|src.x|}\rfloor
 112
 113   dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
 114
 115   dst.z = \log_2{|src.x|}
 116
 117   dst.w = 1
 118
 119
 120 .. opcode:: MUL - Multiply
 121
 122 .. math::
 123
 124   dst.x = src0.x \times src1.x
 125
 126   dst.y = src0.y \times src1.y
 127
 128   dst.z = src0.z \times src1.z
 129
 130   dst.w = src0.w \times src1.w
 131
 132
 133 .. opcode:: ADD - Add
 134
 135 .. math::
 136
 137   dst.x = src0.x + src1.x
 138
 139   dst.y = src0.y + src1.y
 140
 141   dst.z = src0.z + src1.z
 142
 143   dst.w = src0.w + src1.w
 144
 145
 146 .. opcode:: DP3 - 3-component Dot Product
 147
 148 .. math::
 149
 150   dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 151
 152   dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 153
 154   dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 155
 156   dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 157
 158
 159 .. opcode:: DP4 - 4-component Dot Product
 160
 161 .. math::
 162
 163   dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 164
 165   dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 166
 167   dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 168
 169   dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 170
 171
 172 .. opcode:: DST - Distance Vector
 173
 174 .. math::
 175
 176   dst.x = 1
 177
 178   dst.y = src0.y \times src1.y
 179
 180   dst.z = src0.z
 181
 182   dst.w = src1.w
 183
 184
 185 .. opcode:: MIN - Minimum
 186
 187 .. math::
 188
 189   dst.x = min(src0.x, src1.x)
 190
 191   dst.y = min(src0.y, src1.y)
 192
 193   dst.z = min(src0.z, src1.z)
 194
 195   dst.w = min(src0.w, src1.w)
 196
 197
 198 .. opcode:: MAX - Maximum
 199
 200 .. math::
 201
 202   dst.x = max(src0.x, src1.x)
 203
 204   dst.y = max(src0.y, src1.y)
 205
 206   dst.z = max(src0.z, src1.z)
 207
 208   dst.w = max(src0.w, src1.w)
 209
 210
 211 .. opcode:: SLT - Set On Less Than
 212
 213 .. math::
 214
 215   dst.x = (src0.x < src1.x) ? 1 : 0
 216
 217   dst.y = (src0.y < src1.y) ? 1 : 0
 218
 219   dst.z = (src0.z < src1.z) ? 1 : 0
 220
 221   dst.w = (src0.w < src1.w) ? 1 : 0
 222
 223
 224 .. opcode:: SGE - Set On Greater Equal Than
 225
 226 .. math::
 227
 228   dst.x = (src0.x >= src1.x) ? 1 : 0
 229
 230   dst.y = (src0.y >= src1.y) ? 1 : 0
 231
 232   dst.z = (src0.z >= src1.z) ? 1 : 0
 233
 234   dst.w = (src0.w >= src1.w) ? 1 : 0
 235
 236
 237 .. opcode:: MAD - Multiply And Add
 238
 239 .. math::
 240
 241   dst.x = src0.x \times src1.x + src2.x
 242
 243   dst.y = src0.y \times src1.y + src2.y
 244
 245   dst.z = src0.z \times src1.z + src2.z
 246
 247   dst.w = src0.w \times src1.w + src2.w
 248
 249
 250 .. opcode:: SUB - Subtract
 251
 252 .. math::
 253
 254   dst.x = src0.x - src1.x
 255
 256   dst.y = src0.y - src1.y
 257
 258   dst.z = src0.z - src1.z
 259
 260   dst.w = src0.w - src1.w
 261
 262
 263 .. opcode:: LRP - Linear Interpolate
 264
 265 .. math::
 266
 267   dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x
 268
 269   dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y
 270
 271   dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z
 272
 273   dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
 274
 275
 276 .. opcode:: CND - Condition
 277
 278 .. math::
 279
 280   dst.x = (src2.x > 0.5) ? src0.x : src1.x
 281
 282   dst.y = (src2.y > 0.5) ? src0.y : src1.y
 283
 284   dst.z = (src2.z > 0.5) ? src0.z : src1.z
 285
 286   dst.w = (src2.w > 0.5) ? src0.w : src1.w
 287
 288
 289 .. opcode:: DP2A - 2-component Dot Product And Add
 290
 291 .. math::
 292
 293   dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
 294
 295   dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
 296
 297   dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
 298
 299   dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
 300
 301
 302 .. opcode:: FRAC - Fraction
 303
 304 .. math::
 305
 306   dst.x = src.x - \lfloor src.x\rfloor
 307
 308   dst.y = src.y - \lfloor src.y\rfloor
 309
 310   dst.z = src.z - \lfloor src.z\rfloor
 311
 312   dst.w = src.w - \lfloor src.w\rfloor
 313
 314
 315 .. opcode:: CLAMP - Clamp
 316
 317 .. math::
 318
 319   dst.x = clamp(src0.x, src1.x, src2.x)
 320
 321   dst.y = clamp(src0.y, src1.y, src2.y)
 322
 323   dst.z = clamp(src0.z, src1.z, src2.z)
 324
 325   dst.w = clamp(src0.w, src1.w, src2.w)
 326
 327
 328 .. opcode:: FLR - Floor
 329
 330 This is identical to ARL.
 331
 332 .. math::
 333
 334   dst.x = \lfloor src.x\rfloor
 335
 336   dst.y = \lfloor src.y\rfloor
 337
 338   dst.z = \lfloor src.z\rfloor
 339
 340   dst.w = \lfloor src.w\rfloor
 341
 342
 343 .. opcode:: ROUND - Round
 344
 345 .. math::
 346
 347   dst.x = round(src.x)
 348
 349   dst.y = round(src.y)
 350
 351   dst.z = round(src.z)
 352
 353   dst.w = round(src.w)
 354
 355
 356 .. opcode:: EX2 - Exponential Base 2
 357
 358 .. math::
 359
 360   dst.x = 2^{src.x}
 361
 362   dst.y = 2^{src.x}
 363
 364   dst.z = 2^{src.x}
 365
 366   dst.w = 2^{src.x}
 367
 368
 369 .. opcode:: LG2 - Logarithm Base 2
 370
 371 .. math::
 372
 373   dst.x = \log_2{src.x}
 374
 375   dst.y = \log_2{src.x}
 376
 377   dst.z = \log_2{src.x}
 378
 379   dst.w = \log_2{src.x}
 380
 381
 382 .. opcode:: POW - Power
 383
 384 .. math::
 385
 386   dst.x = src0.x^{src1.x}
 387
 388   dst.y = src0.x^{src1.x}
 389
 390   dst.z = src0.x^{src1.x}
 391
 392   dst.w = src0.x^{src1.x}
 393
 394 .. opcode:: XPD - Cross Product
 395
 396 .. math::
 397
 398   dst.x = src0.y \times src1.z - src1.y \times src0.z
 399
 400   dst.y = src0.z \times src1.x - src1.z \times src0.x
 401
 402   dst.z = src0.x \times src1.y - src1.x \times src0.y
 403
 404   dst.w = 1
 405
 406
 407 .. opcode:: ABS - Absolute
 408
 409 .. math::
 410
 411   dst.x = |src.x|
 412
 413   dst.y = |src.y|
 414
 415   dst.z = |src.z|
 416
 417   dst.w = |src.w|
 418
 419
 420 .. opcode:: RCC - Reciprocal Clamped
 421
 422 XXX cleanup on aisle three
 423
 424 .. math::
 425
 426   dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 427
 428   dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 429
 430   dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 431
 432   dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 433
 434
 435 .. opcode:: DPH - Homogeneous Dot Product
 436
 437 .. math::
 438
 439   dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 440
 441   dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 442
 443   dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 444
 445   dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 446
 447
 448 .. opcode:: COS - Cosine
 449
 450 .. math::
 451
 452   dst.x = \cos{src.x}
 453
 454   dst.y = \cos{src.x}
 455
 456   dst.z = \cos{src.x}
 457
 458   dst.w = \cos{src.x}
 459
 460
 461 .. opcode:: DDX - Derivative Relative To X
 462
 463 .. math::
 464
 465   dst.x = partialx(src.x)
 466
 467   dst.y = partialx(src.y)
 468
 469   dst.z = partialx(src.z)
 470
 471   dst.w = partialx(src.w)
 472
 473
 474 .. opcode:: DDY - Derivative Relative To Y
 475
 476 .. math::
 477
 478   dst.x = partialy(src.x)
 479
 480   dst.y = partialy(src.y)
 481
 482   dst.z = partialy(src.z)
 483
 484   dst.w = partialy(src.w)
 485
 486
 487 .. opcode:: KILP - Predicated Discard
 488
 489   discard
 490
 491
 492 .. opcode:: PK2H - Pack Two 16-bit Floats
 493
 494   TBD
 495
 496
 497 .. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
 498
 499   TBD
 500
 501
 502 .. opcode:: PK4B - Pack Four Signed 8-bit Scalars
 503
 504   TBD
 505
 506
 507 .. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
 508
 509   TBD
 510
 511
 512 .. opcode:: RFL - Reflection Vector
 513
 514 .. math::
 515
 516   dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x
 517
 518   dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y
 519
 520   dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z
 521
 522   dst.w = 1
 523
 524 Considered for removal.
 525
 526
 527 .. opcode:: SEQ - Set On Equal
 528
 529 .. math::
 530
 531   dst.x = (src0.x == src1.x) ? 1 : 0
 532
 533   dst.y = (src0.y == src1.y) ? 1 : 0
 534
 535   dst.z = (src0.z == src1.z) ? 1 : 0
 536
 537   dst.w = (src0.w == src1.w) ? 1 : 0
 538
 539
 540 .. opcode:: SFL - Set On False
 541
 542 .. math::
 543
 544   dst.x = 0
 545
 546   dst.y = 0
 547
 548   dst.z = 0
 549
 550   dst.w = 0
 551
 552 Considered for removal.
 553
 554 .. opcode:: SGT - Set On Greater Than
 555
 556 .. math::
 557
 558   dst.x = (src0.x > src1.x) ? 1 : 0
 559
 560   dst.y = (src0.y > src1.y) ? 1 : 0
 561
 562   dst.z = (src0.z > src1.z) ? 1 : 0
 563
 564   dst.w = (src0.w > src1.w) ? 1 : 0
 565
 566
 567 .. opcode:: SIN - Sine
 568
 569 .. math::
 570
 571   dst.x = \sin{src.x}
 572
 573   dst.y = \sin{src.x}
 574
 575   dst.z = \sin{src.x}
 576
 577   dst.w = \sin{src.x}
 578
 579
 580 .. opcode:: SLE - Set On Less Equal Than
 581
 582 .. math::
 583
 584   dst.x = (src0.x <= src1.x) ? 1 : 0
 585
 586   dst.y = (src0.y <= src1.y) ? 1 : 0
 587
 588   dst.z = (src0.z <= src1.z) ? 1 : 0
 589
 590   dst.w = (src0.w <= src1.w) ? 1 : 0
 591
 592
 593 .. opcode:: SNE - Set On Not Equal
 594
 595 .. math::
 596
 597   dst.x = (src0.x != src1.x) ? 1 : 0
 598
 599   dst.y = (src0.y != src1.y) ? 1 : 0
 600
 601   dst.z = (src0.z != src1.z) ? 1 : 0
 602
 603   dst.w = (src0.w != src1.w) ? 1 : 0
 604
 605
 606 .. opcode:: STR - Set On True
 607
 608 .. math::
 609
 610   dst.x = 1
 611
 612   dst.y = 1
 613
 614   dst.z = 1
 615
 616   dst.w = 1
 617
 618
 619 .. opcode:: TEX - Texture Lookup
 620
 621   TBD
 622
 623
 624 .. opcode:: TXD - Texture Lookup with Derivatives
 625
 626   TBD
 627
 628
 629 .. opcode:: TXP - Projective Texture Lookup
 630
 631   TBD
 632
 633
 634 .. opcode:: UP2H - Unpack Two 16-Bit Floats
 635
 636   TBD
 637
 638   Considered for removal.
 639
 640 .. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
 641
 642   TBD
 643
 644   Considered for removal.
 645
 646 .. opcode:: UP4B - Unpack Four Signed 8-Bit Values
 647
 648   TBD
 649
 650   Considered for removal.
 651
 652 .. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
 653
 654   TBD
 655
 656   Considered for removal.
 657
 658 .. opcode:: X2D - 2D Coordinate Transformation
 659
 660 .. math::
 661
 662   dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
 663
 664   dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
 665
 666   dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
 667
 668   dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
 669
 670 Considered for removal.
 671
 672
 673 From GL_NV_vertex_program2
 674 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 675
 676
 677 .. opcode:: ARA - Address Register Add
 678
 679   TBD
 680
 681   Considered for removal.
 682
 683 .. opcode:: ARR - Address Register Load With Round
 684
 685 .. math::
 686
 687   dst.x = round(src.x)
 688
 689   dst.y = round(src.y)
 690
 691   dst.z = round(src.z)
 692
 693   dst.w = round(src.w)
 694
 695
 696 .. opcode:: BRA - Branch
 697
 698   pc = target
 699
 700   Considered for removal.
 701
 702 .. opcode:: CAL - Subroutine Call
 703
 704   push(pc)
 705   pc = target
 706
 707
 708 .. opcode:: RET - Subroutine Call Return
 709
 710   pc = pop()
 711
 712   Potential restrictions:
 713   * Only occurs at end of function.
 714
 715 .. opcode:: SSG - Set Sign
 716
 717 .. math::
 718
 719   dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
 720
 721   dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
 722
 723   dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
 724
 725   dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
 726
 727
 728 .. opcode:: CMP - Compare
 729
 730 .. math::
 731
 732   dst.x = (src0.x < 0) ? src1.x : src2.x
 733
 734   dst.y = (src0.y < 0) ? src1.y : src2.y
 735
 736   dst.z = (src0.z < 0) ? src1.z : src2.z
 737
 738   dst.w = (src0.w < 0) ? src1.w : src2.w
 739
 740
 741 .. opcode:: KIL - Conditional Discard
 742
 743 .. math::
 744
 745   if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
 746     discard
 747   endif
 748
 749
 750 .. opcode:: SCS - Sine Cosine
 751
 752 .. math::
 753
 754   dst.x = \cos{src.x}
 755
 756   dst.y = \sin{src.x}
 757
 758   dst.z = 0
 759
 760   dst.w = 1
 761
 762
 763 .. opcode:: TXB - Texture Lookup With Bias
 764
 765   TBD
 766
 767
 768 .. opcode:: NRM - 3-component Vector Normalise
 769
 770 .. math::
 771
 772   dst.x = \frac{src.x}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}}
 773
 774   dst.y = \frac{src.y}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}}
 775
 776   dst.z = \frac{src.z}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}}
 777
 778   dst.w = 1
 779
 780
 781 .. opcode:: DIV - Divide
 782
 783 .. math::
 784
 785   dst.x = \frac{src0.x}{src1.x}
 786
 787   dst.y = \frac{src0.y}{src1.y}
 788
 789   dst.z = \frac{src0.z}{src1.z}
 790
 791   dst.w = \frac{src0.w}{src1.w}
 792
 793
 794 .. opcode:: DP2 - 2-component Dot Product
 795
 796 .. math::
 797
 798   dst.x = src0.x \times src1.x + src0.y \times src1.y
 799
 800   dst.y = src0.x \times src1.x + src0.y \times src1.y
 801
 802   dst.z = src0.x \times src1.x + src0.y \times src1.y
 803
 804   dst.w = src0.x \times src1.x + src0.y \times src1.y
 805
 806
 807 .. opcode:: TXL - Texture Lookup With LOD
 808
 809   TBD
 810
 811
 812 .. opcode:: BRK - Break
 813
 814   TBD
 815
 816
 817 .. opcode:: IF - If
 818
 819   TBD
 820
 821
 822 .. opcode:: BGNFOR - Begin a For-Loop
 823
 824   dst.x = floor(src.x)
 825   dst.y = floor(src.y)
 826   dst.z = floor(src.z)
 827
 828   if (dst.y <= 0)
 829     pc = [matching ENDFOR] + 1
 830   endif
 831
 832   Note: The destination must be a loop register.
 833         The source must be a constant register.
 834
 835   Considered for cleanup / removal.
 836
 837
 838 .. opcode:: REP - Repeat
 839
 840   TBD
 841
 842
 843 .. opcode:: ELSE - Else
 844
 845   TBD
 846
 847
 848 .. opcode:: ENDIF - End If
 849
 850   TBD
 851
 852
 853 .. opcode:: ENDFOR - End a For-Loop
 854
 855   dst.x = dst.x + dst.z
 856   dst.y = dst.y - 1.0
 857
 858   if (dst.y > 0)
 859     pc = [matching BGNFOR instruction] + 1
 860   endif
 861
 862   Note: The destination must be a loop register.
 863
 864   Considered for cleanup / removal.
 865
 866 .. opcode:: ENDREP - End Repeat
 867
 868   TBD
 869
 870
 871 .. opcode:: PUSHA - Push Address Register On Stack
 872
 873   push(src.x)
 874   push(src.y)
 875   push(src.z)
 876   push(src.w)
 877
 878   Considered for cleanup / removal.
 879
 880 .. opcode:: POPA - Pop Address Register From Stack
 881
 882   dst.w = pop()
 883   dst.z = pop()
 884   dst.y = pop()
 885   dst.x = pop()
 886
 887   Considered for cleanup / removal.
 888
 889
 890 From GL_NV_gpu_program4
 891 ^^^^^^^^^^^^^^^^^^^^^^^^
 892
 893 Support for these opcodes is indicated by a special pipe capability bit (TBD).
 894
 895 .. opcode:: CEIL - Ceiling
 896
 897 .. math::
 898
 899   dst.x = \lceil src.x\rceil
 900
 901   dst.y = \lceil src.y\rceil
 902
 903   dst.z = \lceil src.z\rceil
 904
 905   dst.w = \lceil src.w\rceil
 906
 907
 908 .. opcode:: I2F - Integer To Float
 909
 910 .. math::
 911
 912   dst.x = (float) src.x
 913
 914   dst.y = (float) src.y
 915
 916   dst.z = (float) src.z
 917
 918   dst.w = (float) src.w
 919
 920
 921 .. opcode:: NOT - Bitwise Not
 922
 923 .. math::
 924
 925   dst.x = ~src.x
 926
 927   dst.y = ~src.y
 928
 929   dst.z = ~src.z
 930
 931   dst.w = ~src.w
 932
 933
 934 .. opcode:: TRUNC - Truncate
 935
 936 .. math::
 937
 938   dst.x = trunc(src.x)
 939
 940   dst.y = trunc(src.y)
 941
 942   dst.z = trunc(src.z)
 943
 944   dst.w = trunc(src.w)
 945
 946
 947 .. opcode:: SHL - Shift Left
 948
 949 .. math::
 950
 951   dst.x = src0.x << src1.x
 952
 953   dst.y = src0.y << src1.x
 954
 955   dst.z = src0.z << src1.x
 956
 957   dst.w = src0.w << src1.x
 958
 959
 960 .. opcode:: SHR - Shift Right
 961
 962 .. math::
 963
 964   dst.x = src0.x >> src1.x
 965
 966   dst.y = src0.y >> src1.x
 967
 968   dst.z = src0.z >> src1.x
 969
 970   dst.w = src0.w >> src1.x
 971
 972
 973 .. opcode:: AND - Bitwise And
 974
 975 .. math::
 976
 977   dst.x = src0.x & src1.x
 978
 979   dst.y = src0.y & src1.y
 980
 981   dst.z = src0.z & src1.z
 982
 983   dst.w = src0.w & src1.w
 984
 985
 986 .. opcode:: OR - Bitwise Or
 987
 988 .. math::
 989
 990   dst.x = src0.x | src1.x
 991
 992   dst.y = src0.y | src1.y
 993
 994   dst.z = src0.z | src1.z
 995
 996   dst.w = src0.w | src1.w
 997
 998
 999 .. opcode:: MOD - Modulus
1000
1001 .. math::
1002
1003   dst.x = src0.x \bmod src1.x
1004
1005   dst.y = src0.y \bmod src1.y
1006
1007   dst.z = src0.z \bmod src1.z
1008
1009   dst.w = src0.w \bmod src1.w
1010
1011
1012 .. opcode:: XOR - Bitwise Xor
1013
1014 .. math::
1015
1016   dst.x = src0.x \oplus src1.x
1017
1018   dst.y = src0.y \oplus src1.y
1019
1020   dst.z = src0.z \oplus src1.z
1021
1022   dst.w = src0.w \oplus src1.w
1023
1024
1025 .. opcode:: SAD - Sum Of Absolute Differences
1026
1027 .. math::
1028
1029   dst.x = |src0.x - src1.x| + src2.x
1030
1031   dst.y = |src0.y - src1.y| + src2.y
1032
1033   dst.z = |src0.z - src1.z| + src2.z
1034
1035   dst.w = |src0.w - src1.w| + src2.w
1036
1037
1038 .. opcode:: TXF - Texel Fetch
1039
1040   TBD
1041
1042
1043 .. opcode:: TXQ - Texture Size Query
1044
1045   TBD
1046
1047
1048 .. opcode:: CONT - Continue
1049
1050   TBD
1051
1052
1053 From GL_NV_geometry_program4
1054 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1055
1056
1057 .. opcode:: EMIT - Emit
1058
1059   TBD
1060
1061
1062 .. opcode:: ENDPRIM - End Primitive
1063
1064   TBD
1065
1066
1067 From GLSL
1068 ^^^^^^^^^^
1069
1070
1071 .. opcode:: BGNLOOP - Begin a Loop
1072
1073   TBD
1074
1075
1076 .. opcode:: BGNSUB - Begin Subroutine
1077
1078   TBD
1079
1080
1081 .. opcode:: ENDLOOP - End a Loop
1082
1083   TBD
1084
1085
1086 .. opcode:: ENDSUB - End Subroutine
1087
1088   TBD
1089
1090
1091 .. opcode:: NOP - No Operation
1092
1093   Do nothing.
1094
1095
1096 .. opcode:: NRM4 - 4-component Vector Normalise
1097
1098 .. math::
1099
1100   dst.x = \frac{src.x}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1101
1102   dst.y = \frac{src.y}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1103
1104   dst.z = \frac{src.z}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1105
1106   dst.w = \frac{src.w}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1107
1108
1109 ps_2_x
1110 ^^^^^^^^^^^^
1111
1112
1113 .. opcode:: CALLNZ - Subroutine Call If Not Zero
1114
1115   TBD
1116
1117
1118 .. opcode:: IFC - If
1119
1120   TBD
1121
1122
1123 .. opcode:: BREAKC - Break Conditional
1124
1125   TBD
1126
1127 .. _doubleopcodes:
1128
1129 Double Opcodes
1130 ^^^^^^^^^^^^^^^
1131
1132 .. opcode:: DADD - Add Double
1133
1134 .. math::
1135
1136   dst.xy = src0.xy + src1.xy
1137
1138   dst.zw = src0.zw + src1.zw
1139
1140
1141 .. opcode:: DDIV - Divide Double
1142
1143 .. math::
1144
1145   dst.xy = src0.xy / src1.xy
1146
1147   dst.zw = src0.zw / src1.zw
1148
1149 .. opcode:: DSEQ - Set Double on Equal
1150
1151 .. math::
1152
1153   dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F
1154
1155   dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
1156
1157 .. opcode:: DSLT - Set Double on Less than
1158
1159 .. math::
1160
1161   dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F
1162
1163   dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
1164
1165 .. opcode:: DFRAC - Double Fraction
1166
1167 .. math::
1168
1169   dst.xy = src.xy - \lfloor src.xy\rfloor
1170
1171   dst.zw = src.zw - \lfloor src.zw\rfloor
1172
1173
1174 .. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components
1175
1176 .. math::
1177
1178   dst0.xy = frexp(src.xy, dst1.xy)
1179
1180   dst0.zw = frexp(src.zw, dst1.zw)
1181
1182 .. opcode:: DLDEXP - Multiply Double Number by Integral Power of 2
1183
1184 .. math::
1185
1186   dst.xy = ldexp(src0.xy, src1.xy)
1187
1188   dst.zw = ldexp(src0.zw, src1.zw)
1189
1190 .. opcode:: DMIN - Minimum Double
1191
1192 .. math::
1193
1194   dst.xy = min(src0.xy, src1.xy)
1195
1196   dst.zw = min(src0.zw, src1.zw)
1197
1198 .. opcode:: DMAX - Maximum Double
1199
1200 .. math::
1201
1202   dst.xy = max(src0.xy, src1.xy)
1203
1204   dst.zw = max(src0.zw, src1.zw)
1205
1206 .. opcode:: DMUL - Multiply Double
1207
1208 .. math::
1209
1210   dst.xy = src0.xy \times src1.xy
1211
1212   dst.zw = src0.zw \times src1.zw
1213
1214
1215 .. opcode:: DMAD - Multiply And Add Doubles
1216
1217 .. math::
1218
1219   dst.xy = src0.xy \times src1.xy + src2.xy
1220
1221   dst.zw = src0.zw \times src1.zw + src2.zw
1222
1223
1224 .. opcode:: DRCP - Reciprocal Double
1225
1226 .. math::
1227
1228    dst.xy = \frac{1}{src.xy}
1229
1230    dst.zw = \frac{1}{src.zw}
1231
1232 .. opcode:: DSQRT - Square root double
1233
1234 .. math::
1235
1236    dst.xy = \sqrt{src.xy}
1237
1238    dst.zw = \sqrt{src.zw}
1239
1240
1241 Explanation of symbols used
1242 ------------------------------
1243
1244
1245 Functions
1246 ^^^^^^^^^^^^^^
1247
1248
1249   :math:`|x|`       Absolute value of `x`.
1250
1251   :math:`\lceil x \rceil` Ceiling of `x`.
1252
1253   clamp(x,y,z)      Clamp x between y and z.
1254                     (x < y) ? y : (x > z) ? z : x
1255
1256   :math:`\lfloor x\rfloor` Floor of `x`.
1257
1258   :math:`\log_2{x}` Logarithm of `x`, base 2.
1259
1260   max(x,y)          Maximum of x and y.
1261                     (x > y) ? x : y
1262
1263   min(x,y)          Minimum of x and y.
1264                     (x < y) ? x : y
1265
1266   partialx(x)       Derivative of x relative to fragment's X.
1267
1268   partialy(x)       Derivative of x relative to fragment's Y.
1269
1270   pop()             Pop from stack.
1271
1272   :math:`x^y`       `x` to the power `y`.
1273
1274   push(x)           Push x on stack.
1275
1276   round(x)          Round x.
1277
1278   trunc(x)          Truncate x, i.e. drop the fraction bits.
1279
1280
1281 Keywords
1282 ^^^^^^^^^^^^^
1283
1284
1285   discard           Discard fragment.
1286
1287   pc                Program counter.
1288
1289   target            Label of target instruction.
1290
1291
1292 Other tokens
1293 ---------------
1294
1295
1296 Declaration Semantic
1297 ^^^^^^^^^^^^^^^^^^^^^^^^
1298
1299
1300   Follows Declaration token if Semantic bit is set.
1301
1302   Since its purpose is to link a shader with other stages of the pipeline,
1303   it is valid to follow only those Declaration tokens that declare a register
1304   either in INPUT or OUTPUT file.
1305
1306   SemanticName field contains the semantic name of the register being declared.
1307   There is no default value.
1308
1309   SemanticIndex is an optional subscript that can be used to distinguish
1310   different register declarations with the same semantic name. The default value
1311   is 0.
1312
1313   The meanings of the individual semantic names are explained in the following
1314   sections.
1315
1316 TGSI_SEMANTIC_POSITION
1317 """"""""""""""""""""""
1318
1319 Position, sometimes known as HPOS or WPOS for historical reasons, is the
1320 location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z``
1321 are the Cartesian coordinates, and ``w`` is the homogeneous coordinate and used
1322 for the perspective divide, if enabled.
1323
1324 As a vertex shader output, position should be scaled to the viewport. When
1325 used in fragment shaders, position will be in window coordinates. The convention
1326 used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties.
1327
1328 XXX additionally, is there a way to configure the perspective divide? it's
1329 accelerated on most chipsets AFAIK...
1330
1331 Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can
1332 be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``.
1333
1334 XXX usually? can we solidify that?
1335
1336 TGSI_SEMANTIC_COLOR
1337 """""""""""""""""""
1338
1339 Colors are used to, well, color the primitives. Colors are always in
1340 ``(r, g, b, a)`` format.
1341
1342 If alpha is not specified, it defaults to 1.
1343
1344 TGSI_SEMANTIC_BCOLOR
1345 """"""""""""""""""""
1346
1347 Back-facing colors are only used for back-facing polygons, and are only valid
1348 in vertex shader outputs. After rasterization, all polygons are front-facing
1349 and COLOR and BCOLOR end up occupying the same slots in the fragment, so
1350 all BCOLORs effectively become regular COLORs in the fragment shader.
1351
1352 TGSI_SEMANTIC_FOG
1353 """""""""""""""""
1354
1355 The fog coordinate historically has been used to replace the depth coordinate
1356 for generation of fog in dedicated fog blocks. Gallium, however, does not use
1357 dedicated fog acceleration, placing it entirely in the fragment shader
1358 instead.
1359
1360 The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first
1361 component matters when writing from the vertex shader; the driver will ensure
1362 that the coordinate is in this format when used as a fragment shader input.
1363
1364 TGSI_SEMANTIC_PSIZE
1365 """""""""""""""""""
1366
1367 PSIZE, or point size, is used to specify point sizes per-vertex. It should
1368 be in ``(p, n, x, f)`` format, where ``p`` is the point size, ``n`` is the minimum
1369 size, ``x`` is the maximum size, and ``f`` is the fade threshold.
1370
1371 XXX this is arb_vp. is this what we actually do? should double-check...
1372
1373 When using this semantic, be sure to set the appropriate state in the
1374 :ref:`rasterizer` first.
1375
1376 TGSI_SEMANTIC_GENERIC
1377 """""""""""""""""""""
1378
1379 Generic semantics are nearly always used for texture coordinate attributes,
1380 in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds
1381 of lookups, and ``q`` is the level-of-detail bias for biased sampling.
1382
1383 These attributes are called "generic" because they may be used for anything
1384 else, including parameters, texture generation information, or anything that
1385 can be stored inside a four-component vector.
1386
1387 TGSI_SEMANTIC_NORMAL
1388 """"""""""""""""""""
1389
1390 Vertex normal; could be used to implement per-pixel lighting for legacy APIs
1391 that allow mixing fixed-function and programmable stages.
1392
1393 TGSI_SEMANTIC_FACE
1394 """"""""""""""""""
1395
1396 FACE is the facing bit, to store the facing information for the fragment
1397 shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive
1398 when the fragment is front-facing, and negative when the fragment is
1399 back-facing.
1400
1401 TGSI_SEMANTIC_EDGEFLAG
1402 """"""""""""""""""""""
1403
1404 XXX no clue
1405
1406
1407 Properties
1408 ^^^^^^^^^^^^^^^^^^^^^^^^
1409
1410
1411   Properties are general directives that apply to the whole TGSI program.
1412
1413 FS_COORD_ORIGIN
1414 """""""""""""""
1415
1416 Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin.
1417 The default value is UPPER_LEFT.
1418
1419 If UPPER_LEFT, the position will be (0,0) at the upper left corner and
1420 increase downward and rightward.
1421 If LOWER_LEFT, the position will be (0,0) at the lower left corner and
1422 increase upward and rightward.
1423
1424 OpenGL defaults to LOWER_LEFT, and is configurable with the
1425 GL_ARB_fragment_coord_conventions extension.
1426
1427 DirectX 9/10 use UPPER_LEFT.
1428
1429 FS_COORD_PIXEL_CENTER
1430 """""""""""""""""""""
1431
1432 Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention.
1433 The default value is HALF_INTEGER.
1434
1435 If HALF_INTEGER, the fractional part of the position will be 0.5.
1436 If INTEGER, the fractional part of the position will be 0.0.
1437
1438 Note that this does not affect the set of fragments generated by
1439 rasterization, which is instead controlled by gl_rasterization_rules in the
1440 rasterizer.
1441
1442 OpenGL defaults to HALF_INTEGER, and is configurable with the
1443 GL_ARB_fragment_coord_conventions extension.
1444
1445 DirectX 9 uses INTEGER.
1446 DirectX 10 uses HALF_INTEGER.
1447
1448
1449
1450 Texture Sampling and Texture Formats
1451 ------------------------------------
1452
1453 This table shows how texture image components are returned as (x,y,z,w)
1454 tuples by TGSI texture instructions, such as TEX, TXD, and TXP.
1455 For reference, OpenGL and Direct3D conventions are shown as well.
1456
1457 +--------------------+--------------+--------------------+--------------+
1458 | Texture Components | Gallium      | OpenGL             | Direct3D 9   |
1459 +====================+==============+====================+==============+
1460 | R                  | XXX TBD      | (r, 0, 0, 1)       | (r, 1, 1, 1) |
1461 +--------------------+--------------+--------------------+--------------+
1462 | RG                 | XXX TBD      | (r, g, 0, 1)       | (r, g, 1, 1) |
1463 +--------------------+--------------+--------------------+--------------+
1464 | RGB                | (r, g, b, 1) | (r, g, b, 1)       | (r, g, b, 1) |
1465 +--------------------+--------------+--------------------+--------------+
1466 | RGBA               | (r, g, b, a) | (r, g, b, a)       | (r, g, b, a) |
1467 +--------------------+--------------+--------------------+--------------+
1468 | A                  | (0, 0, 0, a) | (0, 0, 0, a)       | (0, 0, 0, a) |
1469 +--------------------+--------------+--------------------+--------------+
1470 | L                  | (l, l, l, 1) | (l, l, l, 1)       | (l, l, l, 1) |
1471 +--------------------+--------------+--------------------+--------------+
1472 | LA                 | (l, l, l, a) | (l, l, l, a)       | (l, l, l, a) |
1473 +--------------------+--------------+--------------------+--------------+
1474 | I                  | (i, i, i, i) | (i, i, i, i)       | N/A          |
1475 +--------------------+--------------+--------------------+--------------+
1476 | UV                 | XXX TBD      | (0, 0, 0, 1)       | (u, v, 1, 1) |
1477 |                    |              | [#envmap-bumpmap]_ |              |
1478 +--------------------+--------------+--------------------+--------------+
1479 | Z                  | XXX TBD      | (z, z, z, 1)       | (0, z, 0, 1) |
1480 |                    |              | [#depth-tex-mode]_ |              |
1481 +--------------------+--------------+--------------------+--------------+
1482
1483 .. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
1484 .. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
1485  or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE.