src/gallium/docs/source/tgsi.rst

   1 TGSI
   2 ====
   3
   4 TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language
   5 for describing shaders. Since Gallium is inherently shaderful, shaders are
   6 an important part of the API. TGSI is the only intermediate representation
   7 used by all drivers.
   8
   9 Instruction Set
  10 ---------------
  11
  12 From GL_NV_vertex_program
  13 ^^^^^^^^^^^^^^^^^^^^^^^^^
  14
  15
  16 .. opcode:: ARL - Address Register Load
  17
  18 .. math::
  19
  20   dst.x = \lfloor src.x\rfloor
  21
  22   dst.y = \lfloor src.y\rfloor
  23
  24   dst.z = \lfloor src.z\rfloor
  25
  26   dst.w = \lfloor src.w\rfloor
  27
  28
  29 .. opcode:: MOV - Move
  30
  31 .. math::
  32
  33   dst.x = src.x
  34
  35   dst.y = src.y
  36
  37   dst.z = src.z
  38
  39   dst.w = src.w
  40
  41
  42 .. opcode:: LIT - Light Coefficients
  43
  44 .. math::
  45
  46   dst.x = 1
  47
  48   dst.y = max(src.x, 0)
  49
  50   dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
  51
  52   dst.w = 1
  53
  54
  55 .. opcode:: RCP - Reciprocal
  56
  57 .. math::
  58
  59   dst.x = \frac{1}{src.x}
  60
  61   dst.y = \frac{1}{src.x}
  62
  63   dst.z = \frac{1}{src.x}
  64
  65   dst.w = \frac{1}{src.x}
  66
  67
  68 .. opcode:: RSQ - Reciprocal Square Root
  69
  70 .. math::
  71
  72   dst.x = \frac{1}{\sqrt{|src.x|}}
  73
  74   dst.y = \frac{1}{\sqrt{|src.x|}}
  75
  76   dst.z = \frac{1}{\sqrt{|src.x|}}
  77
  78   dst.w = \frac{1}{\sqrt{|src.x|}}
  79
  80
  81 .. opcode:: EXP - Approximate Exponential Base 2
  82
  83 .. math::
  84
  85   dst.x = 2^{\lfloor src.x\rfloor}
  86
  87   dst.y = src.x - \lfloor src.x\rfloor
  88
  89   dst.z = 2^{src.x}
  90
  91   dst.w = 1
  92
  93
  94 .. opcode:: LOG - Approximate Logarithm Base 2
  95
  96 .. math::
  97
  98   dst.x = \lfloor\log_2{|src.x|}\rfloor
  99
 100   dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
 101
 102   dst.z = \log_2{|src.x|}
 103
 104   dst.w = 1
 105
 106
 107 .. opcode:: MUL - Multiply
 108
 109 .. math::
 110
 111   dst.x = src0.x \times src1.x
 112
 113   dst.y = src0.y \times src1.y
 114
 115   dst.z = src0.z \times src1.z
 116
 117   dst.w = src0.w \times src1.w
 118
 119
 120 .. opcode:: ADD - Add
 121
 122 .. math::
 123
 124   dst.x = src0.x + src1.x
 125
 126   dst.y = src0.y + src1.y
 127
 128   dst.z = src0.z + src1.z
 129
 130   dst.w = src0.w + src1.w
 131
 132
 133 .. opcode:: DP3 - 3-component Dot Product
 134
 135 .. math::
 136
 137   dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 138
 139   dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 140
 141   dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 142
 143   dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 144
 145
 146 .. opcode:: DP4 - 4-component Dot Product
 147
 148 .. math::
 149
 150   dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 151
 152   dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 153
 154   dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 155
 156   dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 157
 158
 159 .. opcode:: DST - Distance Vector
 160
 161 .. math::
 162
 163   dst.x = 1
 164
 165   dst.y = src0.y \times src1.y
 166
 167   dst.z = src0.z
 168
 169   dst.w = src1.w
 170
 171
 172 .. opcode:: MIN - Minimum
 173
 174 .. math::
 175
 176   dst.x = min(src0.x, src1.x)
 177
 178   dst.y = min(src0.y, src1.y)
 179
 180   dst.z = min(src0.z, src1.z)
 181
 182   dst.w = min(src0.w, src1.w)
 183
 184
 185 .. opcode:: MAX - Maximum
 186
 187 .. math::
 188
 189   dst.x = max(src0.x, src1.x)
 190
 191   dst.y = max(src0.y, src1.y)
 192
 193   dst.z = max(src0.z, src1.z)
 194
 195   dst.w = max(src0.w, src1.w)
 196
 197
 198 .. opcode:: SLT - Set On Less Than
 199
 200 .. math::
 201
 202   dst.x = (src0.x < src1.x) ? 1 : 0
 203
 204   dst.y = (src0.y < src1.y) ? 1 : 0
 205
 206   dst.z = (src0.z < src1.z) ? 1 : 0
 207
 208   dst.w = (src0.w < src1.w) ? 1 : 0
 209
 210
 211 .. opcode:: SGE - Set On Greater Than Or Equal
 212
 213 .. math::
 214
 215   dst.x = (src0.x >= src1.x) ? 1 : 0
 216
 217   dst.y = (src0.y >= src1.y) ? 1 : 0
 218
 219   dst.z = (src0.z >= src1.z) ? 1 : 0
 220
 221   dst.w = (src0.w >= src1.w) ? 1 : 0
 222
 223
 224 .. opcode:: MAD - Multiply And Add
 225
 226 .. math::
 227
 228   dst.x = src0.x \times src1.x + src2.x
 229
 230   dst.y = src0.y \times src1.y + src2.y
 231
 232   dst.z = src0.z \times src1.z + src2.z
 233
 234   dst.w = src0.w \times src1.w + src2.w
 235
 236
 237 .. opcode:: SUB - Subtract
 238
 239 .. math::
 240
 241   dst.x = src0.x - src1.x
 242
 243   dst.y = src0.y - src1.y
 244
 245   dst.z = src0.z - src1.z
 246
 247   dst.w = src0.w - src1.w
 248
 249
 250 .. opcode:: LRP - Linear Interpolate
 251
 252 .. math::
 253
 254   dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x
 255
 256   dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y
 257
 258   dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z
 259
 260   dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
 261
 262
 263 .. opcode:: CND - Condition
 264
 265 .. math::
 266
 267   dst.x = (src2.x > 0.5) ? src0.x : src1.x
 268
 269   dst.y = (src2.y > 0.5) ? src0.y : src1.y
 270
 271   dst.z = (src2.z > 0.5) ? src0.z : src1.z
 272
 273   dst.w = (src2.w > 0.5) ? src0.w : src1.w
 274
 275
 276 .. opcode:: DP2A - 2-component Dot Product And Add
 277
 278 .. math::
 279
 280   dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
 281
 282   dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
 283
 284   dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
 285
 286   dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
 287
 288
 289 .. opcode:: FRAC - Fraction
 290
 291 .. math::
 292
 293   dst.x = src.x - \lfloor src.x\rfloor
 294
 295   dst.y = src.y - \lfloor src.y\rfloor
 296
 297   dst.z = src.z - \lfloor src.z\rfloor
 298
 299   dst.w = src.w - \lfloor src.w\rfloor
 300
 301
 302 .. opcode:: CLAMP - Clamp
 303
 304 .. math::
 305
 306   dst.x = clamp(src0.x, src1.x, src2.x)
 307
 308   dst.y = clamp(src0.y, src1.y, src2.y)
 309
 310   dst.z = clamp(src0.z, src1.z, src2.z)
 311
 312   dst.w = clamp(src0.w, src1.w, src2.w)
 313
 314
 315 .. opcode:: FLR - Floor
 316
 317 This is identical to ARL.
 318
 319 .. math::
 320
 321   dst.x = \lfloor src.x\rfloor
 322
 323   dst.y = \lfloor src.y\rfloor
 324
 325   dst.z = \lfloor src.z\rfloor
 326
 327   dst.w = \lfloor src.w\rfloor
 328
 329
 330 .. opcode:: ROUND - Round
 331
 332 .. math::
 333
 334   dst.x = round(src.x)
 335
 336   dst.y = round(src.y)
 337
 338   dst.z = round(src.z)
 339
 340   dst.w = round(src.w)
 341
 342
 343 .. opcode:: EX2 - Exponential Base 2
 344
 345 .. math::
 346
 347   dst.x = 2^{src.x}
 348
 349   dst.y = 2^{src.x}
 350
 351   dst.z = 2^{src.x}
 352
 353   dst.w = 2^{src.x}
 354
 355
 356 .. opcode:: LG2 - Logarithm Base 2
 357
 358 .. math::
 359
 360   dst.x = \log_2{src.x}
 361
 362   dst.y = \log_2{src.x}
 363
 364   dst.z = \log_2{src.x}
 365
 366   dst.w = \log_2{src.x}
 367
 368
 369 .. opcode:: POW - Power
 370
 371 .. math::
 372
 373   dst.x = src0.x^{src1.x}
 374
 375   dst.y = src0.x^{src1.x}
 376
 377   dst.z = src0.x^{src1.x}
 378
 379   dst.w = src0.x^{src1.x}
 380
 381 .. opcode:: XPD - Cross Product
 382
 383 .. math::
 384
 385   dst.x = src0.y \times src1.z - src1.y \times src0.z
 386
 387   dst.y = src0.z \times src1.x - src1.z \times src0.x
 388
 389   dst.z = src0.x \times src1.y - src1.x \times src0.y
 390
 391   dst.w = 1
 392
 393
 394 .. opcode:: ABS - Absolute
 395
 396 .. math::
 397
 398   dst.x = |src.x|
 399
 400   dst.y = |src.y|
 401
 402   dst.z = |src.z|
 403
 404   dst.w = |src.w|
 405
 406
 407 .. opcode:: RCC - Reciprocal Clamped
 408
 409 XXX cleanup on aisle three
 410
 411 .. math::
 412
 413   dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 414
 415   dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 416
 417   dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 418
 419   dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 420
 421
 422 .. opcode:: DPH - Homogeneous Dot Product
 423
 424 .. math::
 425
 426   dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 427
 428   dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 429
 430   dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 431
 432   dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 433
 434
 435 .. opcode:: COS - Cosine
 436
 437 .. math::
 438
 439   dst.x = \cos{src.x}
 440
 441   dst.y = \cos{src.x}
 442
 443   dst.z = \cos{src.x}
 444
 445   dst.w = \cos{src.x}
 446
 447
 448 .. opcode:: DDX - Derivative Relative To X
 449
 450 .. math::
 451
 452   dst.x = partialx(src.x)
 453
 454   dst.y = partialx(src.y)
 455
 456   dst.z = partialx(src.z)
 457
 458   dst.w = partialx(src.w)
 459
 460
 461 .. opcode:: DDY - Derivative Relative To Y
 462
 463 .. math::
 464
 465   dst.x = partialy(src.x)
 466
 467   dst.y = partialy(src.y)
 468
 469   dst.z = partialy(src.z)
 470
 471   dst.w = partialy(src.w)
 472
 473
 474 .. opcode:: KILP - Predicated Discard
 475
 476   discard
 477
 478
 479 .. opcode:: PK2H - Pack Two 16-bit Floats
 480
 481   TBD
 482
 483
 484 .. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
 485
 486   TBD
 487
 488
 489 .. opcode:: PK4B - Pack Four Signed 8-bit Scalars
 490
 491   TBD
 492
 493
 494 .. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
 495
 496   TBD
 497
 498
 499 .. opcode:: RFL - Reflection Vector
 500
 501 .. math::
 502
 503   dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x
 504
 505   dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y
 506
 507   dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z
 508
 509   dst.w = 1
 510
 511 Considered for removal.
 512
 513
 514 .. opcode:: SEQ - Set On Equal
 515
 516 .. math::
 517
 518   dst.x = (src0.x == src1.x) ? 1 : 0
 519
 520   dst.y = (src0.y == src1.y) ? 1 : 0
 521
 522   dst.z = (src0.z == src1.z) ? 1 : 0
 523
 524   dst.w = (src0.w == src1.w) ? 1 : 0
 525
 526
 527 .. opcode:: SFL - Set On False
 528
 529 .. math::
 530
 531   dst.x = 0
 532
 533   dst.y = 0
 534
 535   dst.z = 0
 536
 537   dst.w = 0
 538
 539 Considered for removal.
 540
 541 .. opcode:: SGT - Set On Greater Than
 542
 543 .. math::
 544
 545   dst.x = (src0.x > src1.x) ? 1 : 0
 546
 547   dst.y = (src0.y > src1.y) ? 1 : 0
 548
 549   dst.z = (src0.z > src1.z) ? 1 : 0
 550
 551   dst.w = (src0.w > src1.w) ? 1 : 0
 552
 553
 554 .. opcode:: SIN - Sine
 555
 556 .. math::
 557
 558   dst.x = \sin{src.x}
 559
 560   dst.y = \sin{src.x}
 561
 562   dst.z = \sin{src.x}
 563
 564   dst.w = \sin{src.x}
 565
 566
 567 .. opcode:: SLE - Set On Less Than Or Equal
 568
 569 .. math::
 570
 571   dst.x = (src0.x <= src1.x) ? 1 : 0
 572
 573   dst.y = (src0.y <= src1.y) ? 1 : 0
 574
 575   dst.z = (src0.z <= src1.z) ? 1 : 0
 576
 577   dst.w = (src0.w <= src1.w) ? 1 : 0
 578
 579
 580 .. opcode:: SNE - Set On Not Equal
 581
 582 .. math::
 583
 584   dst.x = (src0.x != src1.x) ? 1 : 0
 585
 586   dst.y = (src0.y != src1.y) ? 1 : 0
 587
 588   dst.z = (src0.z != src1.z) ? 1 : 0
 589
 590   dst.w = (src0.w != src1.w) ? 1 : 0
 591
 592
 593 .. opcode:: STR - Set On True
 594
 595 .. math::
 596
 597   dst.x = 1
 598
 599   dst.y = 1
 600
 601   dst.z = 1
 602
 603   dst.w = 1
 604
 605
 606 .. opcode:: TEX - Texture Lookup
 607
 608   TBD
 609
 610
 611 .. opcode:: TXD - Texture Lookup with Derivatives
 612
 613   TBD
 614
 615
 616 .. opcode:: TXP - Projective Texture Lookup
 617
 618   TBD
 619
 620
 621 .. opcode:: UP2H - Unpack Two 16-Bit Floats
 622
 623   TBD
 624
 625   Considered for removal.
 626
 627 .. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
 628
 629   TBD
 630
 631   Considered for removal.
 632
 633 .. opcode:: UP4B - Unpack Four Signed 8-Bit Values
 634
 635   TBD
 636
 637   Considered for removal.
 638
 639 .. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
 640
 641   TBD
 642
 643   Considered for removal.
 644
 645 .. opcode:: X2D - 2D Coordinate Transformation
 646
 647 .. math::
 648
 649   dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
 650
 651   dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
 652
 653   dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
 654
 655   dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
 656
 657 Considered for removal.
 658
 659
 660 From GL_NV_vertex_program2
 661 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 662
 663
 664 .. opcode:: ARA - Address Register Add
 665
 666   TBD
 667
 668   Considered for removal.
 669
 670 .. opcode:: ARR - Address Register Load With Round
 671
 672 .. math::
 673
 674   dst.x = round(src.x)
 675
 676   dst.y = round(src.y)
 677
 678   dst.z = round(src.z)
 679
 680   dst.w = round(src.w)
 681
 682
 683 .. opcode:: BRA - Branch
 684
 685   pc = target
 686
 687   Considered for removal.
 688
 689 .. opcode:: CAL - Subroutine Call
 690
 691   push(pc)
 692   pc = target
 693
 694
 695 .. opcode:: RET - Subroutine Call Return
 696
 697   pc = pop()
 698
 699   Potential restrictions:
 700   * Only occurs at end of function.
 701
 702 .. opcode:: SSG - Set Sign
 703
 704 .. math::
 705
 706   dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
 707
 708   dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
 709
 710   dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
 711
 712   dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
 713
 714
 715 .. opcode:: CMP - Compare
 716
 717 .. math::
 718
 719   dst.x = (src0.x < 0) ? src1.x : src2.x
 720
 721   dst.y = (src0.y < 0) ? src1.y : src2.y
 722
 723   dst.z = (src0.z < 0) ? src1.z : src2.z
 724
 725   dst.w = (src0.w < 0) ? src1.w : src2.w
 726
 727
 728 .. opcode:: KIL - Conditional Discard
 729
 730 ::
 731
 732   if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
 733     discard
 734   endif
 735
 736
 737 .. opcode:: SCS - Sine Cosine
 738
 739 .. math::
 740
 741   dst.x = \cos{src.x}
 742
 743   dst.y = \sin{src.x}
 744
 745   dst.z = 0
 746
 747   dst.w = 1
 748
 749
 750 .. opcode:: TXB - Texture Lookup With Bias
 751
 752   TBD
 753
 754
 755 .. opcode:: NRM - 3-component Vector Normalise
 756
 757 .. math::
 758
 759   dst.x = \frac{src.x}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}}
 760
 761   dst.y = \frac{src.y}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}}
 762
 763   dst.z = \frac{src.z}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}}
 764
 765   dst.w = 1
 766
 767
 768 .. opcode:: DIV - Divide
 769
 770 .. math::
 771
 772   dst.x = \frac{src0.x}{src1.x}
 773
 774   dst.y = \frac{src0.y}{src1.y}
 775
 776   dst.z = \frac{src0.z}{src1.z}
 777
 778   dst.w = \frac{src0.w}{src1.w}
 779
 780
 781 .. opcode:: DP2 - 2-component Dot Product
 782
 783 .. math::
 784
 785   dst.x = src0.x \times src1.x + src0.y \times src1.y
 786
 787   dst.y = src0.x \times src1.x + src0.y \times src1.y
 788
 789   dst.z = src0.x \times src1.x + src0.y \times src1.y
 790
 791   dst.w = src0.x \times src1.x + src0.y \times src1.y
 792
 793
 794 .. opcode:: TXL - Texture Lookup With LOD
 795
 796   TBD
 797
 798
 799 .. opcode:: BRK - Break
 800
 801   TBD
 802
 803
 804 .. opcode:: IF - If
 805
 806   TBD
 807
 808
 809 .. opcode:: BGNFOR - Begin a For-Loop
 810
 811   dst.x = floor(src.x)
 812   dst.y = floor(src.y)
 813   dst.z = floor(src.z)
 814
 815   if (dst.y <= 0)
 816     pc = [matching ENDFOR] + 1
 817   endif
 818
 819   Note: The destination must be a loop register.
 820         The source must be a constant register.
 821
 822   Considered for cleanup / removal.
 823
 824
 825 .. opcode:: REP - Repeat
 826
 827   TBD
 828
 829
 830 .. opcode:: ELSE - Else
 831
 832   TBD
 833
 834
 835 .. opcode:: ENDIF - End If
 836
 837   TBD
 838
 839
 840 .. opcode:: ENDFOR - End a For-Loop
 841
 842   dst.x = dst.x + dst.z
 843   dst.y = dst.y - 1.0
 844
 845   if (dst.y > 0)
 846     pc = [matching BGNFOR instruction] + 1
 847   endif
 848
 849   Note: The destination must be a loop register.
 850
 851   Considered for cleanup / removal.
 852
 853 .. opcode:: ENDREP - End Repeat
 854
 855   TBD
 856
 857
 858 .. opcode:: PUSHA - Push Address Register On Stack
 859
 860   push(src.x)
 861   push(src.y)
 862   push(src.z)
 863   push(src.w)
 864
 865   Considered for cleanup / removal.
 866
 867 .. opcode:: POPA - Pop Address Register From Stack
 868
 869   dst.w = pop()
 870   dst.z = pop()
 871   dst.y = pop()
 872   dst.x = pop()
 873
 874   Considered for cleanup / removal.
 875
 876
 877 From GL_NV_gpu_program4
 878 ^^^^^^^^^^^^^^^^^^^^^^^^
 879
 880 Support for these opcodes is indicated by a special pipe capability bit (TBD).
 881
 882 .. opcode:: CEIL - Ceiling
 883
 884 .. math::
 885
 886   dst.x = \lceil src.x\rceil
 887
 888   dst.y = \lceil src.y\rceil
 889
 890   dst.z = \lceil src.z\rceil
 891
 892   dst.w = \lceil src.w\rceil
 893
 894
 895 .. opcode:: I2F - Integer To Float
 896
 897 .. math::
 898
 899   dst.x = (float) src.x
 900
 901   dst.y = (float) src.y
 902
 903   dst.z = (float) src.z
 904
 905   dst.w = (float) src.w
 906
 907
 908 .. opcode:: NOT - Bitwise Not
 909
 910 .. math::
 911
 912   dst.x = ~src.x
 913
 914   dst.y = ~src.y
 915
 916   dst.z = ~src.z
 917
 918   dst.w = ~src.w
 919
 920
 921 .. opcode:: TRUNC - Truncate
 922
 923 .. math::
 924
 925   dst.x = trunc(src.x)
 926
 927   dst.y = trunc(src.y)
 928
 929   dst.z = trunc(src.z)
 930
 931   dst.w = trunc(src.w)
 932
 933
 934 .. opcode:: SHL - Shift Left
 935
 936 .. math::
 937
 938   dst.x = src0.x << src1.x
 939
 940   dst.y = src0.y << src1.x
 941
 942   dst.z = src0.z << src1.x
 943
 944   dst.w = src0.w << src1.x
 945
 946
 947 .. opcode:: SHR - Shift Right
 948
 949 .. math::
 950
 951   dst.x = src0.x >> src1.x
 952
 953   dst.y = src0.y >> src1.x
 954
 955   dst.z = src0.z >> src1.x
 956
 957   dst.w = src0.w >> src1.x
 958
 959
 960 .. opcode:: AND - Bitwise And
 961
 962 .. math::
 963
 964   dst.x = src0.x & src1.x
 965
 966   dst.y = src0.y & src1.y
 967
 968   dst.z = src0.z & src1.z
 969
 970   dst.w = src0.w & src1.w
 971
 972
 973 .. opcode:: OR - Bitwise Or
 974
 975 .. math::
 976
 977   dst.x = src0.x | src1.x
 978
 979   dst.y = src0.y | src1.y
 980
 981   dst.z = src0.z | src1.z
 982
 983   dst.w = src0.w | src1.w
 984
 985
 986 .. opcode:: MOD - Modulus
 987
 988 .. math::
 989
 990   dst.x = src0.x \bmod src1.x
 991
 992   dst.y = src0.y \bmod src1.y
 993
 994   dst.z = src0.z \bmod src1.z
 995
 996   dst.w = src0.w \bmod src1.w
 997
 998
 999 .. opcode:: XOR - Bitwise Xor
1000
1001 .. math::
1002
1003   dst.x = src0.x \oplus src1.x
1004
1005   dst.y = src0.y \oplus src1.y
1006
1007   dst.z = src0.z \oplus src1.z
1008
1009   dst.w = src0.w \oplus src1.w
1010
1011
1012 .. opcode:: SAD - Sum Of Absolute Differences
1013
1014 .. math::
1015
1016   dst.x = |src0.x - src1.x| + src2.x
1017
1018   dst.y = |src0.y - src1.y| + src2.y
1019
1020   dst.z = |src0.z - src1.z| + src2.z
1021
1022   dst.w = |src0.w - src1.w| + src2.w
1023
1024
1025 .. opcode:: TXF - Texel Fetch
1026
1027   TBD
1028
1029
1030 .. opcode:: TXQ - Texture Size Query
1031
1032   TBD
1033
1034
1035 .. opcode:: CONT - Continue
1036
1037   TBD
1038
1039
1040 From GL_NV_geometry_program4
1041 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1042
1043
1044 .. opcode:: EMIT - Emit
1045
1046   TBD
1047
1048
1049 .. opcode:: ENDPRIM - End Primitive
1050
1051   TBD
1052
1053
1054 From GLSL
1055 ^^^^^^^^^^
1056
1057
1058 .. opcode:: BGNLOOP - Begin a Loop
1059
1060   TBD
1061
1062
1063 .. opcode:: BGNSUB - Begin Subroutine
1064
1065   TBD
1066
1067
1068 .. opcode:: ENDLOOP - End a Loop
1069
1070   TBD
1071
1072
1073 .. opcode:: ENDSUB - End Subroutine
1074
1075   TBD
1076
1077
1078 .. opcode:: NOP - No Operation
1079
1080   Do nothing.
1081
1082
1083 .. opcode:: NRM4 - 4-component Vector Normalise
1084
1085 .. math::
1086
1087   dst.x = \frac{src.x}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1088
1089   dst.y = \frac{src.y}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1090
1091   dst.z = \frac{src.z}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1092
1093   dst.w = \frac{src.w}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1094
1095
1096 ps_2_x
1097 ^^^^^^^^^^^^
1098
1099
1100 .. opcode:: CALLNZ - Subroutine Call If Not Zero
1101
1102   TBD
1103
1104
1105 .. opcode:: IFC - If
1106
1107   TBD
1108
1109
1110 .. opcode:: BREAKC - Break Conditional
1111
1112   TBD
1113
1114 Double Opcodes
1115 ^^^^^^^^^^^^^^^
1116
1117 .. opcode:: DADD - Add Double
1118
1119 .. math::
1120
1121   dst.xy = src0.xy + src1.xy
1122
1123   dst.zw = src0.zw + src1.zw
1124
1125
1126 .. opcode:: DDIV - Divide Double
1127
1128 .. math::
1129
1130   dst.xy = src0.xy / src1.xy
1131
1132   dst.zw = src0.zw / src1.zw
1133
1134 .. opcode:: DSEQ - Set Double on Equal
1135
1136 .. math::
1137
1138   dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F
1139
1140   dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
1141
1142 .. opcode:: DSLT - Set Double on Less Than
1143
1144 .. math::
1145
1146   dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F
1147
1148   dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
1149
1150 .. opcode:: DFRAC - Double Fraction
1151
1152 .. math::
1153
1154   dst.xy = src.xy - \lfloor src.xy\rfloor
1155
1156   dst.zw = src.zw - \lfloor src.zw\rfloor
1157
1158
1159 .. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components
1160
1161 .. math::
1162
1163   dst0.xy = frexp(src.xy, dst1.xy)
1164
1165   dst0.zw = frexp(src.zw, dst1.zw)
1166
1167 .. opcode:: DLDEXP - Multiply Double Number by Integral Power of 2
1168
1169 .. math::
1170
1171   dst.xy = ldexp(src0.xy, src1.xy)
1172
1173   dst.zw = ldexp(src0.zw, src1.zw)
1174
1175 .. opcode:: DMIN - Minimum Double
1176
1177 .. math::
1178
1179   dst.xy = min(src0.xy, src1.xy)
1180
1181   dst.zw = min(src0.zw, src1.zw)
1182
1183 .. opcode:: DMAX - Maximum Double
1184
1185 .. math::
1186
1187   dst.xy = max(src0.xy, src1.xy)
1188
1189   dst.zw = max(src0.zw, src1.zw)
1190
1191 .. opcode:: DMUL - Multiply Double
1192
1193 .. math::
1194
1195   dst.xy = src0.xy \times src1.xy
1196
1197   dst.zw = src0.zw \times src1.zw
1198
1199
1200 .. opcode:: DMAD - Multiply And Add Doubles
1201
1202 .. math::
1203
1204   dst.xy = src0.xy \times src1.xy + src2.xy
1205
1206   dst.zw = src0.zw \times src1.zw + src2.zw
1207
1208
1209 .. opcode:: DRCP - Reciprocal Double
1210
1211 .. math::
1212
1213    dst.xy = \frac{1}{src.xy}
1214
1215    dst.zw = \frac{1}{src.zw}
1216
1217 .. opcode:: DSQRT - Square Root Double
1218
1219 .. math::
1220
1221    dst.xy = \sqrt{src.xy}
1222
1223    dst.zw = \sqrt{src.zw}
1224
1225
1226 Explanation of symbols used
1227 ------------------------------
1228
1229
1230 Functions
1231 ^^^^^^^^^^^^^^
1232
1233
1234   :math:`|x|`       Absolute value of `x`.
1235
1236   :math:`\lceil x \rceil` Ceiling of `x`.
1237
1238   clamp(x,y,z)      Clamp x between y and z.
1239                     (x < y) ? y : (x > z) ? z : x
1240
1241   :math:`\lfloor x\rfloor` Floor of `x`.
1242
1243   :math:`\log_2{x}` Logarithm of `x`, base 2.
1244
1245   max(x,y)          Maximum of x and y.
1246                     (x > y) ? x : y
1247
1248   min(x,y)          Minimum of x and y.
1249                     (x < y) ? x : y
1250
1251   partialx(x)       Derivative of x relative to fragment's X.
1252
1253   partialy(x)       Derivative of x relative to fragment's Y.
1254
1255   pop()             Pop from stack.
1256
1257   :math:`x^y`       `x` to the power `y`.
1258
1259   push(x)           Push x on stack.
1260
1261   round(x)          Round x.
1262
1263   trunc(x)          Truncate x, i.e. drop the fraction bits.
1264
1265
1266 Keywords
1267 ^^^^^^^^^^^^^
1268
1269
1270   discard           Discard fragment.
1271
1272   dst               First destination register.
1273
1274   dst0              First destination register.
1275
1276   pc                Program counter.
1277
1278   src               First source register.
1279
1280   src0              First source register.
1281
1282   src1              Second source register.
1283
1284   src2              Third source register.
1285
1286   target            Label of target instruction.
1287
1288
1289 Other tokens
1290 ---------------
1291
1292
1293 Declaration Semantic
1294 ^^^^^^^^^^^^^^^^^^^^^^^^
1295
1296
1297   Follows Declaration token if Semantic bit is set.
1298
1299   Since its purpose is to link a shader with other stages of the pipeline,
1300   it is valid to follow only those Declaration tokens that declare a register
1301   either in INPUT or OUTPUT file.
1302
1303   SemanticName field contains the semantic name of the register being declared.
1304   There is no default value.
1305
1306   SemanticIndex is an optional subscript that can be used to distinguish
1307   different register declarations with the same semantic name. The default value
1308   is 0.
1309
1310   The meanings of the individual semantic names are explained in the following
1311   sections.
1312
1313 TGSI_SEMANTIC_POSITION
1314 """"""""""""""""""""""
1315
1316 Position, sometimes known as HPOS or WPOS for historical reasons, is the
1317 location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z``
1318 are the Cartesian coordinates, and ``w`` is the homogeneous coordinate and is used
1319 for the perspective divide, if enabled.
1320
1321 As a vertex shader output, position should be scaled to the viewport. When
1322 used in fragment shaders, position will be in window coordinates. The convention
1323 used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties.
1324
1325 XXX additionally, is there a way to configure the perspective divide? it's
1326 accelerated on most chipsets AFAIK...
1327
1328 Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can
1329 be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``.
1330
1331 XXX usually? can we solidify that?
1332
1333 TGSI_SEMANTIC_COLOR
1334 """""""""""""""""""
1335
1336 Colors are used to, well, color the primitives. Colors are always in
1337 ``(r, g, b, a)`` format.
1338
1339 If alpha is not specified, it defaults to 1.
1340
1341 TGSI_SEMANTIC_BCOLOR
1342 """"""""""""""""""""
1343
1344 Back-facing colors are only used for back-facing polygons, and are only valid
1345 in vertex shader outputs. After rasterization, all polygons are front-facing
1346 and COLOR and BCOLOR end up occupying the same slots in the fragment, so
1347 all BCOLORs effectively become regular COLORs in the fragment shader.
1348
1349 TGSI_SEMANTIC_FOG
1350 """""""""""""""""
1351
1352 The fog coordinate historically has been used to replace the depth coordinate
1353 for generation of fog in dedicated fog blocks. Gallium, however, does not use
1354 dedicated fog acceleration, placing it entirely in the fragment shader
1355 instead.
1356
1357 The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first
1358 component matters when writing from the vertex shader; the driver will ensure
1359 that the coordinate is in this format when used as a fragment shader input.
1360
1361 TGSI_SEMANTIC_PSIZE
1362 """""""""""""""""""
1363
1364 PSIZE, or point size, is used to specify point sizes per-vertex. It should
1365 be in ``(p, n, x, f)`` format, where ``p`` is the point size, ``n`` is the minimum
1366 size, ``x`` is the maximum size, and ``f`` is the fade threshold.
1367
1368 XXX this is arb_vp. is this what we actually do? should double-check...
1369
1370 When using this semantic, be sure to set the appropriate state in the
1371 :ref:`rasterizer` first.
1372
1373 TGSI_SEMANTIC_GENERIC
1374 """""""""""""""""""""
1375
1376 Generic semantics are nearly always used for texture coordinate attributes,
1377 in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds
1378 of lookups, and ``q`` is the level-of-detail bias for biased sampling.
1379
1380 These attributes are called "generic" because they may be used for anything
1381 else, including parameters, texture generation information, or anything that
1382 can be stored inside a four-component vector.
1383
1384 TGSI_SEMANTIC_NORMAL
1385 """"""""""""""""""""
1386
1387 Vertex normal; could be used to implement per-pixel lighting for legacy APIs
1388 that allow mixing fixed-function and programmable stages.
1389
1390 TGSI_SEMANTIC_FACE
1391 """"""""""""""""""
1392
1393 FACE is the facing bit, to store the facing information for the fragment
1394 shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive
1395 when the fragment is front-facing, and negative when the fragment is
1396 back-facing.
1397
1398 TGSI_SEMANTIC_EDGEFLAG
1399 """"""""""""""""""""""
1400
1401 XXX no clue
1402
1403
1404 Properties
1405 ^^^^^^^^^^^^^^^^^^^^^^^^
1406
1407
1408   Properties are general directives that apply to the whole TGSI program.
1409
1410 FS_COORD_ORIGIN
1411 """""""""""""""
1412
1413 Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin.
1414 The default value is UPPER_LEFT.
1415
1416 If UPPER_LEFT, the position will be (0,0) at the upper left corner and
1417 increase downward and rightward.
1418 If LOWER_LEFT, the position will be (0,0) at the lower left corner and
1419 increase upward and rightward.
1420
1421 OpenGL defaults to LOWER_LEFT, and is configurable with the
1422 GL_ARB_fragment_coord_conventions extension.
1423
1424 DirectX 9/10 use UPPER_LEFT.
1425
1426 FS_COORD_PIXEL_CENTER
1427 """""""""""""""""""""
1428
1429 Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention.
1430 The default value is HALF_INTEGER.
1431
1432 If HALF_INTEGER, the fractional part of the position will be 0.5.
1433 If INTEGER, the fractional part of the position will be 0.0.
1434
1435 Note that this does not affect the set of fragments generated by
1436 rasterization, which is instead controlled by gl_rasterization_rules in the
1437 rasterizer.
1438
1439 OpenGL defaults to HALF_INTEGER, and is configurable with the
1440 GL_ARB_fragment_coord_conventions extension.
1441
1442 DirectX 9 uses INTEGER.
1443 DirectX 10 uses HALF_INTEGER.
1444
1445
1446
1447 Texture Sampling and Texture Formats
1448 ------------------------------------
1449
1450 This table shows how texture image components are returned as (x,y,z,w)
1451 tuples by TGSI texture instructions, such as TEX, TXD, and TXP.
1452 For reference, OpenGL and Direct3D conventions are shown as well.
1453
1454 +--------------------+--------------+--------------------+--------------+
1455 | Texture Components | Gallium      | OpenGL             | Direct3D 9   |
1456 +====================+==============+====================+==============+
1457 | R                  | XXX TBD      | (r, 0, 0, 1)       | (r, 1, 1, 1) |
1458 +--------------------+--------------+--------------------+--------------+
1459 | RG                 | XXX TBD      | (r, g, 0, 1)       | (r, g, 1, 1) |
1460 +--------------------+--------------+--------------------+--------------+
1461 | RGB                | (r, g, b, 1) | (r, g, b, 1)       | (r, g, b, 1) |
1462 +--------------------+--------------+--------------------+--------------+
1463 | RGBA               | (r, g, b, a) | (r, g, b, a)       | (r, g, b, a) |
1464 +--------------------+--------------+--------------------+--------------+
1465 | A                  | (0, 0, 0, a) | (0, 0, 0, a)       | (0, 0, 0, a) |
1466 +--------------------+--------------+--------------------+--------------+
1467 | L                  | (l, l, l, 1) | (l, l, l, 1)       | (l, l, l, 1) |
1468 +--------------------+--------------+--------------------+--------------+
1469 | LA                 | (l, l, l, a) | (l, l, l, a)       | (l, l, l, a) |
1470 +--------------------+--------------+--------------------+--------------+
1471 | I                  | (i, i, i, i) | (i, i, i, i)       | N/A          |
1472 +--------------------+--------------+--------------------+--------------+
1473 | UV                 | XXX TBD      | (0, 0, 0, 1)       | (u, v, 1, 1) |
1474 |                    |              | [#envmap-bumpmap]_ |              |
1475 +--------------------+--------------+--------------------+--------------+
1476 | Z                  | XXX TBD      | (z, z, z, 1)       | (0, z, 0, 1) |
1477 |                    |              | [#depth-tex-mode]_ |              |
1478 +--------------------+--------------+--------------------+--------------+
1479
1480 .. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
1481 .. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
1482  or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE.