4 TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language
5 for describing shaders. Since Gallium is inherently shaderful, shaders are
6 an important part of the API. TGSI is the only intermediate representation
9 From GL_NV_vertex_program
10 -------------------------
13 ARL - Address Register Load
17 dst.x = \lfloor src.x\rfloor
19 dst.y = \lfloor src.y\rfloor
21 dst.z = \lfloor src.z\rfloor
23 dst.w = \lfloor src.w\rfloor
39 LIT - Light Coefficients
45 dst.y = max(src.x, 0.0)
47 dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0.0
65 RSQ - Reciprocal Square Root
69 dst.x = 1.0 / \sqrt{abs(src.x)}
71 dst.y = 1.0 / \sqrt{abs(src.x)}
73 dst.z = 1.0 / \sqrt{abs(src.x)}
75 dst.w = 1.0 / \sqrt{abs(src.x)}
78 EXP - Approximate Exponential Base 2
82 dst.x = 2^{\lfloor src.x\rfloor}
84 dst.y = src.x - \lfloor src.x\rfloor
91 LOG - Approximate Logarithm Base 2
95 dst.x = \lfloor lg2(abs(src.x))\rfloor
97 dst.y = abs(src.x) / 2^{\lfloor lg2(abs(src.x))\rfloor}
99 dst.z = lg2(abs(src.x))
108 dst.x = src0.x * src1.x
110 dst.y = src0.y * src1.y
112 dst.z = src0.z * src1.z
114 dst.w = src0.w * src1.w
121 dst.x = src0.x + src1.x
123 dst.y = src0.y + src1.y
125 dst.z = src0.z + src1.z
127 dst.w = src0.w + src1.w
130 DP3 - 3-component Dot Product
134 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
136 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
138 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
140 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
143 DP4 - 4-component Dot Product
147 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
149 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
151 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
153 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
156 DST - Distance Vector
162 dst.y = src0.y * src1.y
173 dst.x = min(src0.x, src1.x)
175 dst.y = min(src0.y, src1.y)
177 dst.z = min(src0.z, src1.z)
179 dst.w = min(src0.w, src1.w)
186 dst.x = max(src0.x, src1.x)
188 dst.y = max(src0.y, src1.y)
190 dst.z = max(src0.z, src1.z)
192 dst.w = max(src0.w, src1.w)
195 SLT - Set On Less Than
199 dst.x = (src0.x < src1.x) ? 1.0 : 0.0
201 dst.y = (src0.y < src1.y) ? 1.0 : 0.0
203 dst.z = (src0.z < src1.z) ? 1.0 : 0.0
205 dst.w = (src0.w < src1.w) ? 1.0 : 0.0
208 SGE - Set On Greater Equal Than
212 dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
214 dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
216 dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
218 dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
221 MAD - Multiply And Add
225 dst.x = src0.x * src1.x + src2.x
227 dst.y = src0.y * src1.y + src2.y
229 dst.z = src0.z * src1.z + src2.z
231 dst.w = src0.w * src1.w + src2.w
238 dst.x = src0.x - src1.x
240 dst.y = src0.y - src1.y
242 dst.z = src0.z - src1.z
244 dst.w = src0.w - src1.w
247 LRP - Linear Interpolate
251 dst.x = src0.x * (src1.x - src2.x) + src2.x
253 dst.y = src0.y * (src1.y - src2.y) + src2.y
255 dst.z = src0.z * (src1.z - src2.z) + src2.z
257 dst.w = src0.w * (src1.w - src2.w) + src2.w
264 dst.x = (src2.x > 0.5) ? src0.x : src1.x
266 dst.y = (src2.y > 0.5) ? src0.y : src1.y
268 dst.z = (src2.z > 0.5) ? src0.z : src1.z
270 dst.w = (src2.w > 0.5) ? src0.w : src1.w
273 DP2A - 2-component Dot Product And Add
277 dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
279 dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
281 dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
283 dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
290 dst.x = src.x - \lfloor src.x\rfloor
292 dst.y = src.y - \lfloor src.y\rfloor
294 dst.z = src.z - \lfloor src.z\rfloor
296 dst.w = src.w - \lfloor src.w\rfloor
303 dst.x = clamp(src0.x, src1.x, src2.x)
304 dst.y = clamp(src0.y, src1.y, src2.y)
305 dst.z = clamp(src0.z, src1.z, src2.z)
306 dst.w = clamp(src0.w, src1.w, src2.w)
311 This is identical to ARL.
315 dst.x = \lfloor src.x\rfloor
317 dst.y = \lfloor src.y\rfloor
319 dst.z = \lfloor src.z\rfloor
321 dst.w = \lfloor src.w\rfloor
334 EX2 - Exponential Base 2
347 1.3.11 LG2 - Logarithm Base 2
361 dst.x = src0.x^{src1.x}
363 dst.y = src0.x^{src1.x}
365 dst.z = src0.x^{src1.x}
367 dst.w = src0.x^{src1.x}
369 1.3.15 XPD - Cross Product
373 dst.x = src0.y * src1.z - src1.y * src0.z
374 dst.y = src0.z * src1.x - src1.z * src0.x
375 dst.z = src0.x * src1.y - src1.x * src0.y
389 1.4.2 RCC - Reciprocal Clamped
393 dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
394 dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
395 dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
396 dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
399 1.4.3 DPH - Homogeneous Dot Product
403 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
404 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
405 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
406 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
422 1.5.2 DDX - Derivative Relative To X
426 dst.x = partialx(src.x)
427 dst.y = partialx(src.y)
428 dst.z = partialx(src.z)
429 dst.w = partialx(src.w)
432 1.5.3 DDY - Derivative Relative To Y
436 dst.x = partialy(src.x)
437 dst.y = partialy(src.y)
438 dst.z = partialy(src.z)
439 dst.w = partialy(src.w)
442 1.5.7 KILP - Predicated Discard
449 1.5.10 PK2H - Pack Two 16-bit Floats
454 1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
459 1.5.12 PK4B - Pack Four Signed 8-bit Scalars
464 1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
469 1.5.15 RFL - Reflection Vector
473 dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
474 dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
475 dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
478 Considered for removal.
481 1.5.16 SEQ - Set On Equal
485 dst.x = (src0.x == src1.x) ? 1.0 : 0.0
486 dst.y = (src0.y == src1.y) ? 1.0 : 0.0
487 dst.z = (src0.z == src1.z) ? 1.0 : 0.0
488 dst.w = (src0.w == src1.w) ? 1.0 : 0.0
491 1.5.17 SFL - Set On False
500 Considered for removal.
502 1.5.18 SGT - Set On Greater Than
506 dst.x = (src0.x > src1.x) ? 1.0 : 0.0
507 dst.y = (src0.y > src1.y) ? 1.0 : 0.0
508 dst.z = (src0.z > src1.z) ? 1.0 : 0.0
509 dst.w = (src0.w > src1.w) ? 1.0 : 0.0
525 1.5.20 SLE - Set On Less Equal Than
529 dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
530 dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
531 dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
532 dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
535 1.5.21 SNE - Set On Not Equal
539 dst.x = (src0.x != src1.x) ? 1.0 : 0.0
540 dst.y = (src0.y != src1.y) ? 1.0 : 0.0
541 dst.z = (src0.z != src1.z) ? 1.0 : 0.0
542 dst.w = (src0.w != src1.w) ? 1.0 : 0.0
545 1.5.22 STR - Set On True
555 1.5.23 TEX - Texture Lookup
560 1.5.24 TXD - Texture Lookup with Derivatives
565 1.5.25 TXP - Projective Texture Lookup
570 1.5.26 UP2H - Unpack Two 16-Bit Floats
574 Considered for removal.
576 1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars
580 Considered for removal.
582 1.5.28 UP4B - Unpack Four Signed 8-Bit Values
586 Considered for removal.
588 1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars
592 Considered for removal.
594 1.5.30 X2D - 2D Coordinate Transformation
598 dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
599 dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
600 dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
601 dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
603 Considered for removal.
606 1.6 GL_NV_vertex_program2
607 --------------------------
610 1.6.1 ARA - Address Register Add
614 Considered for removal.
616 1.6.2 ARR - Address Register Load With Round
630 Considered for removal.
632 1.6.4 CAL - Subroutine Call
638 1.6.5 RET - Subroutine Call Return
642 Potential restrictions:
643 * Only occurs at end of function.
649 dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
650 dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
651 dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
652 dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
659 dst.x = (src0.x < 0.0) ? src1.x : src2.x
660 dst.y = (src0.y < 0.0) ? src1.y : src2.y
661 dst.z = (src0.z < 0.0) ? src1.z : src2.z
662 dst.w = (src0.w < 0.0) ? src1.w : src2.w
665 1.8.2 KIL - Conditional Discard
669 if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
687 1.8.4 TXB - Texture Lookup With Bias
692 1.9.1 NRM - 3-component Vector Normalise
696 dst.x = src.x / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z}
697 dst.y = src.y / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z}
698 dst.z = src.z / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z}
706 dst.x = src0.x / src1.x
707 dst.y = src0.y / src1.y
708 dst.z = src0.z / src1.z
709 dst.w = src0.w / src1.w
712 1.9.3 DP2 - 2-component Dot Product
716 dst.x = src0.x * src1.x + src0.y * src1.y
717 dst.y = src0.x * src1.x + src0.y * src1.y
718 dst.z = src0.x * src1.x + src0.y * src1.y
719 dst.w = src0.x * src1.x + src0.y * src1.y
722 1.9.5 TXL - Texture Lookup With LOD
737 1.9.8 BGNFOR - Begin a For-Loop
744 pc = [matching ENDFOR] + 1
747 Note: The destination must be a loop register.
748 The source must be a constant register.
750 Considered for cleanup / removal.
763 1.9.11 ENDIF - End If
768 1.9.12 ENDFOR - End a For-Loop
770 dst.x = dst.x + dst.z
774 pc = [matching BGNFOR instruction] + 1
777 Note: The destination must be a loop register.
779 Considered for cleanup / removal.
781 1.9.13 ENDREP - End Repeat
786 1.10.1 PUSHA - Push Address Register On Stack
793 Considered for cleanup / removal.
795 1.10.2 POPA - Pop Address Register From Stack
802 Considered for cleanup / removal.
805 1.11 GL_NV_gpu_program4
806 ------------------------
808 Support for these opcodes indicated by a special pipe capability bit (TBD).
810 1.11.1 CEIL - Ceiling
820 1.11.2 I2F - Integer To Float
824 dst.x = (float) src.x
825 dst.y = (float) src.y
826 dst.z = (float) src.z
827 dst.w = (float) src.w
830 1.11.3 NOT - Bitwise Not
840 1.11.4 TRUNC - Truncate
850 1.11.5 SHL - Shift Left
854 dst.x = src0.x << src1.x
855 dst.y = src0.y << src1.x
856 dst.z = src0.z << src1.x
857 dst.w = src0.w << src1.x
860 1.11.6 SHR - Shift Right
864 dst.x = src0.x >> src1.x
865 dst.y = src0.y >> src1.x
866 dst.z = src0.z >> src1.x
867 dst.w = src0.w >> src1.x
870 1.11.7 AND - Bitwise And
874 dst.x = src0.x & src1.x
875 dst.y = src0.y & src1.y
876 dst.z = src0.z & src1.z
877 dst.w = src0.w & src1.w
880 1.11.8 OR - Bitwise Or
884 dst.x = src0.x | src1.x
885 dst.y = src0.y | src1.y
886 dst.z = src0.z | src1.z
887 dst.w = src0.w | src1.w
894 dst.x = src0.x % src1.x
895 dst.y = src0.y % src1.y
896 dst.z = src0.z % src1.z
897 dst.w = src0.w % src1.w
900 1.11.10 XOR - Bitwise Xor
904 dst.x = src0.x ^ src1.x
905 dst.y = src0.y ^ src1.y
906 dst.z = src0.z ^ src1.z
907 dst.w = src0.w ^ src1.w
910 1.11.11 SAD - Sum Of Absolute Differences
914 dst.x = abs(src0.x - src1.x) + src2.x
915 dst.y = abs(src0.y - src1.y) + src2.y
916 dst.z = abs(src0.z - src1.z) + src2.z
917 dst.w = abs(src0.w - src1.w) + src2.w
920 1.11.12 TXF - Texel Fetch
925 1.11.13 TXQ - Texture Size Query
930 1.11.14 CONT - Continue
935 1.12 GL_NV_geometry_program4
936 -----------------------------
944 1.12.2 ENDPRIM - End Primitive
953 1.13.1 BGNLOOP - Begin a Loop
958 1.13.2 BGNSUB - Begin Subroutine
963 1.13.3 ENDLOOP - End a Loop
968 1.13.4 ENDSUB - End Subroutine
974 1.13.10 NOP - No Operation
980 1.16.7 NRM4 - 4-component Vector Normalise
984 dst.x = src.x / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w}
985 dst.y = src.y / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w}
986 dst.z = src.z / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w}
987 dst.w = src.w / \sqrt{src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w}
994 1.17.2 CALLNZ - Subroutine Call If Not Zero
1004 1.17.5 BREAKC - Break Conditional
1009 2 Explanation of symbols used
1010 ==============================
1017 abs(x) Absolute value of x.
1020 ceil(x) Ceiling of x.
1022 clamp(x,y,z) Clamp x between y and z.
1023 (x < y) ? y : (x > z) ? z : x
1025 :math:`\lfloor x\rfloor` Floor of `x`.
1027 lg2(x) Logarithm base 2 of x.
1029 max(x,y) Maximum of x and y.
1032 min(x,y) Minimum of x and y.
1035 partialx(x) Derivative of x relative to fragment's X.
1037 partialy(x) Derivative of x relative to fragment's Y.
1039 pop() Pop from stack.
1041 :math:`x^y` `x` to the power `y`.
1043 push(x) Push x on stack.
1047 trunc(x) Truncate x.
1054 discard Discard fragment.
1056 dst First destination register.
1058 dst0 First destination register.
1062 src First source register.
1064 src0 First source register.
1066 src1 Second source register.
1068 src2 Third source register.
1070 target Label of target instruction.
1077 3.1 Declaration Semantic
1078 -------------------------
1081 Follows Declaration token if Semantic bit is set.
1083 Since its purpose is to link a shader with other stages of the pipeline,
1084 it is valid to follow only those Declaration tokens that declare a register
1085 either in INPUT or OUTPUT file.
1087 SemanticName field contains the semantic name of the register being declared.
1088 There is no default value.
1090 SemanticIndex is an optional subscript that can be used to distinguish
1091 different register declarations with the same semantic name. The default value
1094 The meanings of the individual semantic names are explained in the following
1100 Valid only in a fragment shader INPUT declaration.
1102 FACE.x is negative when the primitive is back facing. FACE.x is positive
1103 when the primitive is front facing.