4 TGSI, Tungsten Graphics Shader Instructions, is an intermediate language
5 for describing shaders. Since Gallium is inherently shaderful, shaders are
6 an important part of the API. TGSI is the only intermediate representation
12 From GL_NV_vertex_program
13 ^^^^^^^^^^^^^^^^^^^^^^^^^
16 ARL - Address Register Load
20 dst.x = \lfloor src.x\rfloor
22 dst.y = \lfloor src.y\rfloor
24 dst.z = \lfloor src.z\rfloor
26 dst.w = \lfloor src.w\rfloor
42 LIT - Light Coefficients
50 dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
59 dst.x = \frac{1}{src.x}
61 dst.y = \frac{1}{src.x}
63 dst.z = \frac{1}{src.x}
65 dst.w = \frac{1}{src.x}
68 RSQ - Reciprocal Square Root
72 dst.x = \frac{1}{\sqrt{|src.x|}}
74 dst.y = \frac{1}{\sqrt{|src.x|}}
76 dst.z = \frac{1}{\sqrt{|src.x|}}
78 dst.w = \frac{1}{\sqrt{|src.x|}}
81 EXP - Approximate Exponential Base 2
85 dst.x = 2^{\lfloor src.x\rfloor}
87 dst.y = src.x - \lfloor src.x\rfloor
94 LOG - Approximate Logarithm Base 2
98 dst.x = \lfloor\log_2{|src.x|}\rfloor
100 dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
102 dst.z = \log_2{|src.x|}
111 dst.x = src0.x \times src1.x
113 dst.y = src0.y \times src1.y
115 dst.z = src0.z \times src1.z
117 dst.w = src0.w \times src1.w
124 dst.x = src0.x + src1.x
126 dst.y = src0.y + src1.y
128 dst.z = src0.z + src1.z
130 dst.w = src0.w + src1.w
133 DP3 - 3-component Dot Product
137 dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
139 dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
141 dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
143 dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
146 DP4 - 4-component Dot Product
150 dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
152 dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
154 dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
156 dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
159 DST - Distance Vector
165 dst.y = src0.y \times src1.y
176 dst.x = min(src0.x, src1.x)
178 dst.y = min(src0.y, src1.y)
180 dst.z = min(src0.z, src1.z)
182 dst.w = min(src0.w, src1.w)
189 dst.x = max(src0.x, src1.x)
191 dst.y = max(src0.y, src1.y)
193 dst.z = max(src0.z, src1.z)
195 dst.w = max(src0.w, src1.w)
198 SLT - Set On Less Than
202 dst.x = (src0.x < src1.x) ? 1 : 0
204 dst.y = (src0.y < src1.y) ? 1 : 0
206 dst.z = (src0.z < src1.z) ? 1 : 0
208 dst.w = (src0.w < src1.w) ? 1 : 0
211 SGE - Set On Greater Than Or Equal
215 dst.x = (src0.x >= src1.x) ? 1 : 0
217 dst.y = (src0.y >= src1.y) ? 1 : 0
219 dst.z = (src0.z >= src1.z) ? 1 : 0
221 dst.w = (src0.w >= src1.w) ? 1 : 0
224 MAD - Multiply And Add
228 dst.x = src0.x \times src1.x + src2.x
230 dst.y = src0.y \times src1.y + src2.y
232 dst.z = src0.z \times src1.z + src2.z
234 dst.w = src0.w \times src1.w + src2.w
241 dst.x = src0.x - src1.x
243 dst.y = src0.y - src1.y
245 dst.z = src0.z - src1.z
247 dst.w = src0.w - src1.w
250 LRP - Linear Interpolate
254 dst.x = src0.x \times (src1.x - src2.x) + src2.x
256 dst.y = src0.y \times (src1.y - src2.y) + src2.y
258 dst.z = src0.z \times (src1.z - src2.z) + src2.z
260 dst.w = src0.w \times (src1.w - src2.w) + src2.w
267 dst.x = (src2.x > 0.5) ? src0.x : src1.x
269 dst.y = (src2.y > 0.5) ? src0.y : src1.y
271 dst.z = (src2.z > 0.5) ? src0.z : src1.z
273 dst.w = (src2.w > 0.5) ? src0.w : src1.w
276 DP2A - 2-component Dot Product And Add
280 dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
282 dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
284 dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
286 dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
293 dst.x = src.x - \lfloor src.x\rfloor
295 dst.y = src.y - \lfloor src.y\rfloor
297 dst.z = src.z - \lfloor src.z\rfloor
299 dst.w = src.w - \lfloor src.w\rfloor
306 dst.x = clamp(src0.x, src1.x, src2.x)
308 dst.y = clamp(src0.y, src1.y, src2.y)
310 dst.z = clamp(src0.z, src1.z, src2.z)
312 dst.w = clamp(src0.w, src1.w, src2.w)
317 This is identical to ARL.
321 dst.x = \lfloor src.x\rfloor
323 dst.y = \lfloor src.y\rfloor
325 dst.z = \lfloor src.z\rfloor
327 dst.w = \lfloor src.w\rfloor
343 EX2 - Exponential Base 2
356 LG2 - Logarithm Base 2
360 dst.x = \log_2{src.x}
362 dst.y = \log_2{src.x}
364 dst.z = \log_2{src.x}
366 dst.w = \log_2{src.x}
373 dst.x = src0.x^{src1.x}
375 dst.y = src0.x^{src1.x}
377 dst.z = src0.x^{src1.x}
379 dst.w = src0.x^{src1.x}
385 dst.x = src0.y \times src1.z - src1.y \times src0.z
387 dst.y = src0.z \times src1.x - src1.z \times src0.x
389 dst.z = src0.x \times src1.y - src1.x \times src0.y
407 RCC - Reciprocal Clamped
409 XXX cleanup on aisle three
413 dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
415 dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
417 dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
419 dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
422 DPH - Homogeneous Dot Product
426 dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
428 dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
430 dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
432 dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
448 DDX - Derivative Relative To X
452 dst.x = partialx(src.x)
454 dst.y = partialx(src.y)
456 dst.z = partialx(src.z)
458 dst.w = partialx(src.w)
461 DDY - Derivative Relative To Y
465 dst.x = partialy(src.x)
467 dst.y = partialy(src.y)
469 dst.z = partialy(src.z)
471 dst.w = partialy(src.w)
474 KILP - Predicated Discard
479 PK2H - Pack Two 16-bit Floats
484 PK2US - Pack Two Unsigned 16-bit Scalars
489 PK4B - Pack Four Signed 8-bit Scalars
494 PK4UB - Pack Four Unsigned 8-bit Scalars
499 RFL - Reflection Vector
503 dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x
505 dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y
507 dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z
511 Considered for removal.
518 dst.x = (src0.x == src1.x) ? 1 : 0
519 dst.y = (src0.y == src1.y) ? 1 : 0
520 dst.z = (src0.z == src1.z) ? 1 : 0
521 dst.w = (src0.w == src1.w) ? 1 : 0
533 Considered for removal.
535 SGT - Set On Greater Than
539 dst.x = (src0.x > src1.x) ? 1 : 0
540 dst.y = (src0.y > src1.y) ? 1 : 0
541 dst.z = (src0.z > src1.z) ? 1 : 0
542 dst.w = (src0.w > src1.w) ? 1 : 0
558 SLE - Set On Less Than Or Equal
562 dst.x = (src0.x <= src1.x) ? 1 : 0
563 dst.y = (src0.y <= src1.y) ? 1 : 0
564 dst.z = (src0.z <= src1.z) ? 1 : 0
565 dst.w = (src0.w <= src1.w) ? 1 : 0
568 SNE - Set On Not Equal
572 dst.x = (src0.x != src1.x) ? 1 : 0
573 dst.y = (src0.y != src1.y) ? 1 : 0
574 dst.z = (src0.z != src1.z) ? 1 : 0
575 dst.w = (src0.w != src1.w) ? 1 : 0
593 TXD - Texture Lookup with Derivatives
598 TXP - Projective Texture Lookup
603 UP2H - Unpack Two 16-Bit Floats
607 Considered for removal.
609 UP2US - Unpack Two Unsigned 16-Bit Scalars
613 Considered for removal.
615 UP4B - Unpack Four Signed 8-Bit Values
619 Considered for removal.
621 UP4UB - Unpack Four Unsigned 8-Bit Scalars
625 Considered for removal.
627 X2D - 2D Coordinate Transformation
631 dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
632 dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
633 dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
634 dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
636 Considered for removal.
639 From GL_NV_vertex_program2
640 ^^^^^^^^^^^^^^^^^^^^^^^^^^
643 ARA - Address Register Add
647 Considered for removal.
649 ARR - Address Register Load With Round
666 Considered for removal.
668 CAL - Subroutine Call
674 RET - Subroutine Call Return
678 Potential restrictions:
679 * Only occurs at end of function.
685 dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
687 dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
689 dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
691 dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
698 dst.x = (src0.x < 0) ? src1.x : src2.x
700 dst.y = (src0.y < 0) ? src1.y : src2.y
702 dst.z = (src0.z < 0) ? src1.z : src2.z
704 dst.w = (src0.w < 0) ? src1.w : src2.w
707 KIL - Conditional Discard
711 if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
729 TXB - Texture Lookup With Bias
734 NRM - 3-component Vector Normalise
738 dst.x = src.x / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
740 dst.y = src.y / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
742 dst.z = src.z / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
751 dst.x = \frac{src0.x}{src1.x}
753 dst.y = \frac{src0.y}{src1.y}
755 dst.z = \frac{src0.z}{src1.z}
757 dst.w = \frac{src0.w}{src1.w}
760 DP2 - 2-component Dot Product
764 dst.x = src0.x \times src1.x + src0.y \times src1.y
766 dst.y = src0.x \times src1.x + src0.y \times src1.y
768 dst.z = src0.x \times src1.x + src0.y \times src1.y
770 dst.w = src0.x \times src1.x + src0.y \times src1.y
773 TXL - Texture Lookup With LOD
788 BGNFOR - Begin a For-Loop
795 pc = [matching ENDFOR] + 1
798 Note: The destination must be a loop register.
799 The source must be a constant register.
801 Considered for cleanup / removal.
819 ENDFOR - End a For-Loop
821 dst.x = dst.x + dst.z
825 pc = [matching BGNFOR instruction] + 1
828 Note: The destination must be a loop register.
830 Considered for cleanup / removal.
837 PUSHA - Push Address Register On Stack
844 Considered for cleanup / removal.
846 POPA - Pop Address Register From Stack
853 Considered for cleanup / removal.
856 From GL_NV_gpu_program4
857 ^^^^^^^^^^^^^^^^^^^^^^^^
859 Support for these opcodes is indicated by a special pipe capability bit (TBD).
865 dst.x = \lceil src.x\rceil
867 dst.y = \lceil src.y\rceil
869 dst.z = \lceil src.z\rceil
871 dst.w = \lceil src.w\rceil
874 I2F - Integer To Float
878 dst.x = (float) src.x
880 dst.y = (float) src.y
882 dst.z = (float) src.z
884 dst.w = (float) src.w
902 XXX how is this different from floor?
919 dst.x = src0.x << src1.x
921 dst.y = src0.y << src1.x
923 dst.z = src0.z << src1.x
925 dst.w = src0.w << src1.x
932 dst.x = src0.x >> src1.x
934 dst.y = src0.y >> src1.x
936 dst.z = src0.z >> src1.x
938 dst.w = src0.w >> src1.x
945 dst.x = src0.x \& src1.x
947 dst.y = src0.y \& src1.y
949 dst.z = src0.z \& src1.z
951 dst.w = src0.w \& src1.w
958 dst.x = src0.x | src1.x
960 dst.y = src0.y | src1.y
962 dst.z = src0.z | src1.z
964 dst.w = src0.w | src1.w
971 dst.x = src0.x \bmod src1.x
973 dst.y = src0.y \bmod src1.y
975 dst.z = src0.z \bmod src1.z
977 dst.w = src0.w \bmod src1.w
984 dst.x = src0.x \oplus src1.x
986 dst.y = src0.y \oplus src1.y
988 dst.z = src0.z \oplus src1.z
990 dst.w = src0.w \oplus src1.w
993 SAD - Sum Of Absolute Differences
997 dst.x = |src0.x - src1.x| + src2.x
999 dst.y = |src0.y - src1.y| + src2.y
1001 dst.z = |src0.z - src1.z| + src2.z
1003 dst.w = |src0.w - src1.w| + src2.w
1011 TXQ - Texture Size Query
1021 From GL_NV_geometry_program4
1022 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1030 ENDPRIM - End Primitive
1039 BGNLOOP - Begin a Loop
1044 BGNSUB - Begin Subroutine
1049 ENDLOOP - End a Loop
1054 ENDSUB - End Subroutine
1064 NRM4 - 4-component Vector Normalise
1068 dst.x = \frac{src.x}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1070 dst.y = \frac{src.y}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1072 dst.z = \frac{src.z}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1074 dst.w = \frac{src.w}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}}
1081 CALLNZ - Subroutine Call If Not Zero
1091 BREAKC - Break Conditional
1096 Explanation of symbols used
1097 ------------------------------
1104 :math:`|x|` Absolute value of `x`.
1106 :math:`\lceil x \rceil` Ceiling of `x`.
1108 clamp(x,y,z) Clamp x between y and z.
1109 (x < y) ? y : (x > z) ? z : x
1111 :math:`\lfloor x\rfloor` Floor of `x`.
1113 :math:`\log_2{x}` Logarithm of `x`, base 2.
1115 max(x,y) Maximum of x and y.
1118 min(x,y) Minimum of x and y.
1121 partialx(x) Derivative of x relative to fragment's X.
1123 partialy(x) Derivative of x relative to fragment's Y.
1125 pop() Pop from stack.
1127 :math:`x^y` `x` to the power `y`.
1129 push(x) Push x on stack.
1133 trunc(x) Truncate x.
1140 discard Discard fragment.
1142 dst First destination register.
1144 dst0 First destination register.
1148 src First source register.
1150 src0 First source register.
1152 src1 Second source register.
1154 src2 Third source register.
1156 target Label of target instruction.
1163 Declaration Semantic
1164 ^^^^^^^^^^^^^^^^^^^^^^^^
1167 Follows Declaration token if Semantic bit is set.
1169 Since its purpose is to link a shader with other stages of the pipeline,
1170 it may only follow Declaration tokens that declare a register
1171 in either the INPUT or the OUTPUT file.
1173 SemanticName field contains the semantic name of the register being declared.
1174 There is no default value.
1176 SemanticIndex is an optional subscript that can be used to distinguish
1177 different register declarations with the same semantic name. The default value
1180 The meanings of the individual semantic names are explained in the following
1187 Valid only in a fragment shader INPUT declaration.
1189 FACE.x is negative when the primitive is back facing. FACE.x is positive
1190 when the primitive is front facing.