1 TGSI Instruction Specification
2 ==============================
3 ==============================
6 1 Instruction Set Operations
7 =============================
10 1.1 GL_NV_vertex_program
11 -------------------------
14 1.1.1 ARL - Address Register Load
30 1.1.3 LIT - Light Coefficients
33 dst.y = max(src.x, 0.0)
34 dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
38 1.1.4 RCP - Reciprocal
46 1.1.5 RSQ - Reciprocal Square Root
48 dst.x = 1.0 / sqrt(abs(src.x))
49 dst.y = 1.0 / sqrt(abs(src.x))
50 dst.z = 1.0 / sqrt(abs(src.x))
51 dst.w = 1.0 / sqrt(abs(src.x))
54 1.1.6 EXP - Approximate Exponential Base 2
56 dst.x = pow(2.0, floor(src.x))
57 dst.y = src.x - floor(src.x)
58 dst.z = pow(2.0, src.x)
62 1.1.7 LOG - Approximate Logarithm Base 2
64 dst.x = floor(lg2(abs(src.x)))
65 dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x))))
66 dst.z = lg2(abs(src.x))
72 dst.x = src0.x * src1.x
73 dst.y = src0.y * src1.y
74 dst.z = src0.z * src1.z
75 dst.w = src0.w * src1.w
80 dst.x = src0.x + src1.x
81 dst.y = src0.y + src1.y
82 dst.z = src0.z + src1.z
83 dst.w = src0.w + src1.w
86 1.1.10 DP3 - 3-component Dot Product
88 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
89 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
90 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
91 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
94 1.1.11 DP4 - 4-component Dot Product
96 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
97 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
98 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
99 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
102 1.1.12 DST - Distance Vector
105 dst.y = src0.y * src1.y
112 dst.x = min(src0.x, src1.x)
113 dst.y = min(src0.y, src1.y)
114 dst.z = min(src0.z, src1.z)
115 dst.w = min(src0.w, src1.w)
120 dst.x = max(src0.x, src1.x)
121 dst.y = max(src0.y, src1.y)
122 dst.z = max(src0.z, src1.z)
123 dst.w = max(src0.w, src1.w)
126 1.1.15 SLT - Set On Less Than
128 dst.x = (src0.x < src1.x) ? 1.0 : 0.0
129 dst.y = (src0.y < src1.y) ? 1.0 : 0.0
130 dst.z = (src0.z < src1.z) ? 1.0 : 0.0
131 dst.w = (src0.w < src1.w) ? 1.0 : 0.0
134 1.1.16 SGE - Set On Greater Than Or Equal
136 dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
137 dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
138 dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
139 dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
142 1.1.17 MAD - Multiply And Add
144 dst.x = src0.x * src1.x + src2.x
145 dst.y = src0.y * src1.y + src2.y
146 dst.z = src0.z * src1.z + src2.z
147 dst.w = src0.w * src1.w + src2.w
150 1.2 GL_ATI_fragment_shader
151 ---------------------------
156 dst.x = src0.x - src1.x
157 dst.y = src0.y - src1.y
158 dst.z = src0.z - src1.z
159 dst.w = src0.w - src1.w
162 1.2.2 DOT3 - 3-component Dot Product
167 1.2.3 DOT4 - 4-component Dot Product
172 1.2.4 LERP - Linear Interpolate
174 dst.x = src0.x * (src1.x - src2.x) + src2.x
175 dst.y = src0.y * (src1.y - src2.y) + src2.y
176 dst.z = src0.z * (src1.z - src2.z) + src2.z
177 dst.w = src0.w * (src1.w - src2.w) + src2.w
180 1.2.5 CND - Condition
182 dst.x = (src2.x > 0.5) ? src0.x : src1.x
183 dst.y = (src2.y > 0.5) ? src0.y : src1.y
184 dst.z = (src2.z > 0.5) ? src0.z : src1.z
185 dst.w = (src2.w > 0.5) ? src0.w : src1.w
188 1.2.6 CND0 - Condition Zero
190 dst.x = (src2.x >= 0.0) ? src0.x : src1.x
191 dst.y = (src2.y >= 0.0) ? src0.y : src1.y
192 dst.z = (src2.z >= 0.0) ? src0.z : src1.z
193 dst.w = (src2.w >= 0.0) ? src0.w : src1.w
196 1.2.7 DOT2ADD - 2-component Dot Product And Add
198 dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
199 dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
200 dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
201 dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
204 1.3 GL_EXT_vertex_shader
205 -------------------------
208 1.3.1 INDEX - Array Lookup
210 Considered for removal from language.
213 1.3.2 NEGATE - Negate
215 Considered for removal from language.
218 1.3.3 MADD - Multiply And Add
223 1.3.4 FRAC - Fraction
225 dst.x = src.x - floor(src.x)
226 dst.y = src.y - floor(src.y)
227 dst.z = src.z - floor(src.z)
228 dst.w = src.w - floor(src.w)
231 1.3.5 SETGE - Set On Greater Than Or Equal
236 1.3.6 SETLT - Set On Less Than
243 dst.x = clamp(src0.x, src1.x, src2.x)
244 dst.y = clamp(src0.y, src1.y, src2.y)
245 dst.z = clamp(src0.z, src1.z, src2.z)
246 dst.w = clamp(src0.w, src1.w, src2.w)
265 1.3.10 EXPBASE2 - Exponent Base 2
267 dst.x = pow(2.0, src.x)
268 dst.y = pow(2.0, src.x)
269 dst.z = pow(2.0, src.x)
270 dst.w = pow(2.0, src.x)
273 1.3.11 LOGBASE2 - Logarithm Base 2
283 dst.x = pow(src0.x, src1.x)
284 dst.y = pow(src0.x, src1.x)
285 dst.z = pow(src0.x, src1.x)
286 dst.w = pow(src0.x, src1.x)
289 1.3.13 RECIP - Reciprocal
294 1.3.14 RECIPSQRT - Reciprocal Square Root
299 1.3.15 CROSSPRODUCT - Cross Product
301 dst.x = src0.y * src1.z - src1.y * src0.z
302 dst.y = src0.z * src1.x - src1.z * src0.x
303 dst.z = src0.x * src1.y - src1.x * src0.y
307 1.3.16 MULTIPLYMATRIX - Multiply Matrix
309 Considered for removal from language.
312 1.4 GL_NV_vertex_program1_1
313 ----------------------------
324 1.4.2 RCC - Reciprocal Clamped
326 dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.844674e+019) : clamp(1.0 / src.x, -1.844674e+019, -5.42101e-020)
327 dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.844674e+019) : clamp(1.0 / src.x, -1.844674e+019, -5.42101e-020)
328 dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.844674e+019) : clamp(1.0 / src.x, -1.844674e+019, -5.42101e-020)
329 dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.844674e+019) : clamp(1.0 / src.x, -1.844674e+019, -5.42101e-020)
332 1.4.3 DPH - Homogeneous Dot Product
334 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
335 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
336 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
337 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
340 1.5 GL_NV_fragment_program
341 ---------------------------
352 1.5.2 DDX - Derivative Relative To X
354 dst.x = partialx(src.x)
355 dst.y = partialx(src.y)
356 dst.z = partialx(src.z)
357 dst.w = partialx(src.w)
360 1.5.3 DDY - Derivative Relative To Y
362 dst.x = partialy(src.x)
363 dst.y = partialy(src.y)
364 dst.z = partialy(src.z)
365 dst.w = partialy(src.w)
368 1.5.4 EX2 - Exponential Base 2
383 1.5.7 KILP - Predicated Discard
388 1.5.8 LG2 - Logarithm Base 2
393 1.5.9 LRP - Linear Interpolate
398 1.5.10 PK2H - Pack Two 16-bit Floats
403 1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
408 1.5.12 PK4B - Pack Four Signed 8-bit Scalars
413 1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
423 1.5.15 RFL - Reflection Vector
425 dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
426 dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
427 dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
431 1.5.16 SEQ - Set On Equal
433 dst.x = (src0.x == src1.x) ? 1.0 : 0.0
434 dst.y = (src0.y == src1.y) ? 1.0 : 0.0
435 dst.z = (src0.z == src1.z) ? 1.0 : 0.0
436 dst.w = (src0.w == src1.w) ? 1.0 : 0.0
439 1.5.17 SFL - Set On False
447 1.5.18 SGT - Set On Greater Than
449 dst.x = (src0.x > src1.x) ? 1.0 : 0.0
450 dst.y = (src0.y > src1.y) ? 1.0 : 0.0
451 dst.z = (src0.z > src1.z) ? 1.0 : 0.0
452 dst.w = (src0.w > src1.w) ? 1.0 : 0.0
463 1.5.20 SLE - Set On Less Than Or Equal
465 dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
466 dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
467 dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
468 dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
471 1.5.21 SNE - Set On Not Equal
473 dst.x = (src0.x != src1.x) ? 1.0 : 0.0
474 dst.y = (src0.y != src1.y) ? 1.0 : 0.0
475 dst.z = (src0.z != src1.z) ? 1.0 : 0.0
476 dst.w = (src0.w != src1.w) ? 1.0 : 0.0
479 1.5.22 STR - Set On True
487 1.5.23 TEX - Texture Lookup
492 1.5.24 TXD - Texture Lookup with Derivatives
497 1.5.25 TXP - Projective Texture Lookup
502 1.5.26 UP2H - Unpack Two 16-bit Floats
507 1.5.27 UP2US - Unpack Two Unsigned 16-bit Scalars
512 1.5.28 UP4B - Unpack Four Signed 8-bit Values
517 1.5.29 UP4UB - Unpack Four Unsigned 8-bit Scalars
522 1.5.30 X2D - 2D Coordinate Transformation
524 dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
525 dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
526 dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
527 dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
530 1.6 GL_NV_vertex_program2
531 --------------------------
534 1.6.1 ARA - Address Register Add
539 1.6.2 ARR - Address Register Load With Round
552 1.6.4 CAL - Subroutine Call
558 1.6.5 RET - Subroutine Call Return
565 dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
566 dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
567 dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
568 dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
571 1.7 GL_ARB_vertex_program
572 --------------------------
575 1.7.1 SWZ - Extended Swizzle
583 1.7.2 XPD - Cross Product
585 Alias for CROSSPRODUCT.
588 1.8 GL_ARB_fragment_program
589 ----------------------------
594 dst.x = (src0.x < 0.0) ? src1.x : src2.x
595 dst.y = (src0.y < 0.0) ? src1.y : src2.y
596 dst.z = (src0.z < 0.0) ? src1.z : src2.z
597 dst.w = (src0.w < 0.0) ? src1.w : src2.w
600 1.8.2 KIL - Conditional Discard
605 1.8.3 SCS - Sine Cosine
613 1.8.4 TXB - Texture Lookup With Bias
618 1.9 GL_NV_fragment_program2
619 ----------------------------
622 1.9.1 NRM - 3-component Vector Normalise
624 dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
625 dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
626 dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
632 dst.x = src0.x / src1.x
633 dst.y = src0.y / src1.y
634 dst.z = src0.z / src1.z
635 dst.w = src0.w / src1.w
638 1.9.3 DP2 - 2-component Dot Product
640 dst.x = src0.x * src1.x + src0.y * src1.y
641 dst.y = src0.x * src1.x + src0.y * src1.y
642 dst.z = src0.x * src1.x + src0.y * src1.y
643 dst.w = src0.x * src1.x + src0.y * src1.y
646 1.9.4 DP2A - 2-component Dot Product And Add
651 1.9.5 TXL - Texture Lookup With LOD
681 1.9.11 ENDIF - End If
686 1.9.12 ENDLOOP - End Loop
691 1.9.13 ENDREP - End Repeat
696 1.10 GL_NV_vertex_program3
697 ---------------------------
700 1.10.1 PUSHA - Push Address Register On Stack
708 1.10.2 POPA - Pop Address Register From Stack
716 1.11 GL_NV_gpu_program4
717 ------------------------
720 1.11.1 CEIL - Ceiling
728 1.11.2 I2F - Integer To Float
730 dst.x = (float) src.x
731 dst.y = (float) src.y
732 dst.z = (float) src.z
733 dst.w = (float) src.w
736 1.11.3 NOT - Bitwise Not
744 1.11.4 TRUNC - Truncate
752 1.11.5 SHL - Shift Left
754 dst.x = src0.x << src1.x
755 dst.y = src0.y << src1.x
756 dst.z = src0.z << src1.x
757 dst.w = src0.w << src1.x
760 1.11.6 SHR - Shift Right
762 dst.x = src0.x >> src1.x
763 dst.y = src0.y >> src1.x
764 dst.z = src0.z >> src1.x
765 dst.w = src0.w >> src1.x
768 1.11.7 AND - Bitwise And
770 dst.x = src0.x & src1.x
771 dst.y = src0.y & src1.y
772 dst.z = src0.z & src1.z
773 dst.w = src0.w & src1.w
776 1.11.8 OR - Bitwise Or
778 dst.x = src0.x | src1.x
779 dst.y = src0.y | src1.y
780 dst.z = src0.z | src1.z
781 dst.w = src0.w | src1.w
786 dst.x = src0.x % src1.x
787 dst.y = src0.y % src1.y
788 dst.z = src0.z % src1.z
789 dst.w = src0.w % src1.w
792 1.11.10 XOR - Bitwise Xor
794 dst.x = src0.x ^ src1.x
795 dst.y = src0.y ^ src1.y
796 dst.z = src0.z ^ src1.z
797 dst.w = src0.w ^ src1.w
800 1.11.11 SAD - Sum Of Absolute Differences
802 dst.x = abs(src0.x - src1.x) + src2.x
803 dst.y = abs(src0.y - src1.y) + src2.y
804 dst.z = abs(src0.z - src1.z) + src2.z
805 dst.w = abs(src0.w - src1.w) + src2.w
808 1.11.12 TXF - Texel Fetch
813 1.11.13 TXQ - Texture Size Query
818 1.11.14 CONT - Continue
823 1.12 GL_NV_geometry_program4
824 -----------------------------
832 1.12.2 ENDPRIM - End Primitive
841 1.13.1 BGNLOOP2 - Begin Loop
846 1.13.2 BGNSUB - Begin Subroutine
851 1.13.3 ENDLOOP2 - End Loop
856 1.13.4 ENDSUB - End Subroutine
861 1.13.5 INT - Truncate
866 1.13.6 NOISE1 - 1D Noise
871 1.13.7 NOISE2 - 2D Noise
876 1.13.8 NOISE3 - 3D Noise
881 1.13.9 NOISE4 - 4D Noise
886 1.13.10 NOP - No Operation
895 1.14.1 TEXKILL - Conditional Discard
904 1.15.1 TEXLD - Texture Lookup
913 1.16.1 M4X4 - Multiply Matrix
915 Alias for MULTIPLYMATRIX.
918 1.16.2 M4X3 - Multiply Matrix
920 Considered for removal from language.
923 1.16.3 M3X4 - Multiply Matrix
925 Considered for removal from language.
928 1.16.4 M3X3 - Multiply Matrix
930 Considered for removal from language.
933 1.16.5 M3X2 - Multiply Matrix
935 Considered for removal from language.
938 1.16.6 CRS - Cross Product
943 1.16.7 NRM4 - 4-component Vector Normalise
945 dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
946 dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
947 dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
948 dst.w = src.w / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
951 1.16.8 SINCOS - Sine Cosine
956 1.16.9 TEXLDB - Texture Lookup With Bias
961 1.16.10 DP2ADD - 2-component Dot Product And Add
970 1.17.1 CALL - Subroutine Call
975 1.17.2 CALLNZ - Subroutine Call If Not Zero
990 1.17.5 BREAKC - Break Conditional
995 1.17.6 DSX - Derivative Relative To X
1000 1.17.7 DSY - Derivative Relative To Y
1005 1.17.8 TEXLDD - Texture Lookup with Derivatives
1014 1.18.1 EXPP - Approximate Exponential Base 2
1019 1.18.2 LOGP - Logarithm Base 2