1 TGSI Instruction Specification
2 ==============================
3 ==============================
6 1 Instruction Set Operations
7 =============================
10 1.1 GL_NV_vertex_program
11 -------------------------
14 1.1.1 ARL - Address Register Load
30 1.1.3 LIT - Light Coefficients
33 dst.y = max(src.x, 0.0)
34 dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
38 1.1.4 RCP - Reciprocal
46 1.1.5 RSQ - Reciprocal Square Root
48 dst.x = 1.0 / sqrt(abs(src.x))
49 dst.y = 1.0 / sqrt(abs(src.x))
50 dst.z = 1.0 / sqrt(abs(src.x))
51 dst.w = 1.0 / sqrt(abs(src.x))
54 1.1.6 EXP - Approximate Exponential Base 2
56 dst.x = pow(2.0, floor(src.x))
57 dst.y = src.x - floor(src.x)
58 dst.z = pow(2.0, src.x)
62 1.1.7 LOG - Approximate Logarithm Base 2
64 dst.x = floor(lg2(abs(src.x)))
65 dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x))))
66 dst.z = lg2(abs(src.x))
72 dst.x = src0.x * src1.x
73 dst.y = src0.y * src1.y
74 dst.z = src0.z * src1.z
75 dst.w = src0.w * src1.w
80 dst.x = src0.x + src1.x
81 dst.y = src0.y + src1.y
82 dst.z = src0.z + src1.z
83 dst.w = src0.w + src1.w
86 1.1.10 DP3 - 3-component Dot Product
88 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
89 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
90 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
91 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
94 1.1.11 DP4 - 4-component Dot Product
96 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
97 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
98 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
99 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
102 1.1.12 DST - Distance Vector
105 dst.y = src0.y * src1.y
112 dst.x = min(src0.x, src1.x)
113 dst.y = min(src0.y, src1.y)
114 dst.z = min(src0.z, src1.z)
115 dst.w = min(src0.w, src1.w)
120 dst.x = max(src0.x, src1.x)
121 dst.y = max(src0.y, src1.y)
122 dst.z = max(src0.z, src1.z)
123 dst.w = max(src0.w, src1.w)
126 1.1.15 SLT - Set On Less Than
128 dst.x = (src0.x < src1.x) ? 1.0 : 0.0
129 dst.y = (src0.y < src1.y) ? 1.0 : 0.0
130 dst.z = (src0.z < src1.z) ? 1.0 : 0.0
131 dst.w = (src0.w < src1.w) ? 1.0 : 0.0
134 1.1.16 SGE - Set On Greater Than Or Equal
136 dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
137 dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
138 dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
139 dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
142 1.1.17 MAD - Multiply And Add
144 dst.x = src0.x * src1.x + src2.x
145 dst.y = src0.y * src1.y + src2.y
146 dst.z = src0.z * src1.z + src2.z
147 dst.w = src0.w * src1.w + src2.w
150 1.2 GL_ATI_fragment_shader
151 ---------------------------
156 dst.x = src0.x - src1.x
157 dst.y = src0.y - src1.y
158 dst.z = src0.z - src1.z
159 dst.w = src0.w - src1.w
162 1.2.2 DOT3 - 3-component Dot Product
167 1.2.3 DOT4 - 4-component Dot Product
172 1.2.4 LERP - Linear Interpolate
174 dst.x = src0.x * (src1.x - src2.x) + src2.x
175 dst.y = src0.y * (src1.y - src2.y) + src2.y
176 dst.z = src0.z * (src1.z - src2.z) + src2.z
177 dst.w = src0.w * (src1.w - src2.w) + src2.w
180 1.2.5 CND - Condition
182 dst.x = (src2.x > 0.5) ? src0.x : src1.x
183 dst.y = (src2.y > 0.5) ? src0.y : src1.y
184 dst.z = (src2.z > 0.5) ? src0.z : src1.z
185 dst.w = (src2.w > 0.5) ? src0.w : src1.w
188 1.2.6 CND0 - Condition Zero
190 Removed. Use (CMP src2, src1, src0) instead.
192 1.2.7 DOT2ADD - 2-component Dot Product And Add
194 dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
195 dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
196 dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
197 dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
200 1.3 GL_EXT_vertex_shader
201 -------------------------
204 1.3.1 INDEX - Array Lookup
206 Considered for removal from language.
209 1.3.2 NEGATE - Negate
211 Considered for removal from language.
214 1.3.3 MADD - Multiply And Add
219 1.3.4 FRAC - Fraction
221 dst.x = src.x - floor(src.x)
222 dst.y = src.y - floor(src.y)
223 dst.z = src.z - floor(src.z)
224 dst.w = src.w - floor(src.w)
227 1.3.5 SETGE - Set On Greater Equal
232 1.3.6 SETLT - Set On Less Than
239 dst.x = clamp(src0.x, src1.x, src2.x)
240 dst.y = clamp(src0.y, src1.y, src2.y)
241 dst.z = clamp(src0.z, src1.z, src2.z)
242 dst.w = clamp(src0.w, src1.w, src2.w)
261 1.3.10 EXPBASE2 - Exponential Base 2
263 dst.x = pow(2.0, src.x)
264 dst.y = pow(2.0, src.x)
265 dst.z = pow(2.0, src.x)
266 dst.w = pow(2.0, src.x)
269 1.3.11 LOGBASE2 - Logarithm Base 2
279 dst.x = pow(src0.x, src1.x)
280 dst.y = pow(src0.x, src1.x)
281 dst.z = pow(src0.x, src1.x)
282 dst.w = pow(src0.x, src1.x)
285 1.3.13 RECIP - Reciprocal
290 1.3.14 RECIPSQRT - Reciprocal Square Root
295 1.3.15 CROSSPRODUCT - Cross Product
297 dst.x = src0.y * src1.z - src1.y * src0.z
298 dst.y = src0.z * src1.x - src1.z * src0.x
299 dst.z = src0.x * src1.y - src1.x * src0.y
303 1.3.16 MULTIPLYMATRIX - Multiply Matrix
305 Considered for removal from language.
308 1.4 GL_NV_vertex_program1_1
309 ----------------------------
320 1.4.2 RCC - Reciprocal Clamped
322 dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
323 dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
324 dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
325 dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
328 1.4.3 DPH - Homogeneous Dot Product
330 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
331 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
332 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
333 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
336 1.5 GL_NV_fragment_program
337 ---------------------------
348 1.5.2 DDX - Derivative Relative To X
350 dst.x = partialx(src.x)
351 dst.y = partialx(src.y)
352 dst.z = partialx(src.z)
353 dst.w = partialx(src.w)
356 1.5.3 DDY - Derivative Relative To Y
358 dst.x = partialy(src.x)
359 dst.y = partialy(src.y)
360 dst.z = partialy(src.z)
361 dst.w = partialy(src.w)
364 1.5.4 EX2 - Exponential Base 2
379 1.5.7 KILP - Predicated Discard
384 1.5.8 LG2 - Logarithm Base 2
389 1.5.9 LRP - Linear Interpolate
394 1.5.10 PK2H - Pack Two 16-bit Floats
399 1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
404 1.5.12 PK4B - Pack Four Signed 8-bit Scalars
409 1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
419 1.5.15 RFL - Reflection Vector
421 dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
422 dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
423 dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
427 1.5.16 SEQ - Set On Equal
429 dst.x = (src0.x == src1.x) ? 1.0 : 0.0
430 dst.y = (src0.y == src1.y) ? 1.0 : 0.0
431 dst.z = (src0.z == src1.z) ? 1.0 : 0.0
432 dst.w = (src0.w == src1.w) ? 1.0 : 0.0
435 1.5.17 SFL - Set On False
443 1.5.18 SGT - Set On Greater Than
445 dst.x = (src0.x > src1.x) ? 1.0 : 0.0
446 dst.y = (src0.y > src1.y) ? 1.0 : 0.0
447 dst.z = (src0.z > src1.z) ? 1.0 : 0.0
448 dst.w = (src0.w > src1.w) ? 1.0 : 0.0
459 1.5.20 SLE - Set On Less Than Or Equal
461 dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
462 dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
463 dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
464 dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
467 1.5.21 SNE - Set On Not Equal
469 dst.x = (src0.x != src1.x) ? 1.0 : 0.0
470 dst.y = (src0.y != src1.y) ? 1.0 : 0.0
471 dst.z = (src0.z != src1.z) ? 1.0 : 0.0
472 dst.w = (src0.w != src1.w) ? 1.0 : 0.0
475 1.5.22 STR - Set On True
483 1.5.23 TEX - Texture Lookup
488 1.5.24 TXD - Texture Lookup with Derivatives
493 1.5.25 TXP - Projective Texture Lookup
498 1.5.26 UP2H - Unpack Two 16-bit Floats
503 1.5.27 UP2US - Unpack Two Unsigned 16-bit Scalars
508 1.5.28 UP4B - Unpack Four Signed 8-bit Scalars
513 1.5.29 UP4UB - Unpack Four Unsigned 8-bit Scalars
518 1.5.30 X2D - 2D Coordinate Transformation
520 dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
521 dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
522 dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
523 dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
526 1.6 GL_NV_vertex_program2
527 --------------------------
530 1.6.1 ARA - Address Register Add
535 1.6.2 ARR - Address Register Load With Round
548 1.6.4 CAL - Subroutine Call
554 1.6.5 RET - Subroutine Call Return
561 dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
562 dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
563 dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
564 dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
567 1.7 GL_ARB_vertex_program
568 --------------------------
571 1.7.1 SWZ - Extended Swizzle
579 1.7.2 XPD - Cross Product
581 Alias for CROSSPRODUCT.
584 1.8 GL_ARB_fragment_program
585 ----------------------------
590 dst.x = (src0.x < 0.0) ? src1.x : src2.x
591 dst.y = (src0.y < 0.0) ? src1.y : src2.y
592 dst.z = (src0.z < 0.0) ? src1.z : src2.z
593 dst.w = (src0.w < 0.0) ? src1.w : src2.w
596 1.8.2 KIL - Conditional Discard
598 if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
603 1.8.3 SCS - Sine Cosine
611 1.8.4 TXB - Texture Lookup With Bias
616 1.9 GL_NV_fragment_program2
617 ----------------------------
620 1.9.1 NRM - 3-component Vector Normalise
622 dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
623 dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
624 dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
630 dst.x = src0.x / src1.x
631 dst.y = src0.y / src1.y
632 dst.z = src0.z / src1.z
633 dst.w = src0.w / src1.w
636 1.9.3 DP2 - 2-component Dot Product
638 dst.x = src0.x * src1.x + src0.y * src1.y
639 dst.y = src0.x * src1.x + src0.y * src1.y
640 dst.z = src0.x * src1.x + src0.y * src1.y
641 dst.w = src0.x * src1.x + src0.y * src1.y
644 1.9.4 DP2A - 2-component Dot Product And Add
649 1.9.5 TXL - Texture Lookup With LOD
664 1.9.8 BGNFOR - Begin a For-Loop
671 pc = [matching ENDFOR instruction] + 1
674 Note: The destination must be a loop register.
675 The source must be a constant register.
688 1.9.11 ENDIF - End If
693 1.9.12 ENDFOR - End a For-Loop
695 dst.x = dst.x + dst.z
699 pc = [matching BGNFOR instruction] + 1
702 Note: The destination must be a loop register.
705 1.9.13 ENDREP - End Repeat
710 1.10 GL_NV_vertex_program3
711 ---------------------------
714 1.10.1 PUSHA - Push Address Register On Stack
722 1.10.2 POPA - Pop Address Register From Stack
730 1.11 GL_NV_gpu_program4
731 ------------------------
734 1.11.1 CEIL - Ceiling
742 1.11.2 I2F - Integer To Float
744 dst.x = (float) src.x
745 dst.y = (float) src.y
746 dst.z = (float) src.z
747 dst.w = (float) src.w
750 1.11.3 NOT - Bitwise Not
758 1.11.4 TRUNC - Truncate
766 1.11.5 SHL - Shift Left
768 dst.x = src0.x << src1.x
769 dst.y = src0.y << src1.x
770 dst.z = src0.z << src1.x
771 dst.w = src0.w << src1.x
774 1.11.6 SHR - Shift Right
776 dst.x = src0.x >> src1.x
777 dst.y = src0.y >> src1.x
778 dst.z = src0.z >> src1.x
779 dst.w = src0.w >> src1.x
782 1.11.7 AND - Bitwise And
784 dst.x = src0.x & src1.x
785 dst.y = src0.y & src1.y
786 dst.z = src0.z & src1.z
787 dst.w = src0.w & src1.w
790 1.11.8 OR - Bitwise Or
792 dst.x = src0.x | src1.x
793 dst.y = src0.y | src1.y
794 dst.z = src0.z | src1.z
795 dst.w = src0.w | src1.w
800 dst.x = src0.x % src1.x
801 dst.y = src0.y % src1.y
802 dst.z = src0.z % src1.z
803 dst.w = src0.w % src1.w
806 1.11.10 XOR - Bitwise Xor
808 dst.x = src0.x ^ src1.x
809 dst.y = src0.y ^ src1.y
810 dst.z = src0.z ^ src1.z
811 dst.w = src0.w ^ src1.w
814 1.11.11 SAD - Sum Of Absolute Differences
816 dst.x = abs(src0.x - src1.x) + src2.x
817 dst.y = abs(src0.y - src1.y) + src2.y
818 dst.z = abs(src0.z - src1.z) + src2.z
819 dst.w = abs(src0.w - src1.w) + src2.w
822 1.11.12 TXF - Texel Fetch
827 1.11.13 TXQ - Texture Size Query
832 1.11.14 CONT - Continue
837 1.12 GL_NV_geometry_program4
838 -----------------------------
846 1.12.2 ENDPRIM - End Primitive
855 1.13.1 BGNLOOP - Begin a Loop
860 1.13.2 BGNSUB - Begin Subroutine
865 1.13.3 ENDLOOP - End a Loop
870 1.13.4 ENDSUB - End Subroutine
875 1.13.5 INT - Truncate
880 1.13.6 NOISE1 - 1D Noise
885 1.13.7 NOISE2 - 2D Noise
890 1.13.8 NOISE3 - 3D Noise
895 1.13.9 NOISE4 - 4D Noise
900 1.13.10 NOP - No Operation
909 1.14.1 TEXKILL - Conditional Discard
918 1.15.1 TEXLD - Texture Lookup
927 1.16.1 M4X4 - Multiply Matrix
929 Alias for MULTIPLYMATRIX.
932 1.16.2 M4X3 - Multiply Matrix
934 Considered for removal from language.
937 1.16.3 M3X4 - Multiply Matrix
939 Considered for removal from language.
942 1.16.4 M3X3 - Multiply Matrix
944 Considered for removal from language.
947 1.16.5 M3X2 - Multiply Matrix
949 Considered for removal from language.
952 1.16.6 CRS - Cross Product
957 1.16.7 NRM4 - 4-component Vector Normalise
959 dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
960 dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
961 dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
962 dst.w = src.w / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
965 1.16.8 SINCOS - Sine Cosine
970 1.16.9 TEXLDB - Texture Lookup With Bias
975 1.16.10 DP2ADD - 2-component Dot Product And Add
984 1.17.1 CALL - Subroutine Call
989 1.17.2 CALLNZ - Subroutine Call If Not Zero
1004 1.17.5 BREAKC - Break Conditional
1009 1.17.6 DSX - Derivative Relative To X
1014 1.17.7 DSY - Derivative Relative To Y
1019 1.17.8 TEXLDD - Texture Lookup with Derivatives
1028 1.18.1 EXPP - Approximate Exponential Base 2
1030 Use EXP. See also 1.19.3.
1033 1.18.2 LOGP - Logarithm Base 2
1035 Use LOG. See also 1.19.4.
1042 1.19.1 SGN - Set Sign
1047 1.19.2 MOVA - Move Address Register
1052 1.19.3 EXPP - Approximate Exponential Base 2
1057 1.19.4 LOGP - Logarithm Base 2
1062 2 Explanation of symbols used
1063 ==============================
1070 abs(x) Absolute value of x.
1074 ceil(x) Ceiling of x.
1076 clamp(x,y,z) Clamp x between y and z.
1077 (x < y) ? y : (x > z) ? z : x
1081 floor(x) Floor of x.
1083 lg2(x) Logarithm base 2 of x.
1085 max(x,y) Maximum of x and y.
1088 min(x,y) Minimum of x and y.
1091 partialx(x) Derivative of x relative to fragment's X.
1093 partialy(x) Derivative of x relative to fragment's Y.
1095 pop() Pop from stack.
1097 pow(x,y) Raise x to power of y.
1099 push(x) Push x on stack.
1105 sqrt(x) Square root of x.
1107 trunc(x) Truncate x.
1114 discard Discard fragment.
1116 dst First destination register.
1118 dst0 First destination register.
1122 src First source register.
1124 src0 First source register.
1126 src1 Second source register.
1128 src2 Third source register.
1130 target Label of target instruction.