1 TGSI Instruction Specification
2 ==============================
3 ==============================
6 1 Instruction Set Operations
7 =============================
10 1.1 GL_NV_vertex_program
11 -------------------------
14 1.1.1 ARL - Address Register Load
30 1.1.3 LIT - Light Coefficients
33 dst.y = max(src.x, 0.0)
34 dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
38 1.1.4 RCP - Reciprocal
46 1.1.5 RSQ - Reciprocal Square Root
48 dst.x = 1.0 / sqrt(abs(src.x))
49 dst.y = 1.0 / sqrt(abs(src.x))
50 dst.z = 1.0 / sqrt(abs(src.x))
51 dst.w = 1.0 / sqrt(abs(src.x))
54 1.1.6 EXP - Approximate Exponential Base 2
56 dst.x = pow(2.0, floor(src.x))
57 dst.y = src.x - floor(src.x)
58 dst.z = pow(2.0, src.x)
62 1.1.7 LOG - Approximate Logarithm Base 2
64 dst.x = floor(lg2(abs(src.x)))
65 dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x))))
66 dst.z = lg2(abs(src.x))
72 dst.x = src0.x * src1.x
73 dst.y = src0.y * src1.y
74 dst.z = src0.z * src1.z
75 dst.w = src0.w * src1.w
80 dst.x = src0.x + src1.x
81 dst.y = src0.y + src1.y
82 dst.z = src0.z + src1.z
83 dst.w = src0.w + src1.w
86 1.1.10 DP3 - 3-component Dot Product
88 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
89 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
90 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
91 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
94 1.1.11 DP4 - 4-component Dot Product
96 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
97 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
98 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
99 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
102 1.1.12 DST - Distance Vector
105 dst.y = src0.y * src1.y
112 dst.x = min(src0.x, src1.x)
113 dst.y = min(src0.y, src1.y)
114 dst.z = min(src0.z, src1.z)
115 dst.w = min(src0.w, src1.w)
120 dst.x = max(src0.x, src1.x)
121 dst.y = max(src0.y, src1.y)
122 dst.z = max(src0.z, src1.z)
123 dst.w = max(src0.w, src1.w)
126 1.1.15 SLT - Set On Less Than
128 dst.x = (src0.x < src1.x) ? 1.0 : 0.0
129 dst.y = (src0.y < src1.y) ? 1.0 : 0.0
130 dst.z = (src0.z < src1.z) ? 1.0 : 0.0
131 dst.w = (src0.w < src1.w) ? 1.0 : 0.0
134 1.1.16 SGE - Set On Greater Than Or Equal
136 dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
137 dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
138 dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
139 dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
142 1.1.17 MAD - Multiply And Add
144 dst.x = src0.x * src1.x + src2.x
145 dst.y = src0.y * src1.y + src2.y
146 dst.z = src0.z * src1.z + src2.z
147 dst.w = src0.w * src1.w + src2.w
150 1.2 GL_ATI_fragment_shader
151 ---------------------------
156 dst.x = src0.x - src1.x
157 dst.y = src0.y - src1.y
158 dst.z = src0.z - src1.z
159 dst.w = src0.w - src1.w
162 1.2.2 DOT3 - 3-component Dot Product
167 1.2.3 DOT4 - 4-component Dot Product
172 1.2.4 LERP - Linear Interpolate
174 dst.x = src0.x * (src1.x - src2.x) + src2.x
175 dst.y = src0.y * (src1.y - src2.y) + src2.y
176 dst.z = src0.z * (src1.z - src2.z) + src2.z
177 dst.w = src0.w * (src1.w - src2.w) + src2.w
180 1.2.5 CND - Condition
182 dst.x = (src2.x > 0.5) ? src0.x : src1.x
183 dst.y = (src2.y > 0.5) ? src0.y : src1.y
184 dst.z = (src2.z > 0.5) ? src0.z : src1.z
185 dst.w = (src2.w > 0.5) ? src0.w : src1.w
188 1.2.6 CND0 - Condition Zero
190 dst.x = (src2.x >= 0.0) ? src0.x : src1.x
191 dst.y = (src2.y >= 0.0) ? src0.y : src1.y
192 dst.z = (src2.z >= 0.0) ? src0.z : src1.z
193 dst.w = (src2.w >= 0.0) ? src0.w : src1.w
196 1.2.7 DOT2ADD - 2-component Dot Product And Add
198 dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
199 dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
200 dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
201 dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
204 1.3 GL_EXT_vertex_shader
205 -------------------------
208 1.3.1 INDEX - Array Lookup
210 Considered for removal from language.
213 1.3.2 NEGATE - Negate
215 Considered for removal from language.
218 1.3.3 MADD - Multiply And Add
223 1.3.4 FRAC - Fraction
225 dst.x = src.x - floor(src.x)
226 dst.y = src.y - floor(src.y)
227 dst.z = src.z - floor(src.z)
228 dst.w = src.w - floor(src.w)
231 1.3.5 SETGE - Set On Greater Than Or Equal
236 1.3.6 SETLT - Set On Less Than
243 dst.x = clamp(src0.x, src1.x, src2.x)
244 dst.y = clamp(src0.y, src1.y, src2.y)
245 dst.z = clamp(src0.z, src1.z, src2.z)
246 dst.w = clamp(src0.w, src1.w, src2.w)
265 1.3.10 EXPBASE2 - Exponential Base 2
267 dst.x = pow(2.0, src.x)
268 dst.y = pow(2.0, src.x)
269 dst.z = pow(2.0, src.x)
270 dst.w = pow(2.0, src.x)
273 1.3.11 LOGBASE2 - Logarithm Base 2
283 dst.x = pow(src0.x, src1.x)
284 dst.y = pow(src0.x, src1.x)
285 dst.z = pow(src0.x, src1.x)
286 dst.w = pow(src0.x, src1.x)
289 1.3.13 RECIP - Reciprocal
294 1.3.14 RECIPSQRT - Reciprocal Square Root
299 1.3.15 CROSSPRODUCT - Cross Product
301 dst.x = src0.y * src1.z - src1.y * src0.z
302 dst.y = src0.z * src1.x - src1.z * src0.x
303 dst.z = src0.x * src1.y - src1.x * src0.y
307 1.3.16 MULTIPLYMATRIX - Multiply Matrix
309 Considered for removal from language.
312 1.4 GL_NV_vertex_program1_1
313 ----------------------------
324 1.4.2 RCC - Reciprocal Clamped
326 dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
327 dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
328 dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
329 dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
332 1.4.3 DPH - Homogeneous Dot Product
334 dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
335 dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
336 dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
337 dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
340 1.5 GL_NV_fragment_program
341 ---------------------------
352 1.5.2 DDX - Derivative Relative To X
354 dst.x = partialx(src.x)
355 dst.y = partialx(src.y)
356 dst.z = partialx(src.z)
357 dst.w = partialx(src.w)
360 1.5.3 DDY - Derivative Relative To Y
362 dst.x = partialy(src.x)
363 dst.y = partialy(src.y)
364 dst.z = partialy(src.z)
365 dst.w = partialy(src.w)
368 1.5.4 EX2 - Exponential Base 2
383 1.5.7 KILP - Predicated Discard
388 1.5.8 LG2 - Logarithm Base 2
393 1.5.9 LRP - Linear Interpolate
398 1.5.10 PK2H - Pack Two 16-bit Floats
403 1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
408 1.5.12 PK4B - Pack Four Signed 8-bit Scalars
413 1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
423 1.5.15 RFL - Reflection Vector
425 dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
426 dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
427 dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
431 1.5.16 SEQ - Set On Equal
433 dst.x = (src0.x == src1.x) ? 1.0 : 0.0
434 dst.y = (src0.y == src1.y) ? 1.0 : 0.0
435 dst.z = (src0.z == src1.z) ? 1.0 : 0.0
436 dst.w = (src0.w == src1.w) ? 1.0 : 0.0
439 1.5.17 SFL - Set On False
447 1.5.18 SGT - Set On Greater Than
449 dst.x = (src0.x > src1.x) ? 1.0 : 0.0
450 dst.y = (src0.y > src1.y) ? 1.0 : 0.0
451 dst.z = (src0.z > src1.z) ? 1.0 : 0.0
452 dst.w = (src0.w > src1.w) ? 1.0 : 0.0
463 1.5.20 SLE - Set On Less Than Or Equal
465 dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
466 dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
467 dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
468 dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
471 1.5.21 SNE - Set On Not Equal
473 dst.x = (src0.x != src1.x) ? 1.0 : 0.0
474 dst.y = (src0.y != src1.y) ? 1.0 : 0.0
475 dst.z = (src0.z != src1.z) ? 1.0 : 0.0
476 dst.w = (src0.w != src1.w) ? 1.0 : 0.0
479 1.5.22 STR - Set On True
487 1.5.23 TEX - Texture Lookup
492 1.5.24 TXD - Texture Lookup with Derivatives
497 1.5.25 TXP - Projective Texture Lookup
502 1.5.26 UP2H - Unpack Two 16-Bit Floats
507 1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars
512 1.5.28 UP4B - Unpack Four Signed 8-Bit Values
517 1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars
522 1.5.30 X2D - 2D Coordinate Transformation
524 dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
525 dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
526 dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
527 dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
530 1.6 GL_NV_vertex_program2
531 --------------------------
534 1.6.1 ARA - Address Register Add
539 1.6.2 ARR - Address Register Load With Round
552 1.6.4 CAL - Subroutine Call
558 1.6.5 RET - Subroutine Call Return
565 dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
566 dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
567 dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
568 dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
571 1.7 GL_ARB_vertex_program
572 --------------------------
575 1.7.1 SWZ - Extended Swizzle
583 1.7.2 XPD - Cross Product
585 Alias for CROSSPRODUCT.
588 1.8 GL_ARB_fragment_program
589 ----------------------------
594 dst.x = (src0.x < 0.0) ? src1.x : src2.x
595 dst.y = (src0.y < 0.0) ? src1.y : src2.y
596 dst.z = (src0.z < 0.0) ? src1.z : src2.z
597 dst.w = (src0.w < 0.0) ? src1.w : src2.w
600 1.8.2 KIL - Conditional Discard
602 if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
607 1.8.3 SCS - Sine Cosine
615 1.8.4 TXB - Texture Lookup With Bias
620 1.9 GL_NV_fragment_program2
621 ----------------------------
624 1.9.1 NRM - 3-component Vector Normalise
626 dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
627 dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
628 dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
634 dst.x = src0.x / src1.x
635 dst.y = src0.y / src1.y
636 dst.z = src0.z / src1.z
637 dst.w = src0.w / src1.w
640 1.9.3 DP2 - 2-component Dot Product
642 dst.x = src0.x * src1.x + src0.y * src1.y
643 dst.y = src0.x * src1.x + src0.y * src1.y
644 dst.z = src0.x * src1.x + src0.y * src1.y
645 dst.w = src0.x * src1.x + src0.y * src1.y
648 1.9.4 DP2A - 2-component Dot Product And Add
653 1.9.5 TXL - Texture Lookup With LOD
683 1.9.11 ENDIF - End If
688 1.9.12 ENDLOOP - End Loop
693 1.9.13 ENDREP - End Repeat
698 1.10 GL_NV_vertex_program3
699 ---------------------------
702 1.10.1 PUSHA - Push Address Register On Stack
710 1.10.2 POPA - Pop Address Register From Stack
718 1.11 GL_NV_gpu_program4
719 ------------------------
722 1.11.1 CEIL - Ceiling
730 1.11.2 I2F - Integer To Float
732 dst.x = (float) src.x
733 dst.y = (float) src.y
734 dst.z = (float) src.z
735 dst.w = (float) src.w
738 1.11.3 NOT - Bitwise Not
746 1.11.4 TRUNC - Truncate
754 1.11.5 SHL - Shift Left
756 dst.x = src0.x << src1.x
757 dst.y = src0.y << src1.x
758 dst.z = src0.z << src1.x
759 dst.w = src0.w << src1.x
762 1.11.6 SHR - Shift Right
764 dst.x = src0.x >> src1.x
765 dst.y = src0.y >> src1.x
766 dst.z = src0.z >> src1.x
767 dst.w = src0.w >> src1.x
770 1.11.7 AND - Bitwise And
772 dst.x = src0.x & src1.x
773 dst.y = src0.y & src1.y
774 dst.z = src0.z & src1.z
775 dst.w = src0.w & src1.w
778 1.11.8 OR - Bitwise Or
780 dst.x = src0.x | src1.x
781 dst.y = src0.y | src1.y
782 dst.z = src0.z | src1.z
783 dst.w = src0.w | src1.w
788 dst.x = src0.x % src1.x
789 dst.y = src0.y % src1.y
790 dst.z = src0.z % src1.z
791 dst.w = src0.w % src1.w
794 1.11.10 XOR - Bitwise Xor
796 dst.x = src0.x ^ src1.x
797 dst.y = src0.y ^ src1.y
798 dst.z = src0.z ^ src1.z
799 dst.w = src0.w ^ src1.w
802 1.11.11 SAD - Sum Of Absolute Differences
804 dst.x = abs(src0.x - src1.x) + src2.x
805 dst.y = abs(src0.y - src1.y) + src2.y
806 dst.z = abs(src0.z - src1.z) + src2.z
807 dst.w = abs(src0.w - src1.w) + src2.w
810 1.11.12 TXF - Texel Fetch
815 1.11.13 TXQ - Texture Size Query
820 1.11.14 CONT - Continue
825 1.12 GL_NV_geometry_program4
826 -----------------------------
834 1.12.2 ENDPRIM - End Primitive
843 1.13.1 BGNLOOP2 - Begin Loop
848 1.13.2 BGNSUB - Begin Subroutine
853 1.13.3 ENDLOOP2 - End Loop
858 1.13.4 ENDSUB - End Subroutine
863 1.13.5 INT - Truncate
868 1.13.6 NOISE1 - 1D Noise
873 1.13.7 NOISE2 - 2D Noise
878 1.13.8 NOISE3 - 3D Noise
883 1.13.9 NOISE4 - 4D Noise
888 1.13.10 NOP - No Operation
897 1.14.1 TEXKILL - Conditional Discard
906 1.15.1 TEXLD - Texture Lookup
915 1.16.1 M4X4 - Multiply Matrix
917 Alias for MULTIPLYMATRIX.
920 1.16.2 M4X3 - Multiply Matrix
922 Considered for removal from language.
925 1.16.3 M3X4 - Multiply Matrix
927 Considered for removal from language.
930 1.16.4 M3X3 - Multiply Matrix
932 Considered for removal from language.
935 1.16.5 M3X2 - Multiply Matrix
937 Considered for removal from language.
940 1.16.6 CRS - Cross Product
945 1.16.7 NRM4 - 4-component Vector Normalise
947 dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
948 dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
949 dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
950 dst.w = src.w / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
953 1.16.8 SINCOS - Sine Cosine
958 1.16.9 TEXLDB - Texture Lookup With Bias
963 1.16.10 DP2ADD - 2-component Dot Product And Add
972 1.17.1 CALL - Subroutine Call
977 1.17.2 CALLNZ - Subroutine Call If Not Zero
992 1.17.5 BREAKC - Break Conditional
997 1.17.6 DSX - Derivative Relative To X
1002 1.17.7 DSY - Derivative Relative To Y
1007 1.17.8 TEXLDD - Texture Lookup with Derivatives
1016 1.18.1 EXPP - Approximate Exponential Base 2
1021 1.18.2 LOGP - Logarithm Base 2
1030 1.19.1 SGN - Set Sign
1035 1.19.2 MOVA - Move Address Register
1040 2 Explanation of symbols used
1041 ==============================
1048 abs(x) Absolute value of x.
1052 ceil(x) Ceiling of x.
1054 clamp(x,y,z) Clamp x between y and z.
1055 (x < y) ? y : (x > z) ? z : x
1059 floor(x) Floor of x.
1061 lg2(x) Logarithm base 2 of x.
1063 max(x,y) Maximum of x and y.
1066 min(x,y) Minimum of x and y.
1069 partialx(x) Derivative of x relative to fragment's X.
1071 partialy(x) Derivative of x relative to fragment's Y.
1073 pop() Pop from stack.
1075 pow(x,y) Raise x to power of y.
1077 push(x) Push x on stack.
1083 sqrt(x) Square root of x.
1085 trunc(x) Truncate x.
1092 discard Discard fragment.
1094 dst First destination register.
1096 dst0 First destination register.
1100 src First source register.
1102 src0 First source register.
1104 src1 Second source register.
1106 src2 Third source register.
1108 target Label of target instruction.