for describing shaders. Since Gallium is inherently shaderful, shaders are
an important part of the API. TGSI is the only intermediate representation
used by all drivers.
+
+
+TGSI Instruction Specification
+==============================
+
+
+1 Instruction Set Operations
+=============================
+
+
+1.1 GL_NV_vertex_program
+-------------------------
+
+
+1.1.1 ARL - Address Register Load
+
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+ dst.w = floor(src.w)
+
+
+1.1.2 MOV - Move
+
+ dst.x = src.x
+ dst.y = src.y
+ dst.z = src.z
+ dst.w = src.w
+
+
+1.1.3 LIT - Light Coefficients
+
+ dst.x = 1.0
+ dst.y = max(src.x, 0.0)
+ dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
+ dst.w = 1.0
+
+
+1.1.4 RCP - Reciprocal
+
+ dst.x = 1.0 / src.x
+ dst.y = 1.0 / src.x
+ dst.z = 1.0 / src.x
+ dst.w = 1.0 / src.x
+
+
+1.1.5 RSQ - Reciprocal Square Root
+
+ dst.x = 1.0 / sqrt(abs(src.x))
+ dst.y = 1.0 / sqrt(abs(src.x))
+ dst.z = 1.0 / sqrt(abs(src.x))
+ dst.w = 1.0 / sqrt(abs(src.x))
+
+
+1.1.6 EXP - Approximate Exponential Base 2
+
+ dst.x = pow(2.0, floor(src.x))
+ dst.y = src.x - floor(src.x)
+ dst.z = pow(2.0, src.x)
+ dst.w = 1.0
+
+
+1.1.7 LOG - Approximate Logarithm Base 2
+
+ dst.x = floor(lg2(abs(src.x)))
+ dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x))))
+ dst.z = lg2(abs(src.x))
+ dst.w = 1.0
+
+
+1.1.8 MUL - Multiply
+
+ dst.x = src0.x * src1.x
+ dst.y = src0.y * src1.y
+ dst.z = src0.z * src1.z
+ dst.w = src0.w * src1.w
+
+
+1.1.9 ADD - Add
+
+ dst.x = src0.x + src1.x
+ dst.y = src0.y + src1.y
+ dst.z = src0.z + src1.z
+ dst.w = src0.w + src1.w
+
+
+1.1.10 DP3 - 3-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+
+
+1.1.11 DP4 - 4-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+
+
+1.1.12 DST - Distance Vector
+
+ dst.x = 1.0
+ dst.y = src0.y * src1.y
+ dst.z = src0.z
+ dst.w = src1.w
+
+
+1.1.13 MIN - Minimum
+
+ dst.x = min(src0.x, src1.x)
+ dst.y = min(src0.y, src1.y)
+ dst.z = min(src0.z, src1.z)
+ dst.w = min(src0.w, src1.w)
+
+
+1.1.14 MAX - Maximum
+
+ dst.x = max(src0.x, src1.x)
+ dst.y = max(src0.y, src1.y)
+ dst.z = max(src0.z, src1.z)
+ dst.w = max(src0.w, src1.w)
+
+
+1.1.15 SLT - Set On Less Than
+
+ dst.x = (src0.x < src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y < src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z < src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w < src1.w) ? 1.0 : 0.0
+
+
+1.1.16 SGE - Set On Greater Equal Than
+
+ dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
+
+
+1.1.17 MAD - Multiply And Add
+
+ dst.x = src0.x * src1.x + src2.x
+ dst.y = src0.y * src1.y + src2.y
+ dst.z = src0.z * src1.z + src2.z
+ dst.w = src0.w * src1.w + src2.w
+
+
+1.2 GL_ATI_fragment_shader
+---------------------------
+
+
+1.2.1 SUB - Subtract
+
+ dst.x = src0.x - src1.x
+ dst.y = src0.y - src1.y
+ dst.z = src0.z - src1.z
+ dst.w = src0.w - src1.w
+
+
+1.2.2 DOT3 - 3-component Dot Product
+
+ Alias for DP3.
+
+
+1.2.3 DOT4 - 4-component Dot Product
+
+ Alias for DP4.
+
+
+1.2.4 LERP - Linear Interpolate
+
+ dst.x = src0.x * (src1.x - src2.x) + src2.x
+ dst.y = src0.y * (src1.y - src2.y) + src2.y
+ dst.z = src0.z * (src1.z - src2.z) + src2.z
+ dst.w = src0.w * (src1.w - src2.w) + src2.w
+
+
+1.2.5 CND - Condition
+
+ dst.x = (src2.x > 0.5) ? src0.x : src1.x
+ dst.y = (src2.y > 0.5) ? src0.y : src1.y
+ dst.z = (src2.z > 0.5) ? src0.z : src1.z
+ dst.w = (src2.w > 0.5) ? src0.w : src1.w
+
+
+1.2.6 CND0 - Condition Zero
+
+ Removed. Use (CMP src2, src1, src0) instead.
+
+1.2.7 DOT2ADD - 2-component Dot Product And Add
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
+
+
+1.3 GL_EXT_vertex_shader
+-------------------------
+
+
+1.3.1 INDEX - Array Lookup
+
+ Considered for removal from language.
+
+
+1.3.2 NEGATE - Negate
+
+ Considered for removal from language.
+
+
+1.3.3 MADD - Multiply And Add
+
+ Alias for MAD.
+
+
+1.3.4 FRAC - Fraction
+
+ dst.x = src.x - floor(src.x)
+ dst.y = src.y - floor(src.y)
+ dst.z = src.z - floor(src.z)
+ dst.w = src.w - floor(src.w)
+
+
+1.3.5 SETGE - Set On Greater Equal
+
+ Alias for SGE.
+
+
+1.3.6 SETLT - Set On Less Than
+
+ Alias for SLT.
+
+
+1.3.7 CLAMP - Clamp
+
+ dst.x = clamp(src0.x, src1.x, src2.x)
+ dst.y = clamp(src0.y, src1.y, src2.y)
+ dst.z = clamp(src0.z, src1.z, src2.z)
+ dst.w = clamp(src0.w, src1.w, src2.w)
+
+
+1.3.8 FLOOR - Floor
+
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+ dst.w = floor(src.w)
+
+
+1.3.9 ROUND - Round
+
+ dst.x = round(src.x)
+ dst.y = round(src.y)
+ dst.z = round(src.z)
+ dst.w = round(src.w)
+
+
+1.3.10 EXPBASE2 - Exponential Base 2
+
+ dst.x = pow(2.0, src.x)
+ dst.y = pow(2.0, src.x)
+ dst.z = pow(2.0, src.x)
+ dst.w = pow(2.0, src.x)
+
+
+1.3.11 LOGBASE2 - Logarithm Base 2
+
+ dst.x = lg2(src.x)
+ dst.y = lg2(src.x)
+ dst.z = lg2(src.x)
+ dst.w = lg2(src.x)
+
+
+1.3.12 POWER - Power
+
+ dst.x = pow(src0.x, src1.x)
+ dst.y = pow(src0.x, src1.x)
+ dst.z = pow(src0.x, src1.x)
+ dst.w = pow(src0.x, src1.x)
+
+
+1.3.13 RECIP - Reciprocal
+
+ Alias for RCP.
+
+
+1.3.14 RECIPSQRT - Reciprocal Square Root
+
+ Alias for RSQ.
+
+
+1.3.15 CROSSPRODUCT - Cross Product
+
+ dst.x = src0.y * src1.z - src1.y * src0.z
+ dst.y = src0.z * src1.x - src1.z * src0.x
+ dst.z = src0.x * src1.y - src1.x * src0.y
+ dst.w = 1.0
+
+
+1.3.16 MULTIPLYMATRIX - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.4 GL_NV_vertex_program1_1
+----------------------------
+
+
+1.4.1 ABS - Absolute
+
+ dst.x = abs(src.x)
+ dst.y = abs(src.y)
+ dst.z = abs(src.z)
+ dst.w = abs(src.w)
+
+
+1.4.2 RCC - Reciprocal Clamped
+
+ dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+
+
+1.4.3 DPH - Homogeneous Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+
+
+1.5 GL_NV_fragment_program
+---------------------------
+
+
+1.5.1 COS - Cosine
+
+ dst.x = cos(src.x)
+ dst.y = cos(src.x)
+ dst.z = cos(src.x)
+ dst.w = cos(src.w)
+
+
+1.5.2 DDX - Derivative Relative To X
+
+ dst.x = partialx(src.x)
+ dst.y = partialx(src.y)
+ dst.z = partialx(src.z)
+ dst.w = partialx(src.w)
+
+
+1.5.3 DDY - Derivative Relative To Y
+
+ dst.x = partialy(src.x)
+ dst.y = partialy(src.y)
+ dst.z = partialy(src.z)
+ dst.w = partialy(src.w)
+
+
+1.5.4 EX2 - Exponential Base 2
+
+ Alias for EXPBASE2.
+
+
+1.5.5 FLR - Floor
+
+ Alias for FLOOR.
+
+
+1.5.6 FRC - Fraction
+
+ Alias for FRAC.
+
+
+1.5.7 KILP - Predicated Discard
+
+ discard
+
+
+1.5.8 LG2 - Logarithm Base 2
+
+ Alias for LOGBASE2.
+
+
+1.5.9 LRP - Linear Interpolate
+
+ Alias for LERP.
+
+
+1.5.10 PK2H - Pack Two 16-bit Floats
+
+ TBD
+
+
+1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
+
+ TBD
+
+
+1.5.12 PK4B - Pack Four Signed 8-bit Scalars
+
+ TBD
+
+
+1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
+
+ TBD
+
+
+1.5.14 POW - Power
+
+ Alias for POWER.
+
+
+1.5.15 RFL - Reflection Vector
+
+ dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
+ dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
+ dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
+ dst.w = 1.0
+
+
+1.5.16 SEQ - Set On Equal
+
+ dst.x = (src0.x == src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y == src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z == src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w == src1.w) ? 1.0 : 0.0
+
+
+1.5.17 SFL - Set On False
+
+ dst.x = 0.0
+ dst.y = 0.0
+ dst.z = 0.0
+ dst.w = 0.0
+
+
+1.5.18 SGT - Set On Greater Than
+
+ dst.x = (src0.x > src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y > src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z > src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w > src1.w) ? 1.0 : 0.0
+
+
+1.5.19 SIN - Sine
+
+ dst.x = sin(src.x)
+ dst.y = sin(src.x)
+ dst.z = sin(src.x)
+ dst.w = sin(src.w)
+
+
+1.5.20 SLE - Set On Less Equal Than
+
+ dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
+
+
+1.5.21 SNE - Set On Not Equal
+
+ dst.x = (src0.x != src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y != src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z != src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w != src1.w) ? 1.0 : 0.0
+
+
+1.5.22 STR - Set On True
+
+ dst.x = 1.0
+ dst.y = 1.0
+ dst.z = 1.0
+ dst.w = 1.0
+
+
+1.5.23 TEX - Texture Lookup
+
+ TBD
+
+
+1.5.24 TXD - Texture Lookup with Derivatives
+
+ TBD
+
+
+1.5.25 TXP - Projective Texture Lookup
+
+ TBD
+
+
+1.5.26 UP2H - Unpack Two 16-Bit Floats
+
+ TBD
+
+
+1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars
+
+ TBD
+
+
+1.5.28 UP4B - Unpack Four Signed 8-Bit Values
+
+ TBD
+
+
+1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars
+
+ TBD
+
+
+1.5.30 X2D - 2D Coordinate Transformation
+
+ dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
+ dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
+ dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
+ dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
+
+
+1.6 GL_NV_vertex_program2
+--------------------------
+
+
+1.6.1 ARA - Address Register Add
+
+ TBD
+
+
+1.6.2 ARR - Address Register Load With Round
+
+ dst.x = round(src.x)
+ dst.y = round(src.y)
+ dst.z = round(src.z)
+ dst.w = round(src.w)
+
+
+1.6.3 BRA - Branch
+
+ pc = target
+
+
+1.6.4 CAL - Subroutine Call
+
+ push(pc)
+ pc = target
+
+
+1.6.5 RET - Subroutine Call Return
+
+ pc = pop()
+
+
+1.6.6 SSG - Set Sign
+
+ dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
+ dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
+ dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
+ dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
+
+
+1.7 GL_ARB_vertex_program
+--------------------------
+
+
+1.7.1 SWZ - Extended Swizzle
+
+ dst.x = src.x
+ dst.y = src.y
+ dst.z = src.z
+ dst.w = src.w
+
+
+1.7.2 XPD - Cross Product
+
+ Alias for CROSSPRODUCT.
+
+
+1.8 GL_ARB_fragment_program
+----------------------------
+
+
+1.8.1 CMP - Compare
+
+ dst.x = (src0.x < 0.0) ? src1.x : src2.x
+ dst.y = (src0.y < 0.0) ? src1.y : src2.y
+ dst.z = (src0.z < 0.0) ? src1.z : src2.z
+ dst.w = (src0.w < 0.0) ? src1.w : src2.w
+
+
+1.8.2 KIL - Conditional Discard
+
+ if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
+ discard
+ endif
+
+
+1.8.3 SCS - Sine Cosine
+
+ dst.x = cos(src.x)
+ dst.y = sin(src.x)
+ dst.z = 0.0
+ dst.y = 1.0
+
+
+1.8.4 TXB - Texture Lookup With Bias
+
+ TBD
+
+
+1.9 GL_NV_fragment_program2
+----------------------------
+
+
+1.9.1 NRM - 3-component Vector Normalise
+
+ dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.w = 1.0
+
+
+1.9.2 DIV - Divide
+
+ dst.x = src0.x / src1.x
+ dst.y = src0.y / src1.y
+ dst.z = src0.z / src1.z
+ dst.w = src0.w / src1.w
+
+
+1.9.3 DP2 - 2-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y
+ dst.y = src0.x * src1.x + src0.y * src1.y
+ dst.z = src0.x * src1.x + src0.y * src1.y
+ dst.w = src0.x * src1.x + src0.y * src1.y
+
+
+1.9.4 DP2A - 2-component Dot Product And Add
+
+ Alias for DOT2ADD.
+
+
+1.9.5 TXL - Texture Lookup With LOD
+
+ TBD
+
+
+1.9.6 BRK - Break
+
+ TBD
+
+
+1.9.7 IF - If
+
+ TBD
+
+
+1.9.8 BGNFOR - Begin a For-Loop
+
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+
+ if (dst.y <= 0)
+ pc = [matching ENDFOR] + 1
+ endif
+
+ Note: The destination must be a loop register.
+ The source must be a constant register.
+
+
+1.9.9 REP - Repeat
+
+ TBD
+
+
+1.9.10 ELSE - Else
+
+ TBD
+
+
+1.9.11 ENDIF - End If
+
+ TBD
+
+
+1.9.12 ENDFOR - End a For-Loop
+
+ dst.x = dst.x + dst.z
+ dst.y = dst.y - 1.0
+
+ if (dst.y > 0)
+ pc = [matching BGNFOR instruction] + 1
+ endif
+
+ Note: The destination must be a loop register.
+
+
+1.9.13 ENDREP - End Repeat
+
+ TBD
+
+
+1.10 GL_NV_vertex_program3
+---------------------------
+
+
+1.10.1 PUSHA - Push Address Register On Stack
+
+ push(src.x)
+ push(src.y)
+ push(src.z)
+ push(src.w)
+
+
+1.10.2 POPA - Pop Address Register From Stack
+
+ dst.w = pop()
+ dst.z = pop()
+ dst.y = pop()
+ dst.x = pop()
+
+
+1.11 GL_NV_gpu_program4
+------------------------
+
+
+1.11.1 CEIL - Ceiling
+
+ dst.x = ceil(src.x)
+ dst.y = ceil(src.y)
+ dst.z = ceil(src.z)
+ dst.w = ceil(src.w)
+
+
+1.11.2 I2F - Integer To Float
+
+ dst.x = (float) src.x
+ dst.y = (float) src.y
+ dst.z = (float) src.z
+ dst.w = (float) src.w
+
+
+1.11.3 NOT - Bitwise Not
+
+ dst.x = ~src.x
+ dst.y = ~src.y
+ dst.z = ~src.z
+ dst.w = ~src.w
+
+
+1.11.4 TRUNC - Truncate
+
+ dst.x = trunc(src.x)
+ dst.y = trunc(src.y)
+ dst.z = trunc(src.z)
+ dst.w = trunc(src.w)
+
+
+1.11.5 SHL - Shift Left
+
+ dst.x = src0.x << src1.x
+ dst.y = src0.y << src1.x
+ dst.z = src0.z << src1.x
+ dst.w = src0.w << src1.x
+
+
+1.11.6 SHR - Shift Right
+
+ dst.x = src0.x >> src1.x
+ dst.y = src0.y >> src1.x
+ dst.z = src0.z >> src1.x
+ dst.w = src0.w >> src1.x
+
+
+1.11.7 AND - Bitwise And
+
+ dst.x = src0.x & src1.x
+ dst.y = src0.y & src1.y
+ dst.z = src0.z & src1.z
+ dst.w = src0.w & src1.w
+
+
+1.11.8 OR - Bitwise Or
+
+ dst.x = src0.x | src1.x
+ dst.y = src0.y | src1.y
+ dst.z = src0.z | src1.z
+ dst.w = src0.w | src1.w
+
+
+1.11.9 MOD - Modulus
+
+ dst.x = src0.x % src1.x
+ dst.y = src0.y % src1.y
+ dst.z = src0.z % src1.z
+ dst.w = src0.w % src1.w
+
+
+1.11.10 XOR - Bitwise Xor
+
+ dst.x = src0.x ^ src1.x
+ dst.y = src0.y ^ src1.y
+ dst.z = src0.z ^ src1.z
+ dst.w = src0.w ^ src1.w
+
+
+1.11.11 SAD - Sum Of Absolute Differences
+
+ dst.x = abs(src0.x - src1.x) + src2.x
+ dst.y = abs(src0.y - src1.y) + src2.y
+ dst.z = abs(src0.z - src1.z) + src2.z
+ dst.w = abs(src0.w - src1.w) + src2.w
+
+
+1.11.12 TXF - Texel Fetch
+
+ TBD
+
+
+1.11.13 TXQ - Texture Size Query
+
+ TBD
+
+
+1.11.14 CONT - Continue
+
+ TBD
+
+
+1.12 GL_NV_geometry_program4
+-----------------------------
+
+
+1.12.1 EMIT - Emit
+
+ TBD
+
+
+1.12.2 ENDPRIM - End Primitive
+
+ TBD
+
+
+1.13 GLSL
+----------
+
+
+1.13.1 BGNLOOP - Begin a Loop
+
+ TBD
+
+
+1.13.2 BGNSUB - Begin Subroutine
+
+ TBD
+
+
+1.13.3 ENDLOOP - End a Loop
+
+ TBD
+
+
+1.13.4 ENDSUB - End Subroutine
+
+ TBD
+
+
+1.13.5 INT - Truncate
+
+ Alias for TRUNC.
+
+
+1.13.6 NOISE1 - 1D Noise
+
+ TBD
+
+
+1.13.7 NOISE2 - 2D Noise
+
+ TBD
+
+
+1.13.8 NOISE3 - 3D Noise
+
+ TBD
+
+
+1.13.9 NOISE4 - 4D Noise
+
+ TBD
+
+
+1.13.10 NOP - No Operation
+
+ Do nothing.
+
+
+1.14 ps_1_1
+------------
+
+
+1.14.1 TEXKILL - Conditional Discard
+
+ Alias for KIL.
+
+
+1.15 ps_1_4
+------------
+
+
+1.15.1 TEXLD - Texture Lookup
+
+ Alias for TEX.
+
+
+1.16 ps_2_0
+------------
+
+
+1.16.1 M4X4 - Multiply Matrix
+
+ Alias for MULTIPLYMATRIX.
+
+
+1.16.2 M4X3 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.3 M3X4 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.4 M3X3 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.5 M3X2 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.6 CRS - Cross Product
+
+ Alias for XPD.
+
+
+1.16.7 NRM4 - 4-component Vector Normalise
+
+ dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.w = src.w / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+
+
+1.16.8 SINCOS - Sine Cosine
+
+ Alias for SCS.
+
+
+1.16.9 TEXLDB - Texture Lookup With Bias
+
+ Alias for TXB.
+
+
+1.16.10 DP2ADD - 2-component Dot Product And Add
+
+ Alias for DP2A.
+
+
+1.17 ps_2_x
+------------
+
+
+1.17.1 CALL - Subroutine Call
+
+ Alias for CAL.
+
+
+1.17.2 CALLNZ - Subroutine Call If Not Zero
+
+ TBD
+
+
+1.17.3 IFC - If
+
+ TBD
+
+
+1.17.4 BREAK - Break
+
+ Alias for BRK.
+
+
+1.17.5 BREAKC - Break Conditional
+
+ TBD
+
+
+1.17.6 DSX - Derivative Relative To X
+
+ Alias for DDX.
+
+
+1.17.7 DSY - Derivative Relative To Y
+
+ Alias for DDY.
+
+
+1.17.8 TEXLDD - Texture Lookup with Derivatives
+
+ Alias for TXD.
+
+
+1.18 vs_1_1
+------------
+
+
+1.18.1 EXPP - Approximate Exponential Base 2
+
+ Use EXP. See also 1.19.3.
+
+
+1.18.2 LOGP - Logarithm Base 2
+
+ Use LOG. See also 1.19.4.
+
+
+1.19 vs_2_0
+------------
+
+
+1.19.1 SGN - Set Sign
+
+ Alias for SSG.
+
+
+1.19.2 MOVA - Move Address Register
+
+ Alias for ARR.
+
+
+1.19.3 EXPP - Approximate Exponential Base 2
+
+ Use EX2.
+
+
+1.19.4 LOGP - Logarithm Base 2
+
+ Use LG2.
+
+
+2 Explanation of symbols used
+==============================
+
+
+2.1 Functions
+--------------
+
+
+ abs(x) Absolute value of x.
+ '|x|'
+ (x < 0.0) ? -x : x
+
+ ceil(x) Ceiling of x.
+
+ clamp(x,y,z) Clamp x between y and z.
+ (x < y) ? y : (x > z) ? z : x
+
+ cos(x) Cosine of x.
+
+ floor(x) Floor of x.
+
+ lg2(x) Logarithm base 2 of x.
+
+ max(x,y) Maximum of x and y.
+ (x > y) ? x : y
+
+ min(x,y) Minimum of x and y.
+ (x < y) ? x : y
+
+ partialx(x) Derivative of x relative to fragment's X.
+
+ partialy(x) Derivative of x relative to fragment's Y.
+
+ pop() Pop from stack.
+
+ pow(x,y) Raise x to power of y.
+
+ push(x) Push x on stack.
+
+ round(x) Round x.
+
+ sin(x) Sine of x.
+
+ sqrt(x) Square root of x.
+
+ trunc(x) Truncate x.
+
+
+2.2 Keywords
+-------------
+
+
+ discard Discard fragment.
+
+ dst First destination register.
+
+ dst0 First destination register.
+
+ pc Program counter.
+
+ src First source register.
+
+ src0 First source register.
+
+ src1 Second source register.
+
+ src2 Third source register.
+
+ target Label of target instruction.
+
+
+3 Other tokens
+===============
+
+
+3.1 Declaration Semantic
+-------------------------
+
+
+ Follows Declaration token if Semantic bit is set.
+
+ Since its purpose is to link a shader with other stages of the pipeline,
+ it is valid to follow only those Declaration tokens that declare a register
+ either in INPUT or OUTPUT file.
+
+ SemanticName field contains the semantic name of the register being declared.
+ There is no default value.
+
+ SemanticIndex is an optional subscript that can be used to distinguish
+ different register declarations with the same semantic name. The default value
+ is 0.
+
+ The meanings of the individual semantic names are explained in the following
+ sections.
+
+
+3.1.1 FACE
+
+ Valid only in a fragment shader INPUT declaration.
+
+ FACE.x is negative when the primitive is back facing. FACE.x is positive
+ when the primitive is front facing.