Merge branch 'gallium-nopointsizeminmax'

[mesa.git] / src / gallium / docs / source / tgsi.rst
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst

index ef068448e8342af3152162c43f71a65dae6e8cae..c292cd37d5c21f8c940f1d7bf22caa56fcf37075 100644 (file)
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -6,6 +6,23 @@ for describing shaders. Since Gallium is inherently shaderful, shaders are
  an important part of the API. TGSI is the only intermediate representation
  used by all drivers.
  
+Basics
+------
+
+All TGSI instructions, known as *opcodes*, operate on arbitrary-precision
+floating-point four-component vectors. An opcode may have up to one
+destination register, known as *dst*, and between zero and three source
+registers, called *src0* through *src2*, or simply *src* if there is only
+one.
+
+Some instructions, like :opcode:`I2F`, permit re-interpretation of vector
+components as integers. Other instructions permit using registers as
+two-component vectors with double precision; see :ref:`doubleopcodes`.
+
+When an instruction has a scalar result, the result is usually copied into
+each of the components of *dst*. When this happens, the result is said to be
+*replicated* to *dst*. :opcode:`RCP` is one such instruction.
+
  Instruction Set
  ---------------
  
@@ -13,7 +30,7 @@ From GL_NV_vertex_program
  ^^^^^^^^^^^^^^^^^^^^^^^^^
  
  
-ARL - Address Register Load
+.. opcode:: ARL - Address Register Load
  
  .. math::
  
@@ -26,7 +43,7 @@ ARL - Address Register Load
    dst.w = \lfloor src.w\rfloor
  
  
-MOV - Move
+.. opcode:: MOV - Move
  
  .. math::
  
@@ -39,7 +56,7 @@ MOV - Move
    dst.w = src.w
  
  
-LIT - Light Coefficients
+.. opcode:: LIT - Light Coefficients
  
  .. math::
  
@@ -52,33 +69,25 @@ LIT - Light Coefficients
    dst.w = 1
  
  
-RCP - Reciprocal
-
-.. math::
+.. opcode:: RCP - Reciprocal
  
-  dst.x = \frac{1}{src.x}
+This instruction replicates its result.
  
-  dst.y = \frac{1}{src.x}
+.. math::
  
-  dst.z = \frac{1}{src.x}
+  dst = \frac{1}{src.x}
  
-  dst.w = \frac{1}{src.x}
  
+.. opcode:: RSQ - Reciprocal Square Root
  
-RSQ - Reciprocal Square Root
+This instruction replicates its result.
  
  .. math::
  
-  dst.x = \frac{1}{\sqrt{|src.x|}}
-
-  dst.y = \frac{1}{\sqrt{|src.x|}}
-
-  dst.z = \frac{1}{\sqrt{|src.x|}}
+  dst = \frac{1}{\sqrt{|src.x|}}
  
-  dst.w = \frac{1}{\sqrt{|src.x|}}
  
-
-EXP - Approximate Exponential Base 2
+.. opcode:: EXP - Approximate Exponential Base 2
  
  .. math::
  
@@ -91,7 +100,7 @@ EXP - Approximate Exponential Base 2
    dst.w = 1
  
  
-LOG - Approximate Logarithm Base 2
+.. opcode:: LOG - Approximate Logarithm Base 2
  
  .. math::
  
@@ -104,7 +113,7 @@ LOG - Approximate Logarithm Base 2
    dst.w = 1
  
  
-MUL - Multiply
+.. opcode:: MUL - Multiply
  
  .. math::
  
@@ -117,7 +126,7 @@ MUL - Multiply
    dst.w = src0.w \times src1.w
  
  
-ADD - Add
+.. opcode:: ADD - Add
  
  .. math::
  
@@ -130,33 +139,25 @@ ADD - Add
    dst.w = src0.w + src1.w
  
  
-DP3 - 3-component Dot Product
-
-.. math::
+.. opcode:: DP3 - 3-component Dot Product
  
-  dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+This instruction replicates its result.
  
-  dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+.. math::
  
-  dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+  dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
  
-  dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
  
+.. opcode:: DP4 - 4-component Dot Product
  
-DP4 - 4-component Dot Product
+This instruction replicates its result.
  
  .. math::
  
-  dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
-
-  dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
-
-  dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
+  dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
  
-  dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
  
-
-DST - Distance Vector
+.. opcode:: DST - Distance Vector
  
  .. math::
  
@@ -169,7 +170,7 @@ DST - Distance Vector
    dst.w = src1.w
  
  
-MIN - Minimum
+.. opcode:: MIN - Minimum
  
  .. math::
  
@@ -182,7 +183,7 @@ MIN - Minimum
    dst.w = min(src0.w, src1.w)
  
  
-MAX - Maximum
+.. opcode:: MAX - Maximum
  
  .. math::
  
@@ -195,7 +196,7 @@ MAX - Maximum
    dst.w = max(src0.w, src1.w)
  
  
-SLT - Set On Less Than
+.. opcode:: SLT - Set On Less Than
  
  .. math::
  
@@ -208,7 +209,7 @@ SLT - Set On Less Than
    dst.w = (src0.w < src1.w) ? 1 : 0
  
  
-SGE - Set On Greater Equal Than
+.. opcode:: SGE - Set On Greater Equal Than
  
  .. math::
  
@@ -221,7 +222,7 @@ SGE - Set On Greater Equal Than
    dst.w = (src0.w >= src1.w) ? 1 : 0
  
  
-MAD - Multiply And Add
+.. opcode:: MAD - Multiply And Add
  
  .. math::
  
@@ -234,7 +235,7 @@ MAD - Multiply And Add
    dst.w = src0.w \times src1.w + src2.w
  
  
-SUB - Subtract
+.. opcode:: SUB - Subtract
  
  .. math::
  
@@ -247,7 +248,7 @@ SUB - Subtract
    dst.w = src0.w - src1.w
  
  
-LRP - Linear Interpolate
+.. opcode:: LRP - Linear Interpolate
  
  .. math::
  
@@ -260,7 +261,7 @@ LRP - Linear Interpolate
    dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
  
  
-CND - Condition
+.. opcode:: CND - Condition
  
  .. math::
  
@@ -273,7 +274,7 @@ CND - Condition
    dst.w = (src2.w > 0.5) ? src0.w : src1.w
  
  
-DP2A - 2-component Dot Product And Add
+.. opcode:: DP2A - 2-component Dot Product And Add
  
  .. math::
  
@@ -286,7 +287,7 @@ DP2A - 2-component Dot Product And Add
    dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
  
  
-FRAC - Fraction
+.. opcode:: FRAC - Fraction
  
  .. math::
  
@@ -299,7 +300,7 @@ FRAC - Fraction
    dst.w = src.w - \lfloor src.w\rfloor
  
  
-CLAMP - Clamp
+.. opcode:: CLAMP - Clamp
  
  .. math::
  
@@ -312,9 +313,9 @@ CLAMP - Clamp
    dst.w = clamp(src0.w, src1.w, src2.w)
  
  
-FLR - Floor
+.. opcode:: FLR - Floor
  
-This is identical to ARL.
+This is identical to :opcode:`ARL`.
  
  .. math::
  
@@ -327,7 +328,7 @@ This is identical to ARL.
    dst.w = \lfloor src.w\rfloor
  
  
-ROUND - Round
+.. opcode:: ROUND - Round
  
  .. math::
  
@@ -340,45 +341,33 @@ ROUND - Round
    dst.w = round(src.w)
  
  
-EX2 - Exponential Base 2
-
-.. math::
+.. opcode:: EX2 - Exponential Base 2
  
-  dst.x = 2^{src.x}
+This instruction replicates its result.
  
-  dst.y = 2^{src.x}
+.. math::
  
-  dst.z = 2^{src.x}
+  dst = 2^{src.x}
  
-  dst.w = 2^{src.x}
  
+.. opcode:: LG2 - Logarithm Base 2
  
-LG2 - Logarithm Base 2
+This instruction replicates its result.
  
  .. math::
  
-  dst.x = \log_2{src.x}
-
-  dst.y = \log_2{src.x}
+  dst = \log_2{src.x}
  
-  dst.z = \log_2{src.x}
  
-  dst.w = \log_2{src.x}
+.. opcode:: POW - Power
  
-
-POW - Power
+This instruction replicates its result.
  
  .. math::
  
-  dst.x = src0.x^{src1.x}
-
-  dst.y = src0.x^{src1.x}
-
-  dst.z = src0.x^{src1.x}
+  dst = src0.x^{src1.x}
  
-  dst.w = src0.x^{src1.x}
-
-XPD - Cross Product
+.. opcode:: XPD - Cross Product
  
  .. math::
  
@@ -391,7 +380,7 @@ XPD - Cross Product
    dst.w = 1
  
  
-ABS - Absolute
+.. opcode:: ABS - Absolute
  
  .. math::
  
@@ -404,48 +393,36 @@ ABS - Absolute
    dst.w = |src.w|
  
  
-RCC - Reciprocal Clamped
+.. opcode:: RCC - Reciprocal Clamped
+
+This instruction replicates its result.
  
  XXX cleanup on aisle three
  
  .. math::
  
-  dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
-
-  dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
+  dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
  
-  dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
  
-  dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
+.. opcode:: DPH - Homogeneous Dot Product
  
-
-DPH - Homogeneous Dot Product
+This instruction replicates its result.
  
  .. math::
  
-  dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
-
-  dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
-
-  dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
+  dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
  
-  dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
  
+.. opcode:: COS - Cosine
  
-COS - Cosine
+This instruction replicates its result.
  
  .. math::
  
-  dst.x = \cos{src.x}
-
-  dst.y = \cos{src.x}
-
-  dst.z = \cos{src.x}
+  dst = \cos{src.x}
  
-  dst.w = \cos{src.x}
  
-
-DDX - Derivative Relative To X
+.. opcode:: DDX - Derivative Relative To X
  
  .. math::
  
@@ -458,7 +435,7 @@ DDX - Derivative Relative To X
    dst.w = partialx(src.w)
  
  
-DDY - Derivative Relative To Y
+.. opcode:: DDY - Derivative Relative To Y
  
  .. math::
  
@@ -471,32 +448,32 @@ DDY - Derivative Relative To Y
    dst.w = partialy(src.w)
  
  
-KILP - Predicated Discard
+.. opcode:: KILP - Predicated Discard
  
    discard
  
  
-PK2H - Pack Two 16-bit Floats
+.. opcode:: PK2H - Pack Two 16-bit Floats
  
    TBD
  
  
-PK2US - Pack Two Unsigned 16-bit Scalars
+.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
  
    TBD
  
  
-PK4B - Pack Four Signed 8-bit Scalars
+.. opcode:: PK4B - Pack Four Signed 8-bit Scalars
  
    TBD
  
  
-PK4UB - Pack Four Unsigned 8-bit Scalars
+.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
  
    TBD
  
  
-RFL - Reflection Vector
+.. opcode:: RFL - Reflection Vector
  
  .. math::
  
@@ -508,145 +485,171 @@ RFL - Reflection Vector
  
    dst.w = 1
  
-Considered for removal.
+.. note::
+
+   Considered for removal.
  
  
-SEQ - Set On Equal
+.. opcode:: SEQ - Set On Equal
  
  .. math::
  
    dst.x = (src0.x == src1.x) ? 1 : 0
+
    dst.y = (src0.y == src1.y) ? 1 : 0
+
    dst.z = (src0.z == src1.z) ? 1 : 0
+
    dst.w = (src0.w == src1.w) ? 1 : 0
  
  
-SFL - Set On False
+.. opcode:: SFL - Set On False
+
+This instruction replicates its result.
  
  .. math::
  
-  dst.x = 0
-  dst.y = 0
-  dst.z = 0
-  dst.w = 0
+  dst = 0
+
+.. note::
+
+   Considered for removal.
  
-Considered for removal.
  
-SGT - Set On Greater Than
+.. opcode:: SGT - Set On Greater Than
  
  .. math::
  
    dst.x = (src0.x > src1.x) ? 1 : 0
+
    dst.y = (src0.y > src1.y) ? 1 : 0
-  dst.z = (src0.z > src1.z) ? 1 : 0
-  dst.w = (src0.w > src1.w) ? 1 : 0
  
+  dst.z = (src0.z > src1.z) ? 1 : 0
  
-SIN - Sine
+  dst.w = (src0.w > src1.w) ? 1 : 0
  
-.. math::
  
-  dst.x = \sin{src.x}
+.. opcode:: SIN - Sine
  
-  dst.y = \sin{src.x}
+This instruction replicates its result.
  
-  dst.z = \sin{src.x}
+.. math::
  
-  dst.w = \sin{src.x}
+  dst = \sin{src.x}
  
  
-SLE - Set On Less Equal Than
+.. opcode:: SLE - Set On Less Equal Than
  
  .. math::
  
    dst.x = (src0.x <= src1.x) ? 1 : 0
+
    dst.y = (src0.y <= src1.y) ? 1 : 0
+
    dst.z = (src0.z <= src1.z) ? 1 : 0
+
    dst.w = (src0.w <= src1.w) ? 1 : 0
  
  
-SNE - Set On Not Equal
+.. opcode:: SNE - Set On Not Equal
  
  .. math::
  
    dst.x = (src0.x != src1.x) ? 1 : 0
+
    dst.y = (src0.y != src1.y) ? 1 : 0
+
    dst.z = (src0.z != src1.z) ? 1 : 0
+
    dst.w = (src0.w != src1.w) ? 1 : 0
  
  
-STR - Set On True
+.. opcode:: STR - Set On True
+
+This instruction replicates its result.
  
  .. math::
  
-  dst.x = 1
-  dst.y = 1
-  dst.z = 1
-  dst.w = 1
+  dst = 1
  
  
-TEX - Texture Lookup
+.. opcode:: TEX - Texture Lookup
  
    TBD
  
  
-TXD - Texture Lookup with Derivatives
+.. opcode:: TXD - Texture Lookup with Derivatives
  
    TBD
  
  
-TXP - Projective Texture Lookup
+.. opcode:: TXP - Projective Texture Lookup
  
    TBD
  
  
-UP2H - Unpack Two 16-Bit Floats
+.. opcode:: UP2H - Unpack Two 16-Bit Floats
  
    TBD
  
-  Considered for removal.
+.. note::
+
+   Considered for removal.
  
-UP2US - Unpack Two Unsigned 16-Bit Scalars
+.. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
  
    TBD
  
-  Considered for removal.
+.. note::
+
+   Considered for removal.
  
-UP4B - Unpack Four Signed 8-Bit Values
+.. opcode:: UP4B - Unpack Four Signed 8-Bit Values
  
    TBD
  
-  Considered for removal.
+.. note::
  
-UP4UB - Unpack Four Unsigned 8-Bit Scalars
+   Considered for removal.
+
+.. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
  
    TBD
  
-  Considered for removal.
+.. note::
+
+   Considered for removal.
  
-X2D - 2D Coordinate Transformation
+.. opcode:: X2D - 2D Coordinate Transformation
  
  .. math::
  
    dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
+
    dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
+
    dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
+
    dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
  
-Considered for removal.
+.. note::
+
+   Considered for removal.
  
  
  From GL_NV_vertex_program2
  ^^^^^^^^^^^^^^^^^^^^^^^^^^
  
  
-ARA - Address Register Add
+.. opcode:: ARA - Address Register Add
  
    TBD
  
-  Considered for removal.
+.. note::
  
-ARR - Address Register Load With Round
+   Considered for removal.
+
+.. opcode:: ARR - Address Register Load With Round
  
  .. math::
  
@@ -659,26 +662,28 @@ ARR - Address Register Load With Round
    dst.w = round(src.w)
  
  
-BRA - Branch
+.. opcode:: BRA - Branch
  
    pc = target
  
-  Considered for removal.
+.. note::
+
+   Considered for removal.
  
-CAL - Subroutine Call
+.. opcode:: CAL - Subroutine Call
  
    push(pc)
    pc = target
  
  
-RET - Subroutine Call Return
+.. opcode:: RET - Subroutine Call Return
  
    pc = pop()
  
    Potential restrictions:  
    * Only occurs at end of function.
  
-SSG - Set Sign
+.. opcode:: SSG - Set Sign
  
  .. math::
  
@@ -691,7 +696,7 @@ SSG - Set Sign
    dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
  
  
-CMP - Compare
+.. opcode:: CMP - Compare
  
  .. math::
  
@@ -704,7 +709,7 @@ CMP - Compare
    dst.w = (src0.w < 0) ? src1.w : src2.w
  
  
-KIL - Conditional Discard
+.. opcode:: KIL - Conditional Discard
  
  .. math::
  
@@ -713,7 +718,7 @@ KIL - Conditional Discard
    endif
  
  
-SCS - Sine Cosine
+.. opcode:: SCS - Sine Cosine
  
  .. math::
  
@@ -726,12 +731,12 @@ SCS - Sine Cosine
    dst.y = 1
  
  
-TXB - Texture Lookup With Bias
+.. opcode:: TXB - Texture Lookup With Bias
  
    TBD
  
  
-NRM - 3-component Vector Normalise
+.. opcode:: NRM - 3-component Vector Normalise
  
  .. math::
  
@@ -744,7 +749,7 @@ NRM - 3-component Vector Normalise
    dst.w = 1
  
  
-DIV - Divide
+.. opcode:: DIV - Divide
  
  .. math::
  
@@ -757,35 +762,31 @@ DIV - Divide
    dst.w = \frac{src0.w}{src1.w}
  
  
-DP2 - 2-component Dot Product
+.. opcode:: DP2 - 2-component Dot Product
  
-.. math::
+This instruction replicates its result.
  
-  dst.x = src0.x \times src1.x + src0.y \times src1.y
-
-  dst.y = src0.x \times src1.x + src0.y \times src1.y
-
-  dst.z = src0.x \times src1.x + src0.y \times src1.y
+.. math::
  
-  dst.w = src0.x \times src1.x + src0.y \times src1.y
+  dst = src0.x \times src1.x + src0.y \times src1.y
  
  
-TXL - Texture Lookup With LOD
+.. opcode:: TXL - Texture Lookup With LOD
  
    TBD
  
  
-BRK - Break
+.. opcode:: BRK - Break
  
    TBD
  
  
-IF - If
+.. opcode:: IF - If
  
    TBD
  
  
-BGNFOR - Begin a For-Loop
+.. opcode:: BGNFOR - Begin a For-Loop
  
    dst.x = floor(src.x)
    dst.y = floor(src.y)
@@ -798,25 +799,31 @@ BGNFOR - Begin a For-Loop
    Note: The destination must be a loop register.
          The source must be a constant register.
  
-  Considered for cleanup / removal.
+.. note::
+
+   Considered for cleanup.
+
+.. note::
+
+   Considered for removal.
  
  
-REP - Repeat
+.. opcode:: REP - Repeat
  
    TBD
  
  
-ELSE - Else
+.. opcode:: ELSE - Else
  
    TBD
  
  
-ENDIF - End If
+.. opcode:: ENDIF - End If
  
    TBD
  
  
-ENDFOR - End a For-Loop
+.. opcode:: ENDFOR - End a For-Loop
  
    dst.x = dst.x + dst.z
    dst.y = dst.y - 1.0
@@ -827,30 +834,48 @@ ENDFOR - End a For-Loop
  
    Note: The destination must be a loop register.
  
-  Considered for cleanup / removal.
+.. note::
  
-ENDREP - End Repeat
+   Considered for cleanup.
+
+.. note::
+
+   Considered for removal.
+
+.. opcode:: ENDREP - End Repeat
  
    TBD
  
  
-PUSHA - Push Address Register On Stack
+.. opcode:: PUSHA - Push Address Register On Stack
  
    push(src.x)
    push(src.y)
    push(src.z)
    push(src.w)
  
-  Considered for cleanup / removal.
+.. note::
+
+   Considered for cleanup.
+
+.. note::
+
+   Considered for removal.
  
-POPA - Pop Address Register From Stack
+.. opcode:: POPA - Pop Address Register From Stack
  
    dst.w = pop()
    dst.z = pop()
    dst.y = pop()
    dst.x = pop()
  
-  Considered for cleanup / removal.
+.. note::
+
+   Considered for cleanup.
+
+.. note::
+
+   Considered for removal.
  
  
  From GL_NV_gpu_program4
@@ -858,7 +883,7 @@ From GL_NV_gpu_program4
  
  Support for these opcodes indicated by a special pipe capability bit (TBD).
  
-CEIL - Ceiling
+.. opcode:: CEIL - Ceiling
  
  .. math::
  
@@ -871,7 +896,7 @@ CEIL - Ceiling
    dst.w = \lceil src.w\rceil
  
  
-I2F - Integer To Float
+.. opcode:: I2F - Integer To Float
  
  .. math::
  
@@ -884,7 +909,7 @@ I2F - Integer To Float
    dst.w = (float) src.w
  
  
-NOT - Bitwise Not
+.. opcode:: NOT - Bitwise Not
  
  .. math::
  
@@ -897,7 +922,7 @@ NOT - Bitwise Not
    dst.w = ~src.w
  
  
-TRUNC - Truncate
+.. opcode:: TRUNC - Truncate
  
  .. math::
  
@@ -910,7 +935,7 @@ TRUNC - Truncate
    dst.w = trunc(src.w)
  
  
-SHL - Shift Left
+.. opcode:: SHL - Shift Left
  
  .. math::
  
@@ -923,7 +948,7 @@ SHL - Shift Left
    dst.w = src0.w << src1.x
  
  
-SHR - Shift Right
+.. opcode:: SHR - Shift Right
  
  .. math::
  
@@ -936,7 +961,7 @@ SHR - Shift Right
    dst.w = src0.w >> src1.x
  
  
-AND - Bitwise And
+.. opcode:: AND - Bitwise And
  
  .. math::
  
@@ -949,7 +974,7 @@ AND - Bitwise And
    dst.w = src0.w & src1.w
  
  
-OR - Bitwise Or
+.. opcode:: OR - Bitwise Or
  
  .. math::
  
@@ -962,7 +987,7 @@ OR - Bitwise Or
    dst.w = src0.w | src1.w
  
  
-MOD - Modulus
+.. opcode:: MOD - Modulus
  
  .. math::
  
@@ -975,20 +1000,20 @@ MOD - Modulus
    dst.w = src0.w \bmod src1.w
  
  
-XOR - Bitwise Xor
+.. opcode:: XOR - Bitwise Xor
  
  .. math::
  
-  dst.x = src0.x ^ src1.x
+  dst.x = src0.x \oplus src1.x
  
-  dst.y = src0.y ^ src1.y
+  dst.y = src0.y \oplus src1.y
  
-  dst.z = src0.z ^ src1.z
+  dst.z = src0.z \oplus src1.z
  
-  dst.w = src0.w ^ src1.w
+  dst.w = src0.w \oplus src1.w
  
  
-SAD - Sum Of Absolute Differences
+.. opcode:: SAD - Sum Of Absolute Differences
  
  .. math::
  
@@ -1001,17 +1026,17 @@ SAD - Sum Of Absolute Differences
    dst.w = |src0.w - src1.w| + src2.w
  
  
-TXF - Texel Fetch
+.. opcode:: TXF - Texel Fetch
  
    TBD
  
  
-TXQ - Texture Size Query
+.. opcode:: TXQ - Texture Size Query
  
    TBD
  
  
-CONT - Continue
+.. opcode:: CONT - Continue
  
    TBD
  
@@ -1020,12 +1045,12 @@ From GL_NV_geometry_program4
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  
  
-EMIT - Emit
+.. opcode:: EMIT - Emit
  
    TBD
  
  
-ENDPRIM - End Primitive
+.. opcode:: ENDPRIM - End Primitive
  
    TBD
  
@@ -1034,62 +1059,171 @@ From GLSL
  ^^^^^^^^^^
  
  
-BGNLOOP - Begin a Loop
+.. opcode:: BGNLOOP - Begin a Loop
  
    TBD
  
  
-BGNSUB - Begin Subroutine
+.. opcode:: BGNSUB - Begin Subroutine
  
    TBD
  
  
-ENDLOOP - End a Loop
+.. opcode:: ENDLOOP - End a Loop
  
    TBD
  
  
-ENDSUB - End Subroutine
+.. opcode:: ENDSUB - End Subroutine
  
    TBD
  
  
-NOP - No Operation
+.. opcode:: NOP - No Operation
  
    Do nothing.
  
  
-NRM4 - 4-component Vector Normalise
-
-.. math::
-
-  dst.x = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+.. opcode:: NRM4 - 4-component Vector Normalise
  
-  dst.y = \frac{src.y}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+This instruction replicates its result.
  
-  dst.z = \frac{src.z}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+.. math::
  
-  dst.w = \frac{src.w}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+  dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
  
  
  ps_2_x
  ^^^^^^^^^^^^
  
  
-CALLNZ - Subroutine Call If Not Zero
+.. opcode:: CALLNZ - Subroutine Call If Not Zero
  
    TBD
  
  
-IFC - If
+.. opcode:: IFC - If
  
    TBD
  
  
-BREAKC - Break Conditional
+.. opcode:: BREAKC - Break Conditional
  
    TBD
  
+.. _doubleopcodes:
+
+Double Opcodes
+^^^^^^^^^^^^^^^
+
+.. opcode:: DADD - Add Double
+
+.. math::
+
+  dst.xy = src0.xy + src1.xy
+
+  dst.zw = src0.zw + src1.zw
+
+
+.. opcode:: DDIV - Divide Double
+
+.. math::
+
+  dst.xy = src0.xy / src1.xy
+
+  dst.zw = src0.zw / src1.zw
+
+.. opcode:: DSEQ - Set Double on Equal
+
+.. math::
+
+  dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F
+
+  dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
+
+.. opcode:: DSLT - Set Double on Less Than
+
+.. math::
+
+  dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F
+
+  dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
+
+.. opcode:: DFRAC - Double Fraction
+
+.. math::
+
+  dst.xy = src.xy - \lfloor src.xy\rfloor
+
+  dst.zw = src.zw - \lfloor src.zw\rfloor
+
+
+.. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components
+
+.. math::
+
+  dst0.xy = frexp(src.xy, dst1.xy)
+
+  dst0.zw = frexp(src.zw, dst1.zw)
+
+.. opcode:: DLDEXP - Multiply Double Number by Integral Power of 2
+
+.. math::
+
+  dst.xy = ldexp(src0.xy, src1.xy)
+
+  dst.zw = ldexp(src0.zw, src1.zw)
+
+.. opcode:: DMIN - Minimum Double
+
+.. math::
+
+  dst.xy = min(src0.xy, src1.xy)
+
+  dst.zw = min(src0.zw, src1.zw)
+
+.. opcode:: DMAX - Maximum Double
+
+.. math::
+
+  dst.xy = max(src0.xy, src1.xy)
+
+  dst.zw = max(src0.zw, src1.zw)
+
+.. opcode:: DMUL - Multiply Double
+
+.. math::
+
+  dst.xy = src0.xy \times src1.xy
+
+  dst.zw = src0.zw \times src1.zw
+
+
+.. opcode:: DMAD - Multiply And Add Doubles
+
+.. math::
+
+  dst.xy = src0.xy \times src1.xy + src2.xy
+
+  dst.zw = src0.zw \times src1.zw + src2.zw
+
+
+.. opcode:: DRCP - Reciprocal Double
+
+.. math::
+
+   dst.xy = \frac{1}{src.xy}
+
+   dst.zw = \frac{1}{src.zw}
+
+.. opcode:: DSQRT - Square Root Double
+
+.. math::
+
+   dst.xy = \sqrt{src.xy}
+
+   dst.zw = \sqrt{src.zw}
+
  
  Explanation of symbols used
  ------------------------------
@@ -1137,25 +1271,41 @@ Keywords
  
    discard           Discard fragment.
  
-  dst               First destination register.
+  pc                Program counter.
  
-  dst0              First destination register.
+  target            Label of target instruction.
  
-  pc                Program counter.
  
-  src               First source register.
+Other tokens
+---------------
  
-  src0              First source register.
  
-  src1              Second source register.
+Declaration
+^^^^^^^^^^^
  
-  src2              Third source register.
  
-  target            Label of target instruction.
+Declares a register that will be referenced as an operand in Instruction
+tokens.
  
+File field contains register file that is being declared and is one
+of TGSI_FILE.
  
-Other tokens
----------------
+UsageMask field specifies which of the register components can be accessed
+and is one of TGSI_WRITEMASK.
+
+Interpolate field is only valid for fragment shader INPUT register files.
+It specifies the way input is being interpolated by the rasteriser and is one
+of TGSI_INTERPOLATE.
+
+If Dimension flag is set to 1, a Declaration Dimension token follows.
+
+If Semantic flag is set to 1, a Declaration Semantic token follows.
+
+CylindricalWrap bitfield is only valid for fragment shader INPUT register
+files. It specifies which register components should be subject to cylindrical
+wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X
+is set to 1, the X component should be interpolated according to cylindrical
+wrapping rules.
  
  
  Declaration Semantic
@@ -1187,9 +1337,8 @@ are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used
  for the perspective divide, if enabled.
  
  As a vertex shader output, position should be scaled to the viewport. When
-used in fragment shaders, position will ---
-
-XXX --- wait a minute. Should position be in [0,1] for x and y?
+used in fragment shaders, position will be in window coordinates. The convention
+used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties.
  
  XXX additionally, is there a way to configure the perspective divide? it's
  accelerated on most chipsets AFAIK...
@@ -1266,3 +1415,85 @@ TGSI_SEMANTIC_EDGEFLAG
  """"""""""""""""""""""
  
  XXX no clue
+
+
+Properties
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+Properties are general directives that apply to the whole TGSI program.
+
+FS_COORD_ORIGIN
+"""""""""""""""
+
+Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin.
+The default value is UPPER_LEFT.
+
+If UPPER_LEFT, the position will be (0,0) at the upper left corner and
+increase downward and rightward.
+If LOWER_LEFT, the position will be (0,0) at the lower left corner and
+increase upward and rightward.
+
+OpenGL defaults to LOWER_LEFT, and is configurable with the
+GL_ARB_fragment_coord_conventions extension.
+
+DirectX 9/10 use UPPER_LEFT.
+
+FS_COORD_PIXEL_CENTER
+"""""""""""""""""""""
+
+Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention.
+The default value is HALF_INTEGER.
+
+If HALF_INTEGER, the fractional part of the position will be 0.5.
+If INTEGER, the fractional part of the position will be 0.0.
+
+Note that this does not affect the set of fragments generated by
+rasterization, which is instead controlled by gl_rasterization_rules in the
+rasterizer.
+
+OpenGL defaults to HALF_INTEGER, and is configurable with the
+GL_ARB_fragment_coord_conventions extension.
+
+DirectX 9 uses INTEGER.
+DirectX 10 uses HALF_INTEGER.
+
+
+
+Texture Sampling and Texture Formats
+------------------------------------
+
+This table shows how texture image components are returned as (x,y,z,w) tuples
+by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and
+:opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as
+well.
+
++--------------------+--------------+--------------------+--------------+
+| Texture Components | Gallium      | OpenGL             | Direct3D 9   |
++====================+==============+====================+==============+
+| R                  | XXX TBD      | (r, 0, 0, 1)       | (r, 1, 1, 1) |
++--------------------+--------------+--------------------+--------------+
+| RG                 | XXX TBD      | (r, g, 0, 1)       | (r, g, 1, 1) |
++--------------------+--------------+--------------------+--------------+
+| RGB                | (r, g, b, 1) | (r, g, b, 1)       | (r, g, b, 1) |
++--------------------+--------------+--------------------+--------------+
+| RGBA               | (r, g, b, a) | (r, g, b, a)       | (r, g, b, a) |
++--------------------+--------------+--------------------+--------------+
+| A                  | (0, 0, 0, a) | (0, 0, 0, a)       | (0, 0, 0, a) |
++--------------------+--------------+--------------------+--------------+
+| L                  | (l, l, l, 1) | (l, l, l, 1)       | (l, l, l, 1) |
++--------------------+--------------+--------------------+--------------+
+| LA                 | (l, l, l, a) | (l, l, l, a)       | (l, l, l, a) |
++--------------------+--------------+--------------------+--------------+
+| I                  | (i, i, i, i) | (i, i, i, i)       | N/A          |
++--------------------+--------------+--------------------+--------------+
+| UV                 | XXX TBD      | (0, 0, 0, 1)       | (u, v, 1, 1) |
+|                    |              | [#envmap-bumpmap]_ |              |
++--------------------+--------------+--------------------+--------------+
+| Z                  | XXX TBD      | (z, z, z, 1)       | (0, z, 0, 1) |
+|                    |              | [#depth-tex-mode]_ |              |
++--------------------+--------------+--------------------+--------------+
+
+.. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
+.. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
+   or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE.