gallium: Add PIPE_SHADER_CAP_DOUBLES

[mesa.git] / src / gallium / docs / source / tgsi.rst
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst

index 4d26c465579e24238dfb20b8428a1bb79727b0ba..4213b4ac185530c2e3ace23a63e03b19a25c63da 100644 (file)
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -17,7 +17,7 @@ one.
  
  Some instructions, like :opcode:`I2F`, permit re-interpretation of vector
  components as integers. Other instructions permit using registers as
-two-component vectors with double precision; see :ref:`Double Opcodes`.
+two-component vectors with double precision; see :ref:`doubleopcodes`.
  
  When an instruction has a scalar result, the result is usually copied into
  each of the components of *dst*. When this happens, the result is said to be
@@ -74,13 +74,10 @@ used.
  
  .. math::
  
-  dst.x = 1
-
-  dst.y = max(src.x, 0)
-
-  dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
-
-  dst.w = 1
+  dst.x &= 1 \\
+  dst.y &= max(src.x, 0) \\
+  dst.z &= (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0 \\
+  dst.w &= 1
  
  
  .. opcode:: RCP - Reciprocal
@@ -114,26 +111,20 @@ This instruction replicates its result. The results are undefined for src < 0.
  
  .. math::
  
-  dst.x = 2^{\lfloor src.x\rfloor}
-
-  dst.y = src.x - \lfloor src.x\rfloor
-
-  dst.z = 2^{src.x}
-
-  dst.w = 1
+  dst.x &= 2^{\lfloor src.x\rfloor} \\
+  dst.y &= src.x - \lfloor src.x\rfloor \\
+  dst.z &= 2^{src.x} \\
+  dst.w &= 1
  
  
  .. opcode:: LOG - Approximate Logarithm Base 2
  
  .. math::
  
-  dst.x = \lfloor\log_2{|src.x|}\rfloor
-
-  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
-
-  dst.z = \log_2{|src.x|}
-
-  dst.w = 1
+  dst.x &= \lfloor\log_2{|src.x|}\rfloor \\
+  dst.y &= \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} \\
+  dst.z &= \log_2{|src.x|} \\
+  dst.w &= 1
  
  
  .. opcode:: MUL - Multiply
@@ -184,13 +175,10 @@ This instruction replicates its result.
  
  .. math::
  
-  dst.x = 1
-
-  dst.y = src0.y \times src1.y
-
-  dst.z = src0.z
-
-  dst.w = src1.w
+  dst.x &= 1\\
+  dst.y &= src0.y \times src1.y\\
+  dst.z &= src0.z\\
+  dst.w &= src1.w
  
  
  .. opcode:: MIN - Minimum
@@ -223,26 +211,26 @@ This instruction replicates its result.
  
  .. math::
  
-  dst.x = (src0.x < src1.x) ? 1 : 0
+  dst.x = (src0.x < src1.x) ? 1.0F : 0.0F
  
-  dst.y = (src0.y < src1.y) ? 1 : 0
+  dst.y = (src0.y < src1.y) ? 1.0F : 0.0F
  
-  dst.z = (src0.z < src1.z) ? 1 : 0
+  dst.z = (src0.z < src1.z) ? 1.0F : 0.0F
  
-  dst.w = (src0.w < src1.w) ? 1 : 0
+  dst.w = (src0.w < src1.w) ? 1.0F : 0.0F
  
  
  .. opcode:: SGE - Set On Greater Equal Than
  
  .. math::
  
-  dst.x = (src0.x >= src1.x) ? 1 : 0
+  dst.x = (src0.x >= src1.x) ? 1.0F : 0.0F
  
-  dst.y = (src0.y >= src1.y) ? 1 : 0
+  dst.y = (src0.y >= src1.y) ? 1.0F : 0.0F
  
-  dst.z = (src0.z >= src1.z) ? 1 : 0
+  dst.z = (src0.z >= src1.z) ? 1.0F : 0.0F
  
-  dst.w = (src0.w >= src1.w) ? 1 : 0
+  dst.w = (src0.w >= src1.w) ? 1.0F : 0.0F
  
  
  .. opcode:: MAD - Multiply And Add
@@ -424,7 +412,7 @@ XXX cleanup on aisle three
  
  .. math::
  
-  dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
+  dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.84467e+019) : clamp(1 / src.x, -1.84467e+019, -5.42101e-020)
  
  
  .. opcode:: DPH - Homogeneous Dot Product
@@ -512,13 +500,13 @@ This instruction replicates its result.
  
  .. math::
  
-  dst.x = (src0.x == src1.x) ? 1 : 0
+  dst.x = (src0.x == src1.x) ? 1.0F : 0.0F
  
-  dst.y = (src0.y == src1.y) ? 1 : 0
+  dst.y = (src0.y == src1.y) ? 1.0F : 0.0F
  
-  dst.z = (src0.z == src1.z) ? 1 : 0
+  dst.z = (src0.z == src1.z) ? 1.0F : 0.0F
  
-  dst.w = (src0.w == src1.w) ? 1 : 0
+  dst.w = (src0.w == src1.w) ? 1.0F : 0.0F
  
  
  .. opcode:: SFL - Set On False
@@ -527,7 +515,7 @@ This instruction replicates its result.
  
  .. math::
  
-  dst = 0
+  dst = 0.0F
  
  .. note::
  
@@ -538,13 +526,13 @@ This instruction replicates its result.
  
  .. math::
  
-  dst.x = (src0.x > src1.x) ? 1 : 0
+  dst.x = (src0.x > src1.x) ? 1.0F : 0.0F
  
-  dst.y = (src0.y > src1.y) ? 1 : 0
+  dst.y = (src0.y > src1.y) ? 1.0F : 0.0F
  
-  dst.z = (src0.z > src1.z) ? 1 : 0
+  dst.z = (src0.z > src1.z) ? 1.0F : 0.0F
  
-  dst.w = (src0.w > src1.w) ? 1 : 0
+  dst.w = (src0.w > src1.w) ? 1.0F : 0.0F
  
  
  .. opcode:: SIN - Sine
@@ -560,26 +548,26 @@ This instruction replicates its result.
  
  .. math::
  
-  dst.x = (src0.x <= src1.x) ? 1 : 0
+  dst.x = (src0.x <= src1.x) ? 1.0F : 0.0F
  
-  dst.y = (src0.y <= src1.y) ? 1 : 0
+  dst.y = (src0.y <= src1.y) ? 1.0F : 0.0F
  
-  dst.z = (src0.z <= src1.z) ? 1 : 0
+  dst.z = (src0.z <= src1.z) ? 1.0F : 0.0F
  
-  dst.w = (src0.w <= src1.w) ? 1 : 0
+  dst.w = (src0.w <= src1.w) ? 1.0F : 0.0F
  
  
  .. opcode:: SNE - Set On Not Equal
  
  .. math::
  
-  dst.x = (src0.x != src1.x) ? 1 : 0
+  dst.x = (src0.x != src1.x) ? 1.0F : 0.0F
  
-  dst.y = (src0.y != src1.y) ? 1 : 0
+  dst.y = (src0.y != src1.y) ? 1.0F : 0.0F
  
-  dst.z = (src0.z != src1.z) ? 1 : 0
+  dst.z = (src0.z != src1.z) ? 1.0F : 0.0F
  
-  dst.w = (src0.w != src1.w) ? 1 : 0
+  dst.w = (src0.w != src1.w) ? 1.0F : 0.0F
  
  
  .. opcode:: STR - Set On True
@@ -588,31 +576,34 @@ This instruction replicates its result.
  
  .. math::
  
-  dst = 1
+  dst = 1.0F
  
  
  .. opcode:: TEX - Texture Lookup
  
-.. math::
-
-  coord = src0
-
-  bias = 0.0
-
-  dst = texture_sample(unit, coord, bias)
-
    for array textures src0.y contains the slice for 1D,
    and src0.z contains the slice for 2D.
+
    for shadow textures with no arrays, src0.z contains
    the reference value.
+
    for shadow textures with arrays, src0.z contains
    the reference value for 1D arrays, and src0.w contains
    the reference value for 2D arrays.
+
    There is no way to pass a bias in the .w value for
    shadow arrays, and GLSL doesn't allow this.
    GLSL does allow cube shadows maps to take a bias value,
    and we have to determine how this will look in TGSI.
  
+.. math::
+
+  coord = src0
+
+  bias = 0.0
+
+  dst = texture\_sample(unit, coord, bias)
+
  .. opcode:: TXD - Texture Lookup with Derivatives
  
  .. math::
@@ -625,7 +616,7 @@ This instruction replicates its result.
  
    bias = 0.0
  
-  dst = texture_sample_deriv(unit, coord, bias, ddx, ddy)
+  dst = texture\_sample\_deriv(unit, coord, bias, ddx, ddy)
  
  
  .. opcode:: TXP - Projective Texture Lookup
@@ -642,7 +633,7 @@ This instruction replicates its result.
  
    bias = 0.0
  
-  dst = texture_sample(unit, coord, bias)
+  dst = texture\_sample(unit, coord, bias)
  
  
  .. opcode:: UP2H - Unpack Two 16-Bit Floats
@@ -784,7 +775,7 @@ This instruction replicates its result.
  
    bias = src.z
  
-  dst = texture_sample(unit, coord, bias)
+  dst = texture\_sample(unit, coord, bias)
  
  
  .. opcode:: NRM - 3-component Vector Normalise
@@ -836,7 +827,7 @@ This instruction replicates its result.
  
    lod = src0.w
  
-  dst = texture_sample(unit, coord, lod)
+  dst = texture\_sample(unit, coord, lod)
  
  
  .. opcode:: PUSHA - Push Address Register On Stack
@@ -958,34 +949,85 @@ XXX doesn't look like most of the opcodes really belong here.
    dst.w = |src0.w - src1.w| + src2.w
  
  
-.. opcode:: TXF - Texel Fetch (as per NV_gpu_shader4), extract a single texel
-                  from a specified texture image. The source sampler may
-                 not be a CUBE or SHADOW.
-                  src 0 is a four-component signed integer vector used to
-                 identify the single texel accessed. 3 components + level.
-                 src 1 is a 3 component constant signed integer vector,
-                 with each component only have a range of
-                 -8..+8 (hw only seems to deal with this range, interface
-                 allows for up to unsigned int).
-                 TXF(uint_vec coord, int_vec offset).
+.. opcode:: TXF - Texel Fetch
+
+  As per NV_gpu_shader4, extract a single texel from a specified texture
+  image. The source sampler may not be a CUBE or SHADOW.  src 0 is a
+  four-component signed integer vector used to identify the single texel
+  accessed. 3 components + level.  src 1 is a 3 component constant signed
+  integer vector, with each component only having a range of -8..+8 (hw only
+  seems to deal with this range, interface allows for up to unsigned int).
+  TXF(uint_vec coord, int_vec offset).
+
  
+.. opcode:: TXQ - Texture Size Query
  
-.. opcode:: TXQ - Texture Size Query (as per NV_gpu_program4)
-                  retrieve the dimensions of the texture
-                  depending on the target. For 1D (width), 2D/RECT/CUBE
-                 (width, height), 3D (width, height, depth),
-                 1D array (width, layers), 2D array (width, height, layers)
+  As per NV_gpu_program4, retrieve the dimensions of the texture depending on
+  the target. For 1D (width), 2D/RECT/CUBE (width, height), 3D (width, height,
+  depth), 1D array (width, layers), 2D array (width, height, layers).
+  Also return the number of accessible levels (last_level - first_level + 1)
+  in W.
  
  .. math::
  
    lod = src0.x
  
-  dst.x = texture_width(unit, lod)
+  dst.x = texture\_width(unit, lod)
+
+  dst.y = texture\_height(unit, lod)
+
+  dst.z = texture\_depth(unit, lod)
+
+  dst.w = texture\_levels(unit)
+
+.. opcode:: TG4 - Texture Gather
+
+  As per ARB_texture_gather, gathers the four texels to be used in a bi-linear
+  filtering operation and packs them into a single register.  Only works with
+  2D, 2D array, cubemaps, and cubemap arrays.  For 2D textures, only the
+  addressing modes of the sampler and the top level of any mip pyramid are
+  used. Set W to zero.  It behaves like the TEX instruction, but a filtered
+  sample is not generated. The four samples that contribute to filtering are
+  placed into xyzw in clockwise order, starting with the (u,v) texture
+  coordinate delta at the following locations (-, +), (+, +), (+, -), (-, -),
+  where the magnitude of the deltas are half a texel.
+
+  PIPE_CAP_TEXTURE_SM5 enhances this instruction to support shadow per-sample
+  depth compares, single component selection, and a non-constant offset. It
+  doesn't allow support for the GL independent offset to get i0,j0. This would
+  require another CAP if hw can do it natively. For now we lower that before
+  TGSI.
+
+.. math::
+
+   coord = src0
+
+   component = src1
+
+   dst = texture\_gather4 (unit, coord, component)
+
+(with SM5 - cube array shadow)
+
+.. math::
+
+   coord = src0
  
-  dst.y = texture_height(unit, lod)
+   compare = src1
  
-  dst.z = texture_depth(unit, lod)
+   dst = texture\_gather (unit, coord, compare)
  
+.. opcode:: LODQ - level of detail query
+
+   Compute the LOD information that the texture pipe would use to access the
+   texture. The Y component contains the computed LOD lambda_prime. The X
+   component contains the LOD that will be accessed, based on min/max lod's
+   and mipmap filters.
+
+.. math::
+
+   coord = src0
+
+   dst.xy = lodq(unit, coord)
  
  Integer ISA
  ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1103,6 +1145,36 @@ Support for these opcodes indicated by PIPE_SHADER_CAP_INTEGERS (all of them?)
    dst.w = src0.w \times src1.w
  
  
+.. opcode:: IMUL_HI - Signed Integer Multiply High Bits
+
+   The high 32 bits of the multiplication of 2 signed integers are returned.
+
+.. math::
+
+  dst.x = (src0.x \times src1.x) >> 32
+
+  dst.y = (src0.y \times src1.y) >> 32
+
+  dst.z = (src0.z \times src1.z) >> 32
+
+  dst.w = (src0.w \times src1.w) >> 32
+
+
+.. opcode:: UMUL_HI - Unsigned Integer Multiply High Bits
+
+   The high 32 bits of the multiplication of 2 unsigned integers are returned.
+
+.. math::
+
+  dst.x = (src0.x \times src1.x) >> 32
+
+  dst.y = (src0.y \times src1.y) >> 32
+
+  dst.z = (src0.z \times src1.z) >> 32
+
+  dst.w = (src0.w \times src1.w) >> 32
+
+
  .. opcode:: IDIV - Signed Integer Division
  
     TBD: behavior for division by zero.
@@ -1152,26 +1224,26 @@ Support for these opcodes indicated by PIPE_SHADER_CAP_INTEGERS (all of them?)
  
  .. math::
  
-  dst.x = ~src.x
+  dst.x = \sim src.x
  
-  dst.y = ~src.y
+  dst.y = \sim src.y
  
-  dst.z = ~src.z
+  dst.z = \sim src.z
  
-  dst.w = ~src.w
+  dst.w = \sim src.w
  
  
  .. opcode:: AND - Bitwise And
  
  .. math::
  
-  dst.x = src0.x & src1.x
+  dst.x = src0.x \& src1.x
  
-  dst.y = src0.y & src1.y
+  dst.y = src0.y \& src1.y
  
-  dst.z = src0.z & src1.z
+  dst.z = src0.z \& src1.z
  
-  dst.w = src0.w & src1.w
+  dst.w = src0.w \& src1.w
  
  
  .. opcode:: OR - Bitwise Or
@@ -1254,41 +1326,47 @@ Support for these opcodes indicated by PIPE_SHADER_CAP_INTEGERS (all of them?)
  
  .. opcode:: SHL - Shift Left
  
+   The shift count is masked with 0x1f before the shift is applied.
+
  .. math::
  
-  dst.x = src0.x << src1.x
+  dst.x = src0.x << (0x1f \& src1.x)
  
-  dst.y = src0.y << src1.x
+  dst.y = src0.y << (0x1f \& src1.y)
  
-  dst.z = src0.z << src1.x
+  dst.z = src0.z << (0x1f \& src1.z)
  
-  dst.w = src0.w << src1.x
+  dst.w = src0.w << (0x1f \& src1.w)
  
  
  .. opcode:: ISHR - Arithmetic Shift Right (of Signed Integer)
  
+   The shift count is masked with 0x1f before the shift is applied.
+
  .. math::
  
-  dst.x = src0.x >> src1.x
+  dst.x = src0.x >> (0x1f \& src1.x)
  
-  dst.y = src0.y >> src1.x
+  dst.y = src0.y >> (0x1f \& src1.y)
  
-  dst.z = src0.z >> src1.x
+  dst.z = src0.z >> (0x1f \& src1.z)
  
-  dst.w = src0.w >> src1.x
+  dst.w = src0.w >> (0x1f \& src1.w)
  
  
  .. opcode:: USHR - Logical Shift Right
  
+   The shift count is masked with 0x1f before the shift is applied.
+
  .. math::
  
-  dst.x = src0.x >> (unsigned) src1.x
+  dst.x = src0.x >> (unsigned) (0x1f \& src1.x)
  
-  dst.y = src0.y >> (unsigned) src1.x
+  dst.y = src0.y >> (unsigned) (0x1f \& src1.y)
  
-  dst.z = src0.z >> (unsigned) src1.x
+  dst.z = src0.z >> (unsigned) (0x1f \& src1.z)
  
-  dst.w = src0.w >> (unsigned) src1.x
+  dst.w = src0.w >> (unsigned) (0x1f \& src1.w)
  
  
  .. opcode:: UCMP - Integer Conditional Move
@@ -1319,82 +1397,142 @@ Support for these opcodes indicated by PIPE_SHADER_CAP_INTEGERS (all of them?)
  
  
  
+.. opcode:: FSLT - Float Set On Less Than (ordered)
+
+   Same comparison as SLT but returns integer instead of 1.0/0.0 float
+
+.. math::
+
+  dst.x = (src0.x < src1.x) ? \sim 0 : 0
+
+  dst.y = (src0.y < src1.y) ? \sim 0 : 0
+
+  dst.z = (src0.z < src1.z) ? \sim 0 : 0
+
+  dst.w = (src0.w < src1.w) ? \sim 0 : 0
+
+
  .. opcode:: ISLT - Signed Integer Set On Less Than
  
  .. math::
  
-  dst.x = (src0.x < src1.x) ? ~0 : 0
+  dst.x = (src0.x < src1.x) ? \sim 0 : 0
  
-  dst.y = (src0.y < src1.y) ? ~0 : 0
+  dst.y = (src0.y < src1.y) ? \sim 0 : 0
  
-  dst.z = (src0.z < src1.z) ? ~0 : 0
+  dst.z = (src0.z < src1.z) ? \sim 0 : 0
  
-  dst.w = (src0.w < src1.w) ? ~0 : 0
+  dst.w = (src0.w < src1.w) ? \sim 0 : 0
  
  
  .. opcode:: USLT - Unsigned Integer Set On Less Than
  
  .. math::
  
-  dst.x = (src0.x < src1.x) ? ~0 : 0
+  dst.x = (src0.x < src1.x) ? \sim 0 : 0
+
+  dst.y = (src0.y < src1.y) ? \sim 0 : 0
+
+  dst.z = (src0.z < src1.z) ? \sim 0 : 0
+
+  dst.w = (src0.w < src1.w) ? \sim 0 : 0
+
+
+.. opcode:: FSGE - Float Set On Greater Equal Than (ordered)
+
+   Same comparison as SGE but returns integer instead of 1.0/0.0 float
+
+.. math::
+
+  dst.x = (src0.x >= src1.x) ? \sim 0 : 0
  
-  dst.y = (src0.y < src1.y) ? ~0 : 0
+  dst.y = (src0.y >= src1.y) ? \sim 0 : 0
  
-  dst.z = (src0.z < src1.z) ? ~0 : 0
+  dst.z = (src0.z >= src1.z) ? \sim 0 : 0
  
-  dst.w = (src0.w < src1.w) ? ~0 : 0
+  dst.w = (src0.w >= src1.w) ? \sim 0 : 0
  
  
  .. opcode:: ISGE - Signed Integer Set On Greater Equal Than
  
  .. math::
  
-  dst.x = (src0.x >= src1.x) ? ~0 : 0
+  dst.x = (src0.x >= src1.x) ? \sim 0 : 0
  
-  dst.y = (src0.y >= src1.y) ? ~0 : 0
+  dst.y = (src0.y >= src1.y) ? \sim 0 : 0
  
-  dst.z = (src0.z >= src1.z) ? ~0 : 0
+  dst.z = (src0.z >= src1.z) ? \sim 0 : 0
  
-  dst.w = (src0.w >= src1.w) ? ~0 : 0
+  dst.w = (src0.w >= src1.w) ? \sim 0 : 0
  
  
  .. opcode:: USGE - Unsigned Integer Set On Greater Equal Than
  
  .. math::
  
-  dst.x = (src0.x >= src1.x) ? ~0 : 0
+  dst.x = (src0.x >= src1.x) ? \sim 0 : 0
+
+  dst.y = (src0.y >= src1.y) ? \sim 0 : 0
+
+  dst.z = (src0.z >= src1.z) ? \sim 0 : 0
+
+  dst.w = (src0.w >= src1.w) ? \sim 0 : 0
+
+
+.. opcode:: FSEQ - Float Set On Equal (ordered)
+
+   Same comparison as SEQ but returns integer instead of 1.0/0.0 float
+
+.. math::
+
+  dst.x = (src0.x == src1.x) ? \sim 0 : 0
  
-  dst.y = (src0.y >= src1.y) ? ~0 : 0
+  dst.y = (src0.y == src1.y) ? \sim 0 : 0
  
-  dst.z = (src0.z >= src1.z) ? ~0 : 0
+  dst.z = (src0.z == src1.z) ? \sim 0 : 0
  
-  dst.w = (src0.w >= src1.w) ? ~0 : 0
+  dst.w = (src0.w == src1.w) ? \sim 0 : 0
  
  
  .. opcode:: USEQ - Integer Set On Equal
  
  .. math::
  
-  dst.x = (src0.x == src1.x) ? ~0 : 0
+  dst.x = (src0.x == src1.x) ? \sim 0 : 0
+
+  dst.y = (src0.y == src1.y) ? \sim 0 : 0
+
+  dst.z = (src0.z == src1.z) ? \sim 0 : 0
+
+  dst.w = (src0.w == src1.w) ? \sim 0 : 0
+
+
+.. opcode:: FSNE - Float Set On Not Equal (unordered)
+
+   Same comparison as SNE but returns integer instead of 1.0/0.0 float
+
+.. math::
+
+  dst.x = (src0.x != src1.x) ? \sim 0 : 0
  
-  dst.y = (src0.y == src1.y) ? ~0 : 0
+  dst.y = (src0.y != src1.y) ? \sim 0 : 0
  
-  dst.z = (src0.z == src1.z) ? ~0 : 0
+  dst.z = (src0.z != src1.z) ? \sim 0 : 0
  
-  dst.w = (src0.w == src1.w) ? ~0 : 0
+  dst.w = (src0.w != src1.w) ? \sim 0 : 0
  
  
  .. opcode:: USNE - Integer Set On Not Equal
  
  .. math::
  
-  dst.x = (src0.x != src1.x) ? ~0 : 0
+  dst.x = (src0.x != src1.x) ? \sim 0 : 0
  
-  dst.y = (src0.y != src1.y) ? ~0 : 0
+  dst.y = (src0.y != src1.y) ? \sim 0 : 0
  
-  dst.z = (src0.z != src1.z) ? ~0 : 0
+  dst.z = (src0.z != src1.z) ? \sim 0 : 0
  
-  dst.w = (src0.w != src1.w) ? ~0 : 0
+  dst.w = (src0.w != src1.w) ? \sim 0 : 0
  
  
  .. opcode:: INEG - Integer Negate
@@ -1424,6 +1562,81 @@ Support for these opcodes indicated by PIPE_SHADER_CAP_INTEGERS (all of them?)
  
    dst.w = |src.w|
  
+Bitwise ISA
+^^^^^^^^^^^
+These opcodes are used for bit-level manipulation of integers.
+
+.. opcode:: IBFE - Signed Bitfield Extract
+
+  See SM5 instruction of the same name. Extracts a set of bits from the input,
+  and sign-extends them if the high bit of the extracted window is set.
+
+  Pseudocode::
+
+    def ibfe(value, offset, bits):
+      offset = offset & 0x1f
+      bits = bits & 0x1f
+      if bits == 0: return 0
+      # Note: >> sign-extends
+      if bits + offset < 32:
+        return (value << (32 - offset - bits)) >> (32 - bits)
+      else:
+        return value >> offset
+
+.. opcode:: UBFE - Unsigned Bitfield Extract
+
+  See SM5 instruction of the same name. Extracts a set of bits from the input,
+  without any sign-extension.
+
+  Pseudocode::
+
+    def ubfe(value, offset, bits):
+      offset = offset & 0x1f
+      bits = bits & 0x1f
+      if bits == 0: return 0
+      # Note: >> does not sign-extend
+      if bits + offset < 32:
+        return (value << (32 - offset - bits)) >> (32 - bits)
+      else:
+        return value >> offset
+
+.. opcode:: BFI - Bitfield Insert
+
+  See SM5 instruction of the same name. Replaces a bit region of 'base' with
+  the low bits of 'insert'.
+
+  Pseudocode::
+
+    def bfi(base, insert, offset, bits):
+      offset = offset & 0x1f
+      bits = bits & 0x1f
+      mask = ((1 << bits) - 1) << offset
+      return ((insert << offset) & mask) | (base & ~mask)
+
+.. opcode:: BREV - Bitfield Reverse
+
+  See SM5 instruction BFREV. Reverses the bits of the argument.
+
+.. opcode:: POPC - Population Count
+
+  See SM5 instruction COUNTBITS. Counts the number of set bits in the argument.
+
+.. opcode:: LSB - Index of lowest set bit
+
+  See SM5 instruction FIRSTBIT_LO. Computes the 0-based index of the first set
+  bit of the argument. Returns -1 if none are set.
+
+.. opcode:: IMSB - Index of highest non-sign bit
+
+  See SM5 instruction FIRSTBIT_SHI. Computes the 0-based index of the highest
+  non-sign bit of the argument (i.e. highest 0 bit for negative numbers,
+  highest 1 bit for positive numbers). Returns -1 if all bits are the same
+  (i.e. for inputs 0 and -1).
+
+.. opcode:: UMSB - Index of highest set bit
+
+  See SM5 instruction FIRSTBIT_HI. Computes the 0-based index of the highest
+  set bit of the argument. Returns -1 if none are set.
  
  Geometry ISA
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1433,14 +1646,14 @@ in any other type of shader.
  
  .. opcode:: EMIT - Emit
  
-  Generate a new vertex for the current primitive using the values in the
-  output registers.
+  Generate a new vertex for the current primitive into the specified vertex
+  stream using the values in the output registers.
  
  
  .. opcode:: ENDPRIM - End Primitive
  
-  Complete the current primitive (consisting of the emitted vertices),
-  and start a new one.
+  Complete the current primitive in the specified vertex stream (consisting of
+  the emitted vertices), and start a new one.
  
  
  GLSL ISA
@@ -1555,18 +1768,19 @@ Some require glsl version 1.30 (UIF/BREAKC/SWITCH/CASE/DEFAULT/ENDSWITCH).
     just as last statement, and fallthrough is allowed into/from it.
     CASE src arguments are evaluated at bit level against the SWITCH src argument.
  
-   Example:
-   SWITCH src[0].x
-   CASE src[0].x
-   (some instructions here)
-   (optional BRK here)
-   DEFAULT
-   (some instructions here)
-   (optional BRK here)
-   CASE src[0].x
-   (some instructions here)
-   (optional BRK here)
-   ENDSWITCH
+   Example::
+
+     SWITCH src[0].x
+     CASE src[0].x
+     (some instructions here)
+     (optional BRK here)
+     DEFAULT
+     (some instructions here)
+     (optional BRK here)
+     CASE src[0].x
+     (some instructions here)
+     (optional BRK here)
+     ENDSWITCH
  
  
  .. opcode:: CASE - Switch case
@@ -1730,146 +1944,174 @@ Resource Sampling Opcodes
  
  Those opcodes follow very closely semantics of the respective Direct3D
  instructions. If in doubt double check Direct3D documentation.
+Note that the swizzle on SVIEW (src1) determines texel swizzling
+after lookup.
+
+.. opcode:: SAMPLE
+
+  Using provided address, sample data from the specified texture using the
+  filtering mode identified by the given sampler. The source data may come from
+  any resource type other than buffers.
+
+  Syntax: ``SAMPLE dst, address, sampler_view, sampler``
+
+  Example: ``SAMPLE TEMP[0], TEMP[1], SVIEW[0], SAMP[0]``
+
+.. opcode:: SAMPLE_I
+
+  Simplified alternative to the SAMPLE instruction.  Using the provided
+  integer address, SAMPLE_I fetches data from the specified sampler view
+  without any filtering.  The source data may come from any resource type
+  other than CUBE.
+
+  Syntax: ``SAMPLE_I dst, address, sampler_view``
+
+  Example: ``SAMPLE_I TEMP[0], TEMP[1], SVIEW[0]``
+
+  The 'address' is specified as unsigned integers. If the 'address' is out of
+  range [0...(# texels - 1)] the result of the fetch is always 0 in all
+  components.  As such the instruction doesn't honor address wrap modes, in
+  cases where that behavior is desirable 'SAMPLE' instruction should be used.
+  address.w always provides an unsigned integer mipmap level. If the value is
+  out of the range then the instruction always returns 0 in all components.
+  address.yz are ignored for buffers and 1d textures.  address.z is ignored
+  for 1d texture arrays and 2d textures.
+
+  For 1D texture arrays address.y provides the array index (also as unsigned
+  integer). If the value is out of the range of available array indices
+  [0... (array size - 1)] then the opcode always returns 0 in all components.
+  For 2D texture arrays address.z provides the array index, otherwise it
+  exhibits the same behavior as in the case for 1D texture arrays.  The exact
+  semantics of the source address are presented in the table below:
+
+  +---------------------------+----+-----+-----+---------+
+  | resource type             | X  |  Y  |  Z  |    W    |
+  +===========================+====+=====+=====+=========+
+  | ``PIPE_BUFFER``           | x  |     |     | ignored |
+  +---------------------------+----+-----+-----+---------+
+  | ``PIPE_TEXTURE_1D``       | x  |     |     |   mpl   |
+  +---------------------------+----+-----+-----+---------+
+  | ``PIPE_TEXTURE_2D``       | x  |  y  |     |   mpl   |
+  +---------------------------+----+-----+-----+---------+
+  | ``PIPE_TEXTURE_3D``       | x  |  y  |  z  |   mpl   |
+  +---------------------------+----+-----+-----+---------+
+  | ``PIPE_TEXTURE_RECT``     | x  |  y  |     |   mpl   |
+  +---------------------------+----+-----+-----+---------+
+  | ``PIPE_TEXTURE_CUBE``     | not allowed as source    |
+  +---------------------------+----+-----+-----+---------+
+  | ``PIPE_TEXTURE_1D_ARRAY`` | x  | idx |     |   mpl   |
+  +---------------------------+----+-----+-----+---------+
+  | ``PIPE_TEXTURE_2D_ARRAY`` | x  |  y  | idx |   mpl   |
+  +---------------------------+----+-----+-----+---------+
+
+  Where 'mpl' is a mipmap level and 'idx' is the array index.
+
+.. opcode:: SAMPLE_I_MS
  
-.. opcode:: SAMPLE - Using provided address, sample data from the
-               specified texture using the filtering mode identified
-               by the gven sampler. The source data may come from
-               any resource type other than buffers.
-               SAMPLE dst, address, sampler_view, sampler
-               e.g.
-               SAMPLE TEMP[0], TEMP[1], SVIEW[0], SAMP[0]
-
-.. opcode:: SAMPLE_I - Simplified alternative to the SAMPLE instruction.
-               Using the provided integer address, SAMPLE_I fetches data
-               from the specified sampler view without any filtering.
-               The source data may come from any resource type other
-               than CUBE.
-               SAMPLE_I dst, address, sampler_view
-               e.g.
-               SAMPLE_I TEMP[0], TEMP[1], SVIEW[0]
-               The 'address' is specified as unsigned integers. If the
-               'address' is out of range [0...(# texels - 1)] the
-               result of the fetch is always 0 in all components.
-               As such the instruction doesn't honor address wrap
-               modes, in cases where that behavior is desirable
-               'SAMPLE' instruction should be used.
-               address.w always provides an unsigned integer mipmap
-               level. If the value is out of the range then the
-               instruction always returns 0 in all components.
-               address.yz are ignored for buffers and 1d textures.
-               address.z is ignored for 1d texture arrays and 2d
-               textures.
-               For 1D texture arrays address.y provides the array
-               index (also as unsigned integer). If the value is
-               out of the range of available array indices
-               [0... (array size - 1)] then the opcode always returns
-               0 in all components.
-               For 2D texture arrays address.z provides the array
-               index, otherwise it exhibits the same behavior as in
-               the case for 1D texture arrays.
-               The exact semantics of the source address are presented
-               in the table below:
-               resource type         X     Y     Z       W
-               -------------         ------------------------
-               PIPE_BUFFER           x                ignored
-               PIPE_TEXTURE_1D       x                  mpl
-               PIPE_TEXTURE_2D       x     y            mpl
-               PIPE_TEXTURE_3D       x     y     z      mpl
-               PIPE_TEXTURE_RECT     x     y            mpl
-               PIPE_TEXTURE_CUBE     not allowed as source
-               PIPE_TEXTURE_1D_ARRAY x    idx           mpl
-               PIPE_TEXTURE_2D_ARRAY x     y    idx     mpl
-
-               Where 'mpl' is a mipmap level and 'idx' is the
-               array index.
-
-.. opcode:: SAMPLE_I_MS - Just like SAMPLE_I but allows fetch data from
-               multi-sampled surfaces.
-               SAMPLE_I_MS dst, address, sampler_view, sample
-
-.. opcode:: SAMPLE_B - Just like the SAMPLE instruction with the
-               exception that an additional bias is applied to the
-               level of detail computed as part of the instruction
-               execution.
-               SAMPLE_B dst, address, sampler_view, sampler, lod_bias
-               e.g.
-               SAMPLE_B TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x
-
-.. opcode:: SAMPLE_C - Similar to the SAMPLE instruction but it
-               performs a comparison filter. The operands to SAMPLE_C
-               are identical to SAMPLE, except that there is an additional
-               float32 operand, reference value, which must be a register
-               with single-component, or a scalar literal.
-               SAMPLE_C makes the hardware use the current samplers
-               compare_func (in pipe_sampler_state) to compare
-               reference value against the red component value for the
-               surce resource at each texel that the currently configured
-               texture filter covers based on the provided coordinates.
-               SAMPLE_C dst, address, sampler_view.r, sampler, ref_value
-               e.g.
-               SAMPLE_C TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x
-
-.. opcode:: SAMPLE_C_LZ - Same as SAMPLE_C, but LOD is 0 and derivatives
-               are ignored. The LZ stands for level-zero.
-               SAMPLE_C_LZ dst, address, sampler_view.r, sampler, ref_value
-               e.g.
-               SAMPLE_C_LZ TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x
-
-
-.. opcode:: SAMPLE_D - SAMPLE_D is identical to the SAMPLE opcode except
-               that the derivatives for the source address in the x
-               direction and the y direction are provided by extra
-               parameters.
-               SAMPLE_D dst, address, sampler_view, sampler, der_x, der_y
-               e.g.
-               SAMPLE_D TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2], TEMP[3]
-
-.. opcode:: SAMPLE_L - SAMPLE_L is identical to the SAMPLE opcode except
-               that the LOD is provided directly as a scalar value,
-               representing no anisotropy.
-               SAMPLE_L dst, address, sampler_view, sampler, explicit_lod
-               e.g.
-               SAMPLE_L TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x
-
-.. opcode:: GATHER4 - Gathers the four texels to be used in a bi-linear
-               filtering operation and packs them into a single register.
-               Only works with 2D, 2D array, cubemaps, and cubemaps arrays.
-               For 2D textures, only the addressing modes of the sampler and
-               the top level of any mip pyramid are used. Set W to zero.
-               It behaves like the SAMPLE instruction, but a filtered
-               sample is not generated. The four samples that contribute
-               to filtering are placed into xyzw in counter-clockwise order,
-               starting with the (u,v) texture coordinate delta at the
-               following locations (-, +), (+, +), (+, -), (-, -), where
-               the magnitude of the deltas are half a texel.
-
-
-.. opcode:: SVIEWINFO - query the dimensions of a given sampler view.
-               dst receives width, height, depth or array size and
-               number of mipmap levels as int4. The dst can have a writemask
-               which will specify what info is the caller interested
-               in.
-               SVIEWINFO dst, src_mip_level, sampler_view
-               e.g.
-               SVIEWINFO TEMP[0], TEMP[1].x, SVIEW[0]
-               src_mip_level is an unsigned integer scalar. If it's
-               out of range then returns 0 for width, height and
-               depth/array size but the total number of mipmap is
-               still returned correctly for the given sampler view.
-               The returned width, height and depth values are for
-               the mipmap level selected by the src_mip_level and
-               are in the number of texels.
-               For 1d texture array width is in dst.x, array size
-               is in dst.y and dst.zw are always 0.
-
-.. opcode:: SAMPLE_POS - query the position of a given sample.
-               dst receives float4 (x, y, 0, 0) indicated where the
-               sample is located. If the resource is not a multi-sample
-               resource and not a render target, the result is 0.
-
-.. opcode:: SAMPLE_INFO - dst receives number of samples in x.
-               If the resource is not a multi-sample resource and
-               not a render target, the result is 0.
+  Just like SAMPLE_I but allows fetching data from multi-sampled surfaces.
+
+  Syntax: ``SAMPLE_I_MS dst, address, sampler_view, sample``
+
+.. opcode:: SAMPLE_B
+
+  Just like the SAMPLE instruction with the exception that an additional bias
+  is applied to the level of detail computed as part of the instruction
+  execution.
+
+  Syntax: ``SAMPLE_B dst, address, sampler_view, sampler, lod_bias``
+
+  Example: ``SAMPLE_B TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x``
+
+.. opcode:: SAMPLE_C
+
+  Similar to the SAMPLE instruction but it performs a comparison filter. The
+  operands to SAMPLE_C are identical to SAMPLE, except that there is an
+  additional float32 operand, reference value, which must be a register with
+  single-component, or a scalar literal.  SAMPLE_C makes the hardware use the
+  current sampler's compare_func (in pipe_sampler_state) to compare reference
+  value against the red component value for the source resource at each texel
+  that the currently configured texture filter covers based on the provided
+  coordinates.
+
+  Syntax: ``SAMPLE_C dst, address, sampler_view.r, sampler, ref_value``
+
+  Example: ``SAMPLE_C TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x``
+
+.. opcode:: SAMPLE_C_LZ
+
+  Same as SAMPLE_C, but LOD is 0 and derivatives are ignored. The LZ stands
+  for level-zero.
+
+  Syntax: ``SAMPLE_C_LZ dst, address, sampler_view.r, sampler, ref_value``
+
+  Example: ``SAMPLE_C_LZ TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x``
+
+
+.. opcode:: SAMPLE_D
+
+  SAMPLE_D is identical to the SAMPLE opcode except that the derivatives for
+  the source address in the x direction and the y direction are provided by
+  extra parameters.
+
+  Syntax: ``SAMPLE_D dst, address, sampler_view, sampler, der_x, der_y``
+
+  Example: ``SAMPLE_D TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2], TEMP[3]``
+
+.. opcode:: SAMPLE_L
+
+  SAMPLE_L is identical to the SAMPLE opcode except that the LOD is provided
+  directly as a scalar value, representing no anisotropy.
+
+  Syntax: ``SAMPLE_L dst, address, sampler_view, sampler, explicit_lod``
+
+  Example: ``SAMPLE_L TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x``
+
+.. opcode:: GATHER4
+
+  Gathers the four texels to be used in a bi-linear filtering operation and
+  packs them into a single register.  Only works with 2D, 2D array, cubemaps,
+  and cubemap arrays.  For 2D textures, only the addressing modes of the
+  sampler and the top level of any mip pyramid are used. Set W to zero.  It
+  behaves like the SAMPLE instruction, but a filtered sample is not
+  generated. The four samples that contribute to filtering are placed into
+  xyzw in counter-clockwise order, starting with the (u,v) texture coordinate
+  delta at the following locations (-, +), (+, +), (+, -), (-, -), where the
+  magnitude of the deltas is half a texel.
+
+
+.. opcode:: SVIEWINFO
+
+  Query the dimensions of a given sampler view.  dst receives width, height,
+  depth or array size and number of mipmap levels as int4. The dst can have a
+  writemask which will specify what info the caller is interested in.
+
+  Syntax: ``SVIEWINFO dst, src_mip_level, sampler_view``
+
+  Example: ``SVIEWINFO TEMP[0], TEMP[1].x, SVIEW[0]``
+
+  src_mip_level is an unsigned integer scalar. If it's out of range then
+  returns 0 for width, height and depth/array size but the total number of
+  mipmaps is still returned correctly for the given sampler view.  The returned
+  width, height and depth values are for the mipmap level selected by the
+  src_mip_level and are in the number of texels.  For 1d texture array width
+  is in dst.x, array size is in dst.y and dst.z is 0. The number of mipmaps is
+  still in dst.w.  In contrast to d3d10 resinfo, there's no way in the tgsi
+  instruction encoding to specify the return type (float/rcpfloat/uint), hence
+  always using uint. Also, unlike the SAMPLE instructions, the swizzle on src1
+  resinfo allowing swizzling dst values is ignored (due to the interaction
+  with rcpfloat modifier which requires some swizzle handling in the state
+  tracker anyway).
+
+.. opcode:: SAMPLE_POS
+
+  Query the position of a given sample.  dst receives float4 (x, y, 0, 0)
+  indicating where the sample is located. If the resource is not a multi-sample
+  resource and not a render target, the result is 0.
+
+.. opcode:: SAMPLE_INFO
+
+  dst receives number of samples in x.  If the resource is not a multi-sample
+  resource and not a render target, the result is 0.
  
  
  .. _resourceopcodes:
@@ -2242,24 +2484,24 @@ and will prevent packing of scalar/vec2 arrays and effective alias analysis.
  Declaration Semantic
  ^^^^^^^^^^^^^^^^^^^^^^^^
  
-  Vertex and fragment shader input and output registers may be labeled
-  with semantic information consisting of a name and index.
+Vertex and fragment shader input and output registers may be labeled
+with semantic information consisting of a name and index.
  
-  Follows Declaration token if Semantic bit is set.
+Follows Declaration token if Semantic bit is set.
  
-  Since its purpose is to link a shader with other stages of the pipeline,
-  it is valid to follow only those Declaration tokens that declare a register
-  either in INPUT or OUTPUT file.
+Since its purpose is to link a shader with other stages of the pipeline,
+it is valid to follow only those Declaration tokens that declare a register
+either in INPUT or OUTPUT file.
  
-  SemanticName field contains the semantic name of the register being declared.
-  There is no default value.
+SemanticName field contains the semantic name of the register being declared.
+There is no default value.
  
-  SemanticIndex is an optional subscript that can be used to distinguish
-  different register declarations with the same semantic name. The default value
-  is 0.
+SemanticIndex is an optional subscript that can be used to distinguish
+different register declarations with the same semantic name. The default value
+is 0.
  
-  The meanings of the individual semantic names are explained in the following
-  sections.
+The meanings of the individual semantic names are explained in the following
+sections.
  
  TGSI_SEMANTIC_POSITION
  """"""""""""""""""""""
@@ -2314,13 +2556,10 @@ TGSI_SEMANTIC_FOG
  
  Vertex shader inputs and outputs and fragment shader inputs may be
  labeled with TGSI_SEMANTIC_FOG to indicate that the register contains
-a fog coordinate in the form (F, 0, 0, 1).  Typically, the fragment
-shader will use the fog coordinate to compute a fog blend factor which
-is used to blend the normal fragment color with a constant fog color.
-
-Only the first component matters when writing from the vertex shader;
-the driver will ensure that the coordinate is in this format when used
-as a fragment shader input.
+a fog coordinate.  Typically, the fragment shader will use the fog coordinate
+to compute a fog blend factor which is used to blend the normal fragment color
+with a constant fog color.  But fog coord really is just an ordinary vec4
+register like regular semantics.
  
  
  TGSI_SEMANTIC_PSIZE
@@ -2461,6 +2700,32 @@ distances and by the PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT
  which specifies the maximum number of registers which can be
  annotated with those semantics.
  
+TGSI_SEMANTIC_SAMPLEID
+""""""""""""""""""""""
+
+For fragment shaders, this semantic label indicates that a system value
+contains the current sample id (i.e. gl_SampleID). Only the X value is used.
+
+TGSI_SEMANTIC_SAMPLEPOS
+"""""""""""""""""""""""
+
+For fragment shaders, this semantic label indicates that a system value
+contains the current sample's position (i.e. gl_SamplePosition). Only the X
+and Y values are used.
+
+TGSI_SEMANTIC_SAMPLEMASK
+""""""""""""""""""""""""
+
+For fragment shaders, this semantic label indicates that an output contains
+the sample mask used to disable further sample processing
+(i.e. gl_SampleMask). Only the X value is used, up to 32x MS.
+
+TGSI_SEMANTIC_INVOCATIONID
+""""""""""""""""""""""""""
+
+For geometry shaders, this semantic label indicates that a system value
+contains the current invocation id (i.e. gl_InvocationID). Only the X value is
+used.
  
  Declaration Interpolate
  ^^^^^^^^^^^^^^^^^^^^^^^
@@ -2479,54 +2744,53 @@ should be interpolated according to cylindrical wrapping rules.
  Declaration Sampler View
  ^^^^^^^^^^^^^^^^^^^^^^^^
  
-   Follows Declaration token if file is TGSI_FILE_SAMPLER_VIEW.
+Follows Declaration token if file is TGSI_FILE_SAMPLER_VIEW.
  
-   DCL SVIEW[#], resource, type(s)
+DCL SVIEW[#], resource, type(s)
  
-   Declares a shader input sampler view and assigns it to a SVIEW[#]
-   register.
+Declares a shader input sampler view and assigns it to a SVIEW[#]
+register.
  
-   resource can be one of BUFFER, 1D, 2D, 3D, 1DArray and 2DArray.
+resource can be one of BUFFER, 1D, 2D, 3D, 1DArray and 2DArray.
  
-   type must be 1 or 4 entries (if specifying on a per-component
-   level) out of UNORM, SNORM, SINT, UINT and FLOAT.
+type must be 1 or 4 entries (if specifying on a per-component
+level) out of UNORM, SNORM, SINT, UINT and FLOAT.
  
  
  Declaration Resource
  ^^^^^^^^^^^^^^^^^^^^
  
-   Follows Declaration token if file is TGSI_FILE_RESOURCE.
+Follows Declaration token if file is TGSI_FILE_RESOURCE.
  
-   DCL RES[#], resource [, WR] [, RAW]
+DCL RES[#], resource [, WR] [, RAW]
  
-   Declares a shader input resource and assigns it to a RES[#]
-   register.
+Declares a shader input resource and assigns it to a RES[#]
+register.
  
-   resource can be one of BUFFER, 1D, 2D, 3D, CUBE, 1DArray and
-   2DArray.
+resource can be one of BUFFER, 1D, 2D, 3D, CUBE, 1DArray and
+2DArray.
  
-   If the RAW keyword is not specified, the texture data will be
-   subject to conversion, swizzling and scaling as required to yield
-   the specified data type from the physical data format of the bound
-   resource.
+If the RAW keyword is not specified, the texture data will be
+subject to conversion, swizzling and scaling as required to yield
+the specified data type from the physical data format of the bound
+resource.
  
-   If the RAW keyword is specified, no channel conversion will be
-   performed: the values read for each of the channels (X,Y,Z,W) will
-   correspond to consecutive words in the same order and format
-   they're found in memory.  No element-to-address conversion will be
-   performed either: the value of the provided X coordinate will be
-   interpreted in byte units instead of texel units.  The result of
-   accessing a misaligned address is undefined.
+If the RAW keyword is specified, no channel conversion will be
+performed: the values read for each of the channels (X,Y,Z,W) will
+correspond to consecutive words in the same order and format
+they're found in memory.  No element-to-address conversion will be
+performed either: the value of the provided X coordinate will be
+interpreted in byte units instead of texel units.  The result of
+accessing a misaligned address is undefined.
  
-   Usage of the STORE opcode is only allowed if the WR (writable) flag
-   is set.
+Usage of the STORE opcode is only allowed if the WR (writable) flag
+is set.
  
  
  Properties
  ^^^^^^^^^^^^^^^^^^^^^^^^
  
-
-  Properties are general directives that apply to the whole TGSI program.
+Properties are general directives that apply to the whole TGSI program.
  
  FS_COORD_ORIGIN
  """""""""""""""
@@ -2580,6 +2844,23 @@ distance outputs.
  This is useful for APIs that don't have UCPs and where clip distances written
  by a shader cannot be disabled.
  
+GS_INVOCATIONS
+""""""""""""""
+
+Specifies the number of times a geometry shader should be executed for each
+input primitive. Each invocation will have a different
+TGSI_SEMANTIC_INVOCATIONID system value set. If not specified, assumed to
+be 1.
+
+VS_WINDOW_SPACE_POSITION
+""""""""""""""""""""""""""
+If this property is set on the vertex shader, the TGSI_SEMANTIC_POSITION output
+is assumed to contain window space coordinates.
+Division of X,Y,Z by W and the viewport transformation are disabled, and 1/W is
+directly taken from the 4-th component of the shader output.
+Naturally, clipping is not performed on window coordinates either.
+The effect of this property is undefined if a geometry or tessellation shader
+is in use.
  
  Texture Sampling and Texture Formats
  ------------------------------------