r300g: handle DISCARD_WHOLE_RESOURCE for buffers

[mesa.git] / src / gallium / docs / source / tgsi.rst
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst

index 5478d866678bbd2309cf997b878591797b982c8a..4debcc6ecc4921d00603882cc72c44a003af2b8d 100644 (file)
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -26,9 +26,11 @@ each of the components of *dst*. When this happens, the result is said to be
  Instruction Set
  ---------------
  
-From GL_NV_vertex_program
+Core ISA
  ^^^^^^^^^^^^^^^^^^^^^^^^^
  
+These opcodes are guaranteed to be available regardless of the driver being
+used.
  
  .. opcode:: ARL - Address Register Load
  
@@ -287,7 +289,7 @@ This instruction replicates its result.
    dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
  
  
-.. opcode:: FRAC - Fraction
+.. opcode:: FRC - Fraction
  
  .. math::
  
@@ -575,17 +577,45 @@ This instruction replicates its result.
  
  .. opcode:: TEX - Texture Lookup
  
-  TBD
+.. math::
+
+  coord = src0
+
+  bias = 0.0
+
+  dst = texture_sample(unit, coord, bias)
  
  
  .. opcode:: TXD - Texture Lookup with Derivatives
  
-  TBD
+.. math::
+
+  coord = src0
+
+  ddx = src1
+
+  ddy = src2
+
+  bias = 0.0
+
+  dst = texture_sample_deriv(unit, coord, bias, ddx, ddy)
  
  
  .. opcode:: TXP - Projective Texture Lookup
  
-  TBD
+.. math::
+
+  coord.x = src0.x / src0.w
+
+  coord.y = src0.y / src0.w
+
+  coord.z = src0.z / src0.w
+
+  coord.w = src0.w
+
+  bias = 0.0
+
+  dst = texture_sample(unit, coord, bias)
  
  
  .. opcode:: UP2H - Unpack Two 16-Bit Floats
@@ -637,10 +667,6 @@ This instruction replicates its result.
     Considered for removal.
  
  
-From GL_NV_vertex_program2
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-
  .. opcode:: ARA - Address Register Add
  
    TBD
@@ -680,8 +706,6 @@ From GL_NV_vertex_program2
  
    pc = pop()
  
-  Potential restrictions:  
-  * Only occurs at end of function.
  
  .. opcode:: SSG - Set Sign
  
@@ -728,12 +752,24 @@ From GL_NV_vertex_program2
  
    dst.z = 0
  
-  dst.y = 1
+  dst.w = 1
  
  
  .. opcode:: TXB - Texture Lookup With Bias
  
-  TBD
+.. math::
+
+  coord.x = src.x
+
+  coord.y = src.y
+
+  coord.z = src.z
+
+  coord.w = 1.0
+
+  bias = src.w
+
+  dst = texture_sample(unit, coord, bias)
  
  
  .. opcode:: NRM - 3-component Vector Normalise
@@ -771,44 +807,29 @@ This instruction replicates its result.
    dst = src0.x \times src1.x + src0.y \times src1.y
  
  
-.. opcode:: TXL - Texture Lookup With LOD
-
-  TBD
-
-
-.. opcode:: BRK - Break
+.. opcode:: TXL - Texture Lookup With explicit LOD
  
-  TBD
+.. math::
  
+  coord.x = src0.x
  
-.. opcode:: IF - If
+  coord.y = src0.y
  
-  TBD
+  coord.z = src0.z
  
+  coord.w = 1.0
  
-.. opcode:: BGNFOR - Begin a For-Loop
+  lod = src0.w
  
-  dst.x = floor(src.x)
-  dst.y = floor(src.y)
-  dst.z = floor(src.z)
+  dst = texture_sample(unit, coord, lod)
  
-  if (dst.y <= 0)
-    pc = [matching ENDFOR] + 1
-  endif
-
-  Note: The destination must be a loop register.
-        The source must be a constant register.
-
-.. note::
  
-   Considered for cleanup.
-
-.. note::
+.. opcode:: BRK - Break
  
-   Considered for removal.
+  TBD
  
  
-.. opcode:: REP - Repeat
+.. opcode:: IF - If
  
    TBD
  
@@ -823,30 +844,6 @@ This instruction replicates its result.
    TBD
  
  
-.. opcode:: ENDFOR - End a For-Loop
-
-  dst.x = dst.x + dst.z
-  dst.y = dst.y - 1.0
-
-  if (dst.y > 0)
-    pc = [matching BGNFOR instruction] + 1
-  endif
-
-  Note: The destination must be a loop register.
-
-.. note::
-
-   Considered for cleanup.
-
-.. note::
-
-   Considered for removal.
-
-.. opcode:: ENDREP - End Repeat
-
-  TBD
-
-
  .. opcode:: PUSHA - Push Address Register On Stack
  
    push(src.x)
@@ -878,11 +875,14 @@ This instruction replicates its result.
     Considered for removal.
  
  
-From GL_NV_gpu_program4
+Compute ISA
  ^^^^^^^^^^^^^^^^^^^^^^^^
  
+These opcodes are primarily provided for special-use computational shaders.
  Support for these opcodes indicated by a special pipe capability bit (TBD).
  
+XXX so let's discuss it, yeah?
+
  .. opcode:: CEIL - Ceiling
  
  .. math::
@@ -1040,10 +1040,17 @@ Support for these opcodes indicated by a special pipe capability bit (TBD).
  
    TBD
  
+.. note::
  
-From GL_NV_geometry_program4
+   Support for CONT is determined by a special capability bit,
+   ``TGSI_CONT_SUPPORTED``. See :ref:`Screen` for more information.
+
+
+Geometry ISA
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  
+These opcodes are only supported in geometry shaders; they have no meaning
+in any other type of shader.
  
  .. opcode:: EMIT - Emit
  
@@ -1055,9 +1062,11 @@ From GL_NV_geometry_program4
    TBD
  
  
-From GLSL
+GLSL ISA
  ^^^^^^^^^^
  
+These opcodes are part of :term:`GLSL`'s opcode set. Support for these
+opcodes is determined by a special capability bit, ``GLSL``.
  
  .. opcode:: BGNLOOP - Begin a Loop
  
@@ -1096,6 +1105,7 @@ This instruction replicates its result.
  ps_2_x
  ^^^^^^^^^^^^
  
+XXX wait what
  
  .. opcode:: CALLNZ - Subroutine Call If Not Zero
  
@@ -1113,10 +1123,15 @@ ps_2_x
  
  .. _doubleopcodes:
  
-Double Opcodes
+Double ISA
  ^^^^^^^^^^^^^^^
  
-.. opcode:: DADD - Add Double
+The double-precision opcodes reinterpret four-component vectors into
+two-component vectors with doubled precision in each component.
+
+Support for these opcodes is XXX undecided. :T
+
+.. opcode:: DADD - Add
  
  .. math::
  
@@ -1125,7 +1140,7 @@ Double Opcodes
    dst.zw = src0.zw + src1.zw
  
  
-.. opcode:: DDIV - Divide Double
+.. opcode:: DDIV - Divide
  
  .. math::
  
@@ -1133,7 +1148,7 @@ Double Opcodes
  
    dst.zw = src0.zw / src1.zw
  
-.. opcode:: DSEQ - Set Double on Equal
+.. opcode:: DSEQ - Set on Equal
  
  .. math::
  
@@ -1141,7 +1156,7 @@ Double Opcodes
  
    dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
  
-.. opcode:: DSLT - Set Double on Less than
+.. opcode:: DSLT - Set on Less than
  
  .. math::
  
@@ -1149,7 +1164,7 @@ Double Opcodes
  
    dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
  
-.. opcode:: DFRAC - Double Fraction
+.. opcode:: DFRAC - Fraction
  
  .. math::
  
@@ -1158,23 +1173,33 @@ Double Opcodes
    dst.zw = src.zw - \lfloor src.zw\rfloor
  
  
-.. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components
+.. opcode:: DFRACEXP - Convert Number to Fractional and Integral Components
+
+Like the ``frexp()`` routine in many math libraries, this opcode stores the
+exponent of its source to ``dst0``, and the significand to ``dst1``, such that
+:math:`dst1 \times 2^{dst0} = src` .
  
  .. math::
  
-  dst0.xy = frexp(src.xy, dst1.xy)
+  dst0.xy = exp(src.xy)
+
+  dst1.xy = frac(src.xy)
+
+  dst0.zw = exp(src.zw)
  
-  dst0.zw = frexp(src.zw, dst1.zw)
+  dst1.zw = frac(src.zw)
  
-.. opcode:: DLDEXP - Multiple Double Number by Integral Power of 2
+.. opcode:: DLDEXP - Multiply Number by Integral Power of 2
+
+This opcode is the inverse of :opcode:`DFRACEXP`.
  
  .. math::
  
-  dst.xy = ldexp(src0.xy, src1.xy)
+  dst.xy = src0.xy \times 2^{src1.xy}
  
-  dst.zw = ldexp(src0.zw, src1.zw)
+  dst.zw = src0.zw \times 2^{src1.zw}
  
-.. opcode:: DMIN - Minimum Double
+.. opcode:: DMIN - Minimum
  
  .. math::
  
@@ -1182,7 +1207,7 @@ Double Opcodes
  
    dst.zw = min(src0.zw, src1.zw)
  
-.. opcode:: DMAX - Maximum Double
+.. opcode:: DMAX - Maximum
  
  .. math::
  
@@ -1190,7 +1215,7 @@ Double Opcodes
  
    dst.zw = max(src0.zw, src1.zw)
  
-.. opcode:: DMUL - Multiply Double
+.. opcode:: DMUL - Multiply
  
  .. math::
  
@@ -1199,7 +1224,7 @@ Double Opcodes
    dst.zw = src0.zw \times src1.zw
  
  
-.. opcode:: DMAD - Multiply And Add Doubles
+.. opcode:: DMAD - Multiply And Add
  
  .. math::
  
@@ -1208,7 +1233,7 @@ Double Opcodes
    dst.zw = src0.zw \times src1.zw + src2.zw
  
  
-.. opcode:: DRCP - Reciprocal Double
+.. opcode:: DRCP - Reciprocal
  
  .. math::
  
@@ -1216,7 +1241,7 @@ Double Opcodes
  
     dst.zw = \frac{1}{src.zw}
  
-.. opcode:: DSQRT - Square root double
+.. opcode:: DSQRT - Square Root
  
  .. math::
  
@@ -1225,6 +1250,157 @@ Double Opcodes
     dst.zw = \sqrt{src.zw}
  
  
+.. _resourceopcodes:
+
+Resource Access Opcodes
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+These opcodes closely follow the semantics of the respective Direct3D
+instructions. If in doubt, double-check the Direct3D documentation.
+
+.. opcode:: LOAD - Simplified alternative to the "SAMPLE" instruction.
+               Using the provided integer address, LOAD fetches data
+               from the specified buffer/texture without any filtering.
+               The source data may come from any resource type other
+               than CUBE.
+               LOAD dst, address, resource
+               e.g.
+               LOAD TEMP[0], TEMP[1], RES[0]
+               The 'address' is specified as unsigned integers. If the
+               'address' is out of range [0...(# texels - 1)] the
+               result of the fetch is always 0 in all components.
+               As such the instruction doesn't honor address wrap
+               modes, in cases where that behavior is desirable
+               'sample' instruction should be used.
+               address.w always provides an unsigned integer mipmap
+               level. If the value is out of the range then the
+               instruction always returns 0 in all components.
+               address.yz are ignored for buffers and 1d textures.
+               address.z is ignored for 1d texture arrays and 2d
+               textures.
+               For 1D texture arrays address.y provides the array
+               index (also as unsigned integer). If the value is
+               out of the range of available array indices
+               [0... (array size - 1)] then the opcode always returns
+               0 in all components.
+               For 2D texture arrays address.z provides the array
+               index, otherwise it exhibits the same behavior as in
+               the case for 1D texture arrays.
+               The exact semantics of the source address are presented
+               in the table below:
+               resource type         X     Y     Z       W
+               -------------         ------------------------
+               PIPE_BUFFER           x                ignored
+               PIPE_TEXTURE_1D       x                  mpl
+               PIPE_TEXTURE_2D       x     y            mpl
+               PIPE_TEXTURE_3D       x     y     z      mpl
+               PIPE_TEXTURE_RECT     x     y            mpl
+               PIPE_TEXTURE_CUBE     not allowed as source
+               PIPE_TEXTURE_1D_ARRAY x    idx           mpl
+               PIPE_TEXTURE_2D_ARRAY x     y    idx     mpl
+
+               Where 'mpl' is a mipmap level and 'idx' is the
+               array index.
+
+
+.. opcode:: LOAD_MS - Just like LOAD but allows fetching data from
+               multi-sampled surfaces.
+
+.. opcode:: SAMPLE - Using provided address, sample data from the
+               specified texture using the filtering mode identified
+               by the given sampler. The source data may come from
+               any resource type other than buffers.
+               SAMPLE dst, address, resource, sampler
+               e.g.
+               SAMPLE TEMP[0], TEMP[1], RES[0], SAMP[0]
+
+.. opcode:: SAMPLE_B - Just like the SAMPLE instruction with the
+               exception that an additional bias is applied to the
+               level of detail computed as part of the instruction
+               execution.
+               SAMPLE_B dst, address, resource, sampler, lod_bias
+               e.g.
+               SAMPLE_B TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2].x
+
+.. opcode:: SAMPLE_C - Similar to the SAMPLE instruction but it
+               performs a comparison filter. The operands to SAMPLE_C
+               are identical to SAMPLE, except that there is an additional
+               float32 operand, reference value, which must be a register
+               with single-component, or a scalar literal.
+               SAMPLE_C makes the hardware use the current sampler's
+               compare_func (in pipe_sampler_state) to compare
+               reference value against the red component value for the
+               source resource at each texel that the currently configured
+               texture filter covers based on the provided coordinates.
+               SAMPLE_C dst, address, resource.r, sampler, ref_value
+               e.g.
+               SAMPLE_C TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x
+
+.. opcode:: SAMPLE_C_LZ - Same as SAMPLE_C, but LOD is 0 and derivatives
+               are ignored. The LZ stands for level-zero.
+               SAMPLE_C_LZ dst, address, resource.r, sampler, ref_value
+               e.g.
+               SAMPLE_C_LZ TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x
+
+
+.. opcode:: SAMPLE_D - SAMPLE_D is identical to the SAMPLE opcode except
+               that the derivatives for the source address in the x
+               direction and the y direction are provided by extra
+               parameters.
+               SAMPLE_D dst, address, resource, sampler, der_x, der_y
+               e.g.
+               SAMPLE_D TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2], TEMP[3]
+
+.. opcode:: SAMPLE_L - SAMPLE_L is identical to the SAMPLE opcode except
+               that the LOD is provided directly as a scalar value,
+               representing no anisotropy. The source address's A channel
+               is used as the LOD.
+               SAMPLE_L dst, address, resource, sampler
+               e.g.
+               SAMPLE_L TEMP[0], TEMP[1], RES[0], SAMP[0]
+
+
+.. opcode:: GATHER4 - Gathers the four texels to be used in a bi-linear
+               filtering operation and packs them into a single register.
+               Only works with 2D, 2D array, cubemaps, and cubemap arrays.
+               For 2D textures, only the addressing modes of the sampler and
+               the top level of any mip pyramid are used. Set W to zero.
+               It behaves like the SAMPLE instruction, but a filtered
+               sample is not generated. The four samples that contribute
+               to filtering are placed into xyzw in counter-clockwise order,
+               starting with the (u,v) texture coordinate delta at the
+               following locations (-, +), (+, +), (+, -), (-, -), where
+               the magnitude of the deltas is half a texel.
+
+
+.. opcode:: RESINFO - query the dimensions of a given input buffer.
+               dst receives width, height, depth or array size and
+               number of mipmap levels. The dst can have a writemask
+               which will specify what info the caller is interested
+               in.
+               RESINFO dst, src_mip_level, resource
+               e.g.
+               RESINFO TEMP[0], TEMP[1].x, RES[0]
+               src_mip_level is an unsigned integer scalar. If it's
+               out of range then returns 0 for width, height and
+               depth/array size but the total number of mipmap levels is
+               still returned correctly for the given resource.
+               The returned width, height and depth values are for
+               the mipmap level selected by the src_mip_level and
+               are in the number of texels.
+               For 1d texture array width is in dst.x, array size
+               is in dst.y and dst.zw are always 0.
+
+.. opcode:: SAMPLE_POS - query the position of a given sample.
+               dst receives float4 (x, y, 0, 0) indicating where the
+               sample is located. If the resource is not a multi-sample
+               resource and not a render target, the result is 0.
+
+.. opcode:: SAMPLE_INFO - dst receives number of samples in x.
+               If the resource is not a multi-sample resource and
+               not a render target, the result is 0.
+
+
  Explanation of symbols used
  ------------------------------
  
@@ -1280,9 +1456,41 @@ Other tokens
  ---------------
  
  
+Declaration
+^^^^^^^^^^^
+
+
+Declares a register that will be referenced as an operand in Instruction
+tokens.
+
+File field contains register file that is being declared and is one
+of TGSI_FILE.
+
+UsageMask field specifies which of the register components can be accessed
+and is one of TGSI_WRITEMASK.
+
+Interpolate field is only valid for fragment shader INPUT register files.
+It specifies the way input is being interpolated by the rasteriser and is one
+of TGSI_INTERPOLATE.
+
+If Dimension flag is set to 1, a Declaration Dimension token follows.
+
+If Semantic flag is set to 1, a Declaration Semantic token follows.
+
+CylindricalWrap bitfield is only valid for fragment shader INPUT register
+files. It specifies which register components should be subject to cylindrical
+wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X
+is set to 1, the X component should be interpolated according to cylindrical
+wrapping rules.
+
+If file is TGSI_FILE_RESOURCE, a Declaration Resource token follows.
+
+
  Declaration Semantic
  ^^^^^^^^^^^^^^^^^^^^^^^^
  
+  Vertex and fragment shader input and output registers may be labeled
+  with semantic information consisting of a name and index.
  
    Follows Declaration token if Semantic bit is set.
  
@@ -1303,92 +1511,138 @@ Declaration Semantic
  TGSI_SEMANTIC_POSITION
  """"""""""""""""""""""
  
-Position, sometimes known as HPOS or WPOS for historical reasons, is the
-location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z``
-are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used
-for the perspective divide, if enabled.
+For vertex shaders, TGSI_SEMANTIC_POSITION indicates the vertex shader
+output register which contains the homogeneous vertex position in the clip
+space coordinate system.  After clipping, the X, Y and Z components of the
+vertex will be divided by the W value to get normalized device coordinates.
  
-As a vertex shader output, position should be scaled to the viewport. When
-used in fragment shaders, position will be in window coordinates. The convention
-used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties.
+For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
+fragment shader input contains the fragment's window position.  The X
+component starts at zero and always increases from left to right.
+The Y component starts at zero and always increases but Y=0 may either
+indicate the top of the window or the bottom depending on the fragment
+coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN).
+The Z coordinate ranges from 0 to 1 to represent depth from the front
+to the back of the Z buffer.  The W component contains the reciprocal
+of the interpolated vertex position W component.
  
-XXX additionally, is there a way to configure the perspective divide? it's
-accelerated on most chipsets AFAIK...
+Fragment shaders may also declare an output register with
+TGSI_SEMANTIC_POSITION.  Only the Z component is writable.  This allows
+the fragment shader to change the fragment's Z position.
  
-Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can
-be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``.
  
-XXX usually? can we solidify that?
  
  TGSI_SEMANTIC_COLOR
  """""""""""""""""""
  
-Colors are used to, well, color the primitives. Colors are always in
-``(r, g, b, a)`` format.
+For vertex shader outputs or fragment shader inputs/outputs, this
+label indicates that the register contains an R,G,B,A color.
+
+Several shader inputs/outputs may contain colors so the semantic index
+is used to distinguish them.  For example, color[0] may be the diffuse
+color while color[1] may be the specular color.
+
+This label is needed so that the flat/smooth shading can be applied
+to the right interpolants during rasterization.
+
  
-If alpha is not specified, it defaults to 1.
  
  TGSI_SEMANTIC_BCOLOR
  """"""""""""""""""""
  
  Back-facing colors are only used for back-facing polygons, and are only valid
  in vertex shader outputs. After rasterization, all polygons are front-facing
-and COLOR and BCOLOR end up occupying the same slots in the fragment, so
-all BCOLORs effectively become regular COLORs in the fragment shader.
+and COLOR and BCOLOR end up occupying the same slots in the fragment shader,
+so all BCOLORs effectively become regular COLORs in the fragment shader.
+
  
  TGSI_SEMANTIC_FOG
  """""""""""""""""
  
-The fog coordinate historically has been used to replace the depth coordinate
-for generation of fog in dedicated fog blocks. Gallium, however, does not use
-dedicated fog acceleration, placing it entirely in the fragment shader
-instead.
+Vertex shader inputs and outputs and fragment shader inputs may be
+labeled with TGSI_SEMANTIC_FOG to indicate that the register contains
+a fog coordinate in the form (F, 0, 0, 1).  Typically, the fragment
+shader will use the fog coordinate to compute a fog blend factor which
+is used to blend the normal fragment color with a constant fog color.
+
+Only the first component matters when writing from the vertex shader;
+the driver will ensure that the coordinate is in this format when used
+as a fragment shader input.
  
-The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first
-component matters when writing from the vertex shader; the driver will ensure
-that the coordinate is in this format when used as a fragment shader input.
  
  TGSI_SEMANTIC_PSIZE
  """""""""""""""""""
  
-PSIZE, or point size, is used to specify point sizes per-vertex. It should
-be in ``(p, n, x, f)`` format, where ``p`` is the point size, ``n`` is the minimum
-size, ``x`` is the maximum size, and ``f`` is the fade threshold.
-
-XXX this is arb_vp. is this what we actually do? should double-check...
+Vertex shader input and output registers may be labeled with
+TGSI_SEMANTIC_PSIZE to indicate that the register contains a point size
+in the form (S, 0, 0, 1).  The point size controls the width or diameter
+of points for rasterization.  This label cannot be used in fragment
+shaders.
  
  When using this semantic, be sure to set the appropriate state in the
  :ref:`rasterizer` first.
  
+
  TGSI_SEMANTIC_GENERIC
  """""""""""""""""""""
  
-Generic semantics are nearly always used for texture coordinate attributes,
-in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds
-of lookups, and ``q`` is the level-of-detail bias for biased sampling.
+All vertex/fragment shader inputs/outputs not labeled with any other
+semantic label can be considered to be generic attributes.  Typical
+uses of generic inputs/outputs are texcoords and user-defined values.
  
-These attributes are called "generic" because they may be used for anything
-else, including parameters, texture generation information, or anything that
-can be stored inside a four-component vector.
  
  TGSI_SEMANTIC_NORMAL
  """"""""""""""""""""
  
-Vertex normal; could be used to implement per-pixel lighting for legacy APIs
-that allow mixing fixed-function and programmable stages.
+Indicates that a vertex shader input is a normal vector.  This is
+typically only used for legacy graphics APIs.
+
  
  TGSI_SEMANTIC_FACE
  """"""""""""""""""
  
-FACE is the facing bit, to store the facing information for the fragment
-shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive
-when the fragment is front-facing, and negative when the component is
-back-facing.
+This label applies to fragment shader inputs only and indicates that
+the register contains front/back-face information of the form (F, 0,
+0, 1).  The first component will be positive when the fragment belongs
+to a front-facing polygon, and negative when the fragment belongs to a
+back-facing polygon.
+
  
  TGSI_SEMANTIC_EDGEFLAG
  """"""""""""""""""""""
  
-XXX no clue
+For vertex shaders, this semantic label indicates that an input or
+output is a boolean edge flag.  The register layout is [F, x, x, x]
+where F is 0.0 or 1.0 and x = don't care.  Normally, the vertex shader
+simply copies the edge flag input to the edgeflag output.
+
+Edge flags are used to control which lines or points are actually
+drawn when the polygon mode converts triangles/quads/polygons into
+points or lines.
+
+TGSI_SEMANTIC_STENCIL
+""""""""""""""""""""""
+
+For fragment shaders, this semantic label indicates that an output
+is a writable stencil reference value. Only the Y component is writable.
+This allows the fragment shader to change the fragment's stencilref value.
+
+
+Declaration Resource
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+   Follows Declaration token if file is TGSI_FILE_RESOURCE.
+
+   DCL RES[#], resource, type(s)
+
+   Declares a shader input resource and assigns it to a RES[#]
+   register.
+
+   resource can be one of BUFFER, 1D, 2D, 3D, CUBE, 1DArray and
+   2DArray.
+
+   type must be 1 or 4 entries (if specifying on a per-component
+   level) out of UNORM, SNORM, SINT, UINT and FLOAT.
  
  
  Properties
@@ -1432,6 +1686,11 @@ GL_ARB_fragment_coord_conventions extension.
  DirectX 9 uses INTEGER.
  DirectX 10 uses HALF_INTEGER.
  
+FS_COLOR0_WRITES_ALL_CBUFS
+""""""""""""""""""""""""""
+Specifies that writes to the fragment shader color 0 are replicated to all
+bound cbufs. This facilitates OpenGL's fragColor output vs fragData[0] where
+fragData is directed to a single color buffer, but fragColor is broadcast.
  
  
  Texture Sampling and Texture Formats
@@ -1445,9 +1704,9 @@ well.
  +--------------------+--------------+--------------------+--------------+
  | Texture Components | Gallium      | OpenGL             | Direct3D 9   |
  +====================+==============+====================+==============+
-| R                  | XXX TBD      | (r, 0, 0, 1)       | (r, 1, 1, 1) |
+| R                  | (r, 0, 0, 1) | (r, 0, 0, 1)       | (r, 1, 1, 1) |
  +--------------------+--------------+--------------------+--------------+
-| RG                 | XXX TBD      | (r, g, 0, 1)       | (r, g, 1, 1) |
+| RG                 | (r, g, 0, 1) | (r, g, 0, 1)       | (r, g, 1, 1) |
  +--------------------+--------------+--------------------+--------------+
  | RGB                | (r, g, b, 1) | (r, g, b, 1)       | (r, g, b, 1) |
  +--------------------+--------------+--------------------+--------------+
@@ -1467,6 +1726,8 @@ well.
  | Z                  | XXX TBD      | (z, z, z, 1)       | (0, z, 0, 1) |
  |                    |              | [#depth-tex-mode]_ |              |
  +--------------------+--------------+--------------------+--------------+
+| S                  | (s, s, s, s) | unknown            | unknown      |
++--------------------+--------------+--------------------+--------------+
  
  .. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
  .. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)