return TRUE;
/* point sprites */
- if (rasterizer->point_sprite && draw->pipeline.point_sprite)
+ if (rasterizer->sprite_coord_enable && draw->pipeline.point_sprite)
return TRUE;
}
{
struct draw_context *draw = stage->draw;
struct draw_stage *next = draw->pipeline.rasterize;
- int need_det = 0;
- int precalc_flat = 0;
+ boolean need_det = FALSE;
+ boolean precalc_flat = FALSE;
boolean wide_lines, wide_points;
/* Set the validate's next stage to the rasterize stage, so that it
&& !draw->rasterizer->line_smooth);
/* drawing large points? */
- if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)
+ if (draw->rasterizer->sprite_coord_enable && draw->pipeline.point_sprite)
wide_points = TRUE;
else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint)
wide_points = FALSE;
if (wide_lines) {
draw->pipeline.wide_line->next = next;
next = draw->pipeline.wide_line;
- precalc_flat = 1;
+ precalc_flat = TRUE;
}
- if (wide_points || draw->rasterizer->point_sprite) {
+ if (wide_points || draw->rasterizer->sprite_coord_enable) {
draw->pipeline.wide_point->next = next;
next = draw->pipeline.wide_point;
}
if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) {
draw->pipeline.stipple->next = next;
next = draw->pipeline.stipple;
- precalc_flat = 1; /* only needed for lines really */
+ precalc_flat = TRUE; /* only needed for lines really */
}
if (draw->rasterizer->poly_stipple_enable
draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) {
draw->pipeline.unfilled->next = next;
next = draw->pipeline.unfilled;
- precalc_flat = 1; /* only needed for triangles really */
- need_det = 1;
+ precalc_flat = TRUE; /* only needed for triangles really */
+ need_det = TRUE;
}
if (draw->rasterizer->flatshade && precalc_flat) {
draw->rasterizer->offset_ccw) {
draw->pipeline.offset->next = next;
next = draw->pipeline.offset;
- need_det = 1;
+ need_det = TRUE;
}
if (draw->rasterizer->light_twoside) {
draw->pipeline.twoside->next = next;
next = draw->pipeline.twoside;
- need_det = 1;
+ need_det = TRUE;
}
/* Always run the cull stage as we calculate determinant there
Attributes include polygon culling state, line width, line stipple,
multisample state, scissoring and flat/smooth shading.
-
Members
-------
+bypass_vs_clip_and_viewport
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Whether the entire TCL pipeline should be bypassed. This implies that
+vertices are pre-transformed for the viewport, and will not be run
+through the vertex shader.
+
+.. note::
+
+ Implementations may still clip away vertices that are not in the viewport
+ when this is set.
+
flatshade
- If set, the provoking vertex of each polygon is used to determine the
- color of the entire polygon. If not set, fragment colors will be
- interpolated between the vertex colors.
- Note that this is separate from the fragment shader input attributes
- CONSTANT, LINEAR and PERSPECTIVE. We need the flatshade state at
+^^^^^^^^^
+
+If set, the provoking vertex of each polygon is used to determine the color
+of the entire polygon. If not set, fragment colors will be interpolated
+between the vertex colors.
+
+The actual interpolated shading algorithm is obviously
+implementation-dependent, but will usually be Gouraud for most hardware.
+
+.. note::
+
+ This is separate from the fragment shader input attributes
+ CONSTANT, LINEAR and PERSPECTIVE. The flatshade state is needed at
clipping time to determine how to set the color of new vertices.
- Also note that the draw module can implement flat shading by copying
- the provoking vertex color to all the other vertices in the primitive.
+
+ :ref:`Draw` can implement flat shading by copying the provoking vertex
+ color to all the other vertices in the primitive.
flatshade_first
- Whether the first vertex should be the provoking vertex, for most
- primitives. If not set, the last vertex is the provoking vertex.
+^^^^^^^^^^^^^^^
+
+Whether the first vertex should be the provoking vertex, for most primitives.
+If not set, the last vertex is the provoking vertex.
+
+There are several important exceptions to the specification of this rule.
+
+* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first
+ vertex. If the caller wishes to change the provoking vertex, they merely
+ need to rotate the vertices themselves.
+* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no
+ effect; the provoking vertex is always the last vertex.
+* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the
+ second vertex, not the first. This permits each segment of the fan to have
+ a different color.
+
+Other Members
+^^^^^^^^^^^^^
light_twoside
- If set, there are per-vertex back-facing colors. The draw module
+ If set, there are per-vertex back-facing colors. :ref:`Draw`
uses this state along with the front/back information to set the
final vertex colors prior to rasterization.
front_winding
Indicates the window order of front-facing polygons, either
PIPE_WINDING_CW or PIPE_WINDING_CCW
+
cull_mode
Indicates which polygons to cull, either PIPE_WINDING_NONE (cull no
polygons), PIPE_WINDING_CW (cull clockwise-winding polygons),
line_stipple_pattern
16-bit bitfield of on/off flags, used to pattern the line stipple.
line_stipple_factor
- When drawinga stippled line, each bit in the stipple pattern is
+ When drawing a stippled line, each bit in the stipple pattern is
repeated N times, where N = line_stipple_factor + 1.
line_last_pixel
Controls whether the last pixel in a line is drawn or not. OpenGL
Whether vertices have a point size element.
point_size
The size of points, if not specified per-vertex.
- point_size_min
- The minimum size of points.
- point_size_max
- The maximum size of points.
- point_sprite
- Whether points are drawn as sprites (textured quads)
+ sprite_coord_enable
+ Specifies if a coord has its texture coordinates replaced or not. This
+ is a packed bitfield containing the enable for all coords - if all are 0
+ point sprites are effectively disabled, though points may still be
+ rendered slightly differently according to point_quad_rasterization.
+ If any coord is non-zero, point_smooth should be disabled, and
+ point_quad_rasterization enabled.
+ If enabled, the four vertices of the resulting quad will be assigned
+ texture coordinates, according to sprite_coord_mode.
sprite_coord_mode
Specifies how the value for each shader output should be computed when
- drawing sprites. If PIPE_SPRITE_COORD_NONE, don't change the vertex
- shader output. Otherwise, the four vertices of the resulting quad will
- be assigned texture coordinates. For PIPE_SPRITE_COORD_LOWER_LEFT, the
- lower left vertex will have coordinate (0,0,0,1).
+ drawing sprites, for each coord which has sprite_coord_enable set.
+ For PIPE_SPRITE_COORD_LOWER_LEFT, the lower left vertex will have
+ coordinate (0,0,0,1).
For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have
coordinate (0,0,0,1).
- This state is needed by the 'draw' module because that's where each
+ This state is needed by :ref:`Draw` because that's where each
point vertex is converted into four quad vertices. There's no other
place to emit the new vertex texture coordinates which are required for
sprite rendering.
Note that when geometry shaders are available, this state could be
removed. A special geometry shader defined by the state tracker could
- converts the incoming points into quads with the proper texture coords.
+ convert the incoming points into quads with the proper texture coords.
+ point_quad_rasterization
+ This determines if points should be rasterized as quads or points.
+ d3d always uses quad rasterization for points, regardless of whether point sprites
+ are enabled or not, but OGL has different rules. If point_quad_rasterization
+ is set, point_smooth should be disabled, and points will be rendered as
+ squares even if multisample is enabled.
+ sprite_coord_enable should be zero if point_quad_rasterization is not
+ enabled.
scissor
Whether the scissor test is enabled.
multisample
- Whether :ref:`MSAA` is enabled.
-
-bypass_vs_clip_and_viewport
- Whether the entire TCL pipeline should be bypassed. This implies that
- vertices are pre-transformed for the viewport, and will not be run
- through the vertex shader. Note that implementations may still clip away
- vertices that are not in the viewport.
+ Whether :term:`MSAA` is enabled.
gl_rasterization_rules
Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set,
the rasterizer will use (0, 0) for pixel centers.
-
-Notes
------
-
-flatshade
-^^^^^^^^^
-
-The actual interpolated shading algorithm is obviously
-implementation-dependent, but will usually be Gourard for most hardware.
-
-bypass_vs_clip_and_viewport
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When set, this implies that vertices are pre-transformed for the viewport, and
-will not be run through the vertex shader. Note that implementations may still
-clip away vertices that are not visible.
-
-flatshade_first
-^^^^^^^^^^^^^^^
-
-There are several important exceptions to the specification of this rule.
-
-* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first
- vertex. If the caller wishes to change the provoking vertex, they merely
- need to rotate the vertices themselves.
-* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no
- effect; the provoking vertex is always the last vertex.
-* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the
- second vertex, not the first. This permits each segment of the fan to have
- a different color.
an important part of the API. TGSI is the only intermediate representation
used by all drivers.
+Basics
+------
+
+All TGSI instructions, known as *opcodes*, operate on arbitrary-precision
+floating-point four-component vectors. An opcode may have up to one
+destination register, known as *dst*, and between zero and three source
+registers, called *src0* through *src2*, or simply *src* if there is only
+one.
+
+Some instructions, like :opcode:`I2F`, permit re-interpretation of vector
+components as integers. Other instructions permit using registers as
+two-component vectors with double precision; see :ref:`doubleopcodes`.
+
+When an instruction has a scalar result, the result is usually copied into
+each of the components of *dst*. When this happens, the result is said to be
+*replicated* to *dst*. :opcode:`RCP` is one such instruction.
+
Instruction Set
---------------
^^^^^^^^^^^^^^^^^^^^^^^^^
-ARL - Address Register Load
+.. opcode:: ARL - Address Register Load
.. math::
dst.w = \lfloor src.w\rfloor
-MOV - Move
+.. opcode:: MOV - Move
.. math::
dst.w = src.w
-LIT - Light Coefficients
+.. opcode:: LIT - Light Coefficients
.. math::
dst.w = 1
-RCP - Reciprocal
-
-.. math::
+.. opcode:: RCP - Reciprocal
- dst.x = \frac{1}{src.x}
+This instruction replicates its result.
- dst.y = \frac{1}{src.x}
+.. math::
- dst.z = \frac{1}{src.x}
+ dst = \frac{1}{src.x}
- dst.w = \frac{1}{src.x}
+.. opcode:: RSQ - Reciprocal Square Root
-RSQ - Reciprocal Square Root
+This instruction replicates its result.
.. math::
- dst.x = \frac{1}{\sqrt{|src.x|}}
-
- dst.y = \frac{1}{\sqrt{|src.x|}}
-
- dst.z = \frac{1}{\sqrt{|src.x|}}
+ dst = \frac{1}{\sqrt{|src.x|}}
- dst.w = \frac{1}{\sqrt{|src.x|}}
-
-EXP - Approximate Exponential Base 2
+.. opcode:: EXP - Approximate Exponential Base 2
.. math::
dst.w = 1
-LOG - Approximate Logarithm Base 2
+.. opcode:: LOG - Approximate Logarithm Base 2
.. math::
dst.w = 1
-MUL - Multiply
+.. opcode:: MUL - Multiply
.. math::
dst.w = src0.w \times src1.w
-ADD - Add
+.. opcode:: ADD - Add
.. math::
dst.w = src0.w + src1.w
-DP3 - 3-component Dot Product
-
-.. math::
+.. opcode:: DP3 - 3-component Dot Product
- dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+This instruction replicates its result.
- dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+.. math::
- dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+ dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
- dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+.. opcode:: DP4 - 4-component Dot Product
-DP4 - 4-component Dot Product
+This instruction replicates its result.
.. math::
- dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
-
- dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
-
- dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
+ dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
- dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
-
-DST - Distance Vector
+.. opcode:: DST - Distance Vector
.. math::
dst.w = src1.w
-MIN - Minimum
+.. opcode:: MIN - Minimum
.. math::
dst.w = min(src0.w, src1.w)
-MAX - Maximum
+.. opcode:: MAX - Maximum
.. math::
dst.w = max(src0.w, src1.w)
-SLT - Set On Less Than
+.. opcode:: SLT - Set On Less Than
.. math::
dst.w = (src0.w < src1.w) ? 1 : 0
-SGE - Set On Greater Equal Than
+.. opcode:: SGE - Set On Greater Equal Than
.. math::
dst.w = (src0.w >= src1.w) ? 1 : 0
-MAD - Multiply And Add
+.. opcode:: MAD - Multiply And Add
.. math::
dst.w = src0.w \times src1.w + src2.w
-SUB - Subtract
+.. opcode:: SUB - Subtract
.. math::
dst.w = src0.w - src1.w
-LRP - Linear Interpolate
+.. opcode:: LRP - Linear Interpolate
.. math::
dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
-CND - Condition
+.. opcode:: CND - Condition
.. math::
dst.w = (src2.w > 0.5) ? src0.w : src1.w
-DP2A - 2-component Dot Product And Add
+.. opcode:: DP2A - 2-component Dot Product And Add
.. math::
dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
-FRAC - Fraction
+.. opcode:: FRAC - Fraction
.. math::
dst.w = src.w - \lfloor src.w\rfloor
-CLAMP - Clamp
+.. opcode:: CLAMP - Clamp
.. math::
dst.w = clamp(src0.w, src1.w, src2.w)
-FLR - Floor
+.. opcode:: FLR - Floor
-This is identical to ARL.
+This is identical to :opcode:`ARL`.
.. math::
dst.w = \lfloor src.w\rfloor
-ROUND - Round
+.. opcode:: ROUND - Round
.. math::
dst.w = round(src.w)
-EX2 - Exponential Base 2
-
-.. math::
+.. opcode:: EX2 - Exponential Base 2
- dst.x = 2^{src.x}
+This instruction replicates its result.
- dst.y = 2^{src.x}
+.. math::
- dst.z = 2^{src.x}
+ dst = 2^{src.x}
- dst.w = 2^{src.x}
+.. opcode:: LG2 - Logarithm Base 2
-LG2 - Logarithm Base 2
+This instruction replicates its result.
.. math::
- dst.x = \log_2{src.x}
-
- dst.y = \log_2{src.x}
+ dst = \log_2{src.x}
- dst.z = \log_2{src.x}
- dst.w = \log_2{src.x}
+.. opcode:: POW - Power
-
-POW - Power
+This instruction replicates its result.
.. math::
- dst.x = src0.x^{src1.x}
-
- dst.y = src0.x^{src1.x}
-
- dst.z = src0.x^{src1.x}
+ dst = src0.x^{src1.x}
- dst.w = src0.x^{src1.x}
-
-XPD - Cross Product
+.. opcode:: XPD - Cross Product
.. math::
dst.w = 1
-ABS - Absolute
+.. opcode:: ABS - Absolute
.. math::
dst.w = |src.w|
-RCC - Reciprocal Clamped
+.. opcode:: RCC - Reciprocal Clamped
+
+This instruction replicates its result.
XXX cleanup on aisle three
.. math::
- dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
-
- dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
+ dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
- dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
- dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
+.. opcode:: DPH - Homogeneous Dot Product
-
-DPH - Homogeneous Dot Product
+This instruction replicates its result.
.. math::
- dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
-
- dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
-
- dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
+ dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
- dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
+.. opcode:: COS - Cosine
-COS - Cosine
+This instruction replicates its result.
.. math::
- dst.x = \cos{src.x}
-
- dst.y = \cos{src.x}
-
- dst.z = \cos{src.x}
+ dst = \cos{src.x}
- dst.w = \cos{src.x}
-
-DDX - Derivative Relative To X
+.. opcode:: DDX - Derivative Relative To X
.. math::
dst.w = partialx(src.w)
-DDY - Derivative Relative To Y
+.. opcode:: DDY - Derivative Relative To Y
.. math::
dst.w = partialy(src.w)
-KILP - Predicated Discard
+.. opcode:: KILP - Predicated Discard
discard
-PK2H - Pack Two 16-bit Floats
+.. opcode:: PK2H - Pack Two 16-bit Floats
TBD
-PK2US - Pack Two Unsigned 16-bit Scalars
+.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
TBD
-PK4B - Pack Four Signed 8-bit Scalars
+.. opcode:: PK4B - Pack Four Signed 8-bit Scalars
TBD
-PK4UB - Pack Four Unsigned 8-bit Scalars
+.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
TBD
-RFL - Reflection Vector
+.. opcode:: RFL - Reflection Vector
.. math::
dst.w = 1
-Considered for removal.
+.. note::
+
+ Considered for removal.
-SEQ - Set On Equal
+.. opcode:: SEQ - Set On Equal
.. math::
dst.x = (src0.x == src1.x) ? 1 : 0
+
dst.y = (src0.y == src1.y) ? 1 : 0
+
dst.z = (src0.z == src1.z) ? 1 : 0
+
dst.w = (src0.w == src1.w) ? 1 : 0
-SFL - Set On False
+.. opcode:: SFL - Set On False
+
+This instruction replicates its result.
.. math::
- dst.x = 0
- dst.y = 0
- dst.z = 0
- dst.w = 0
+ dst = 0
+
+.. note::
+
+ Considered for removal.
-Considered for removal.
-SGT - Set On Greater Than
+.. opcode:: SGT - Set On Greater Than
.. math::
dst.x = (src0.x > src1.x) ? 1 : 0
+
dst.y = (src0.y > src1.y) ? 1 : 0
- dst.z = (src0.z > src1.z) ? 1 : 0
- dst.w = (src0.w > src1.w) ? 1 : 0
+ dst.z = (src0.z > src1.z) ? 1 : 0
-SIN - Sine
+ dst.w = (src0.w > src1.w) ? 1 : 0
-.. math::
- dst.x = \sin{src.x}
+.. opcode:: SIN - Sine
- dst.y = \sin{src.x}
+This instruction replicates its result.
- dst.z = \sin{src.x}
+.. math::
- dst.w = \sin{src.x}
+ dst = \sin{src.x}
-SLE - Set On Less Equal Than
+.. opcode:: SLE - Set On Less Equal Than
.. math::
dst.x = (src0.x <= src1.x) ? 1 : 0
+
dst.y = (src0.y <= src1.y) ? 1 : 0
+
dst.z = (src0.z <= src1.z) ? 1 : 0
+
dst.w = (src0.w <= src1.w) ? 1 : 0
-SNE - Set On Not Equal
+.. opcode:: SNE - Set On Not Equal
.. math::
dst.x = (src0.x != src1.x) ? 1 : 0
+
dst.y = (src0.y != src1.y) ? 1 : 0
+
dst.z = (src0.z != src1.z) ? 1 : 0
+
dst.w = (src0.w != src1.w) ? 1 : 0
-STR - Set On True
+.. opcode:: STR - Set On True
+
+This instruction replicates its result.
.. math::
- dst.x = 1
- dst.y = 1
- dst.z = 1
- dst.w = 1
+ dst = 1
-TEX - Texture Lookup
+.. opcode:: TEX - Texture Lookup
TBD
-TXD - Texture Lookup with Derivatives
+.. opcode:: TXD - Texture Lookup with Derivatives
TBD
-TXP - Projective Texture Lookup
+.. opcode:: TXP - Projective Texture Lookup
TBD
-UP2H - Unpack Two 16-Bit Floats
+.. opcode:: UP2H - Unpack Two 16-Bit Floats
TBD
- Considered for removal.
+.. note::
+
+ Considered for removal.
-UP2US - Unpack Two Unsigned 16-Bit Scalars
+.. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
TBD
- Considered for removal.
+.. note::
+
+ Considered for removal.
-UP4B - Unpack Four Signed 8-Bit Values
+.. opcode:: UP4B - Unpack Four Signed 8-Bit Values
TBD
- Considered for removal.
+.. note::
-UP4UB - Unpack Four Unsigned 8-Bit Scalars
+ Considered for removal.
+
+.. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
TBD
- Considered for removal.
+.. note::
+
+ Considered for removal.
-X2D - 2D Coordinate Transformation
+.. opcode:: X2D - 2D Coordinate Transformation
.. math::
dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
+
dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
+
dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
+
dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
-Considered for removal.
+.. note::
+
+ Considered for removal.
From GL_NV_vertex_program2
^^^^^^^^^^^^^^^^^^^^^^^^^^
-ARA - Address Register Add
+.. opcode:: ARA - Address Register Add
TBD
- Considered for removal.
+.. note::
-ARR - Address Register Load With Round
+ Considered for removal.
+
+.. opcode:: ARR - Address Register Load With Round
.. math::
dst.w = round(src.w)
-BRA - Branch
+.. opcode:: BRA - Branch
pc = target
- Considered for removal.
+.. note::
+
+ Considered for removal.
-CAL - Subroutine Call
+.. opcode:: CAL - Subroutine Call
push(pc)
pc = target
-RET - Subroutine Call Return
+.. opcode:: RET - Subroutine Call Return
pc = pop()
Potential restrictions:
* Only occurs at end of function.
-SSG - Set Sign
+.. opcode:: SSG - Set Sign
.. math::
dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
-CMP - Compare
+.. opcode:: CMP - Compare
.. math::
dst.w = (src0.w < 0) ? src1.w : src2.w
-KIL - Conditional Discard
+.. opcode:: KIL - Conditional Discard
.. math::
endif
-SCS - Sine Cosine
+.. opcode:: SCS - Sine Cosine
.. math::
dst.y = 1
-TXB - Texture Lookup With Bias
+.. opcode:: TXB - Texture Lookup With Bias
TBD
-NRM - 3-component Vector Normalise
+.. opcode:: NRM - 3-component Vector Normalise
.. math::
dst.w = 1
-DIV - Divide
+.. opcode:: DIV - Divide
.. math::
dst.w = \frac{src0.w}{src1.w}
-DP2 - 2-component Dot Product
+.. opcode:: DP2 - 2-component Dot Product
-.. math::
+This instruction replicates its result.
- dst.x = src0.x \times src1.x + src0.y \times src1.y
-
- dst.y = src0.x \times src1.x + src0.y \times src1.y
-
- dst.z = src0.x \times src1.x + src0.y \times src1.y
+.. math::
- dst.w = src0.x \times src1.x + src0.y \times src1.y
+ dst = src0.x \times src1.x + src0.y \times src1.y
-TXL - Texture Lookup With LOD
+.. opcode:: TXL - Texture Lookup With LOD
TBD
-BRK - Break
+.. opcode:: BRK - Break
TBD
-IF - If
+.. opcode:: IF - If
TBD
-BGNFOR - Begin a For-Loop
+.. opcode:: BGNFOR - Begin a For-Loop
dst.x = floor(src.x)
dst.y = floor(src.y)
Note: The destination must be a loop register.
The source must be a constant register.
- Considered for cleanup / removal.
+.. note::
+
+ Considered for cleanup.
+
+.. note::
+
+ Considered for removal.
-REP - Repeat
+.. opcode:: REP - Repeat
TBD
-ELSE - Else
+.. opcode:: ELSE - Else
TBD
-ENDIF - End If
+.. opcode:: ENDIF - End If
TBD
-ENDFOR - End a For-Loop
+.. opcode:: ENDFOR - End a For-Loop
dst.x = dst.x + dst.z
dst.y = dst.y - 1.0
Note: The destination must be a loop register.
- Considered for cleanup / removal.
+.. note::
-ENDREP - End Repeat
+ Considered for cleanup.
+
+.. note::
+
+ Considered for removal.
+
+.. opcode:: ENDREP - End Repeat
TBD
-PUSHA - Push Address Register On Stack
+.. opcode:: PUSHA - Push Address Register On Stack
push(src.x)
push(src.y)
push(src.z)
push(src.w)
- Considered for cleanup / removal.
+.. note::
+
+ Considered for cleanup.
+
+.. note::
+
+ Considered for removal.
-POPA - Pop Address Register From Stack
+.. opcode:: POPA - Pop Address Register From Stack
dst.w = pop()
dst.z = pop()
dst.y = pop()
dst.x = pop()
- Considered for cleanup / removal.
+.. note::
+
+ Considered for cleanup.
+
+.. note::
+
+ Considered for removal.
From GL_NV_gpu_program4
Support for these opcodes indicated by a special pipe capability bit (TBD).
-CEIL - Ceiling
+.. opcode:: CEIL - Ceiling
.. math::
dst.w = \lceil src.w\rceil
-I2F - Integer To Float
+.. opcode:: I2F - Integer To Float
.. math::
dst.w = (float) src.w
-NOT - Bitwise Not
+.. opcode:: NOT - Bitwise Not
.. math::
dst.w = ~src.w
-TRUNC - Truncate
+.. opcode:: TRUNC - Truncate
.. math::
dst.w = trunc(src.w)
-SHL - Shift Left
+.. opcode:: SHL - Shift Left
.. math::
dst.w = src0.w << src1.x
-SHR - Shift Right
+.. opcode:: SHR - Shift Right
.. math::
dst.w = src0.w >> src1.x
-AND - Bitwise And
+.. opcode:: AND - Bitwise And
.. math::
dst.w = src0.w & src1.w
-OR - Bitwise Or
+.. opcode:: OR - Bitwise Or
.. math::
dst.w = src0.w | src1.w
-MOD - Modulus
+.. opcode:: MOD - Modulus
.. math::
dst.w = src0.w \bmod src1.w
-XOR - Bitwise Xor
+.. opcode:: XOR - Bitwise Xor
.. math::
- dst.x = src0.x ^ src1.x
+ dst.x = src0.x \oplus src1.x
- dst.y = src0.y ^ src1.y
+ dst.y = src0.y \oplus src1.y
- dst.z = src0.z ^ src1.z
+ dst.z = src0.z \oplus src1.z
- dst.w = src0.w ^ src1.w
+ dst.w = src0.w \oplus src1.w
-SAD - Sum Of Absolute Differences
+.. opcode:: SAD - Sum Of Absolute Differences
.. math::
dst.w = |src0.w - src1.w| + src2.w
-TXF - Texel Fetch
+.. opcode:: TXF - Texel Fetch
TBD
-TXQ - Texture Size Query
+.. opcode:: TXQ - Texture Size Query
TBD
-CONT - Continue
+.. opcode:: CONT - Continue
TBD
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-EMIT - Emit
+.. opcode:: EMIT - Emit
TBD
-ENDPRIM - End Primitive
+.. opcode:: ENDPRIM - End Primitive
TBD
^^^^^^^^^^
-BGNLOOP - Begin a Loop
+.. opcode:: BGNLOOP - Begin a Loop
TBD
-BGNSUB - Begin Subroutine
+.. opcode:: BGNSUB - Begin Subroutine
TBD
-ENDLOOP - End a Loop
+.. opcode:: ENDLOOP - End a Loop
TBD
-ENDSUB - End Subroutine
+.. opcode:: ENDSUB - End Subroutine
TBD
-NOP - No Operation
+.. opcode:: NOP - No Operation
Do nothing.
-NRM4 - 4-component Vector Normalise
-
-.. math::
-
- dst.x = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+.. opcode:: NRM4 - 4-component Vector Normalise
- dst.y = \frac{src.y}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+This instruction replicates its result.
- dst.z = \frac{src.z}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+.. math::
- dst.w = \frac{src.w}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+ dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
ps_2_x
^^^^^^^^^^^^
-CALLNZ - Subroutine Call If Not Zero
+.. opcode:: CALLNZ - Subroutine Call If Not Zero
TBD
-IFC - If
+.. opcode:: IFC - If
TBD
-BREAKC - Break Conditional
+.. opcode:: BREAKC - Break Conditional
TBD
+.. _doubleopcodes:
+
+Double Opcodes
+^^^^^^^^^^^^^^^
+
+.. opcode:: DADD - Add Double
+
+.. math::
+
+ dst.xy = src0.xy + src1.xy
+
+ dst.zw = src0.zw + src1.zw
+
+
+.. opcode:: DDIV - Divide Double
+
+.. math::
+
+ dst.xy = src0.xy / src1.xy
+
+ dst.zw = src0.zw / src1.zw
+
+.. opcode:: DSEQ - Set Double on Equal
+
+.. math::
+
+ dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F
+
+ dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
+
+.. opcode:: DSLT - Set Double on Less than
+
+.. math::
+
+ dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F
+
+ dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
+
+.. opcode:: DFRAC - Double Fraction
+
+.. math::
+
+ dst.xy = src.xy - \lfloor src.xy\rfloor
+
+ dst.zw = src.zw - \lfloor src.zw\rfloor
+
+
+.. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components
+
+.. math::
+
+ dst0.xy = frexp(src.xy, dst1.xy)
+
+ dst0.zw = frexp(src.zw, dst1.zw)
+
+.. opcode:: DLDEXP - Multiply Double Number by Integral Power of 2
+
+.. math::
+
+ dst.xy = ldexp(src0.xy, src1.xy)
+
+ dst.zw = ldexp(src0.zw, src1.zw)
+
+.. opcode:: DMIN - Minimum Double
+
+.. math::
+
+ dst.xy = min(src0.xy, src1.xy)
+
+ dst.zw = min(src0.zw, src1.zw)
+
+.. opcode:: DMAX - Maximum Double
+
+.. math::
+
+ dst.xy = max(src0.xy, src1.xy)
+
+ dst.zw = max(src0.zw, src1.zw)
+
+.. opcode:: DMUL - Multiply Double
+
+.. math::
+
+ dst.xy = src0.xy \times src1.xy
+
+ dst.zw = src0.zw \times src1.zw
+
+
+.. opcode:: DMAD - Multiply And Add Doubles
+
+.. math::
+
+ dst.xy = src0.xy \times src1.xy + src2.xy
+
+ dst.zw = src0.zw \times src1.zw + src2.zw
+
+
+.. opcode:: DRCP - Reciprocal Double
+
+.. math::
+
+ dst.xy = \frac{1}{src.xy}
+
+ dst.zw = \frac{1}{src.zw}
+
+.. opcode:: DSQRT - Square root double
+
+.. math::
+
+ dst.xy = \sqrt{src.xy}
+
+ dst.zw = \sqrt{src.zw}
+
Explanation of symbols used
------------------------------
discard Discard fragment.
- dst First destination register.
+ pc Program counter.
- dst0 First destination register.
+ target Label of target instruction.
- pc Program counter.
- src First source register.
+Other tokens
+---------------
- src0 First source register.
- src1 Second source register.
+Declaration
+^^^^^^^^^^^
- src2 Third source register.
- target Label of target instruction.
+Declares a register that will be referenced as an operand in Instruction
+tokens.
+File field contains register file that is being declared and is one
+of TGSI_FILE.
-Other tokens
----------------
+UsageMask field specifies which of the register components can be accessed
+and is one of TGSI_WRITEMASK.
+
+Interpolate field is only valid for fragment shader INPUT register files.
+It specifies the way input is being interpolated by the rasteriser and is one
+of TGSI_INTERPOLATE.
+
+If Dimension flag is set to 1, a Declaration Dimension token follows.
+
+If Semantic flag is set to 1, a Declaration Semantic token follows.
+
+CylindricalWrap bitfield is only valid for fragment shader INPUT register
+files. It specifies which register components should be subject to cylindrical
+wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X
+is set to 1, the X component should be interpolated according to cylindrical
+wrapping rules.
Declaration Semantic
for the perspective divide, if enabled.
As a vertex shader output, position should be scaled to the viewport. When
-used in fragment shaders, position will ---
-
-XXX --- wait a minute. Should position be in [0,1] for x and y?
+used in fragment shaders, position will be in window coordinates. The convention
+used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties.
XXX additionally, is there a way to configure the perspective divide? it's
accelerated on most chipsets AFAIK...
"""""""""""""""""""
PSIZE, or point size, is used to specify point sizes per-vertex. It should
- be in ``(p, n, x, f)`` format, where ``p`` is the point size, ``n`` is the minimum
- size, ``x`` is the maximum size, and ``f`` is the fade threshold.
-
- XXX this is arb_vp. is this what we actually do? should double-check...
+ be in ``(s, 0, 0, 1)`` format, where ``s`` is the (possibly clamped) point size.
+ Only the first component matters when writing from the vertex shader.
When using this semantic, be sure to set the appropriate state in the
:ref:`rasterizer` first.
""""""""""""""""""""""
XXX no clue
+
+
+Properties
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+ Properties are general directives that apply to the whole TGSI program.
+
+FS_COORD_ORIGIN
+"""""""""""""""
+
+Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin.
+The default value is UPPER_LEFT.
+
+If UPPER_LEFT, the position will be (0,0) at the upper left corner and
+increase downward and rightward.
+If LOWER_LEFT, the position will be (0,0) at the lower left corner and
+increase upward and rightward.
+
+OpenGL defaults to LOWER_LEFT, and is configurable with the
+GL_ARB_fragment_coord_conventions extension.
+
+DirectX 9/10 use UPPER_LEFT.
+
+FS_COORD_PIXEL_CENTER
+"""""""""""""""""""""
+
+Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention.
+The default value is HALF_INTEGER.
+
+If HALF_INTEGER, the fractional part of the position will be 0.5
+If INTEGER, the fractional part of the position will be 0.0
+
+Note that this does not affect the set of fragments generated by
+rasterization, which is instead controlled by gl_rasterization_rules in the
+rasterizer.
+
+OpenGL defaults to HALF_INTEGER, and is configurable with the
+GL_ARB_fragment_coord_conventions extension.
+
+DirectX 9 uses INTEGER.
+DirectX 10 uses HALF_INTEGER.
+
+
+
+Texture Sampling and Texture Formats
+------------------------------------
+
+This table shows how texture image components are returned as (x,y,z,w) tuples
+by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and
+:opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as
+well.
+
++--------------------+--------------+--------------------+--------------+
+| Texture Components | Gallium | OpenGL | Direct3D 9 |
++====================+==============+====================+==============+
+| R | XXX TBD | (r, 0, 0, 1) | (r, 1, 1, 1) |
++--------------------+--------------+--------------------+--------------+
+| RG | XXX TBD | (r, g, 0, 1) | (r, g, 1, 1) |
++--------------------+--------------+--------------------+--------------+
+| RGB | (r, g, b, 1) | (r, g, b, 1) | (r, g, b, 1) |
++--------------------+--------------+--------------------+--------------+
+| RGBA | (r, g, b, a) | (r, g, b, a) | (r, g, b, a) |
++--------------------+--------------+--------------------+--------------+
+| A | (0, 0, 0, a) | (0, 0, 0, a) | (0, 0, 0, a) |
++--------------------+--------------+--------------------+--------------+
+| L | (l, l, l, 1) | (l, l, l, 1) | (l, l, l, 1) |
++--------------------+--------------+--------------------+--------------+
+| LA | (l, l, l, a) | (l, l, l, a) | (l, l, l, a) |
++--------------------+--------------+--------------------+--------------+
+| I | (i, i, i, i) | (i, i, i, i) | N/A |
++--------------------+--------------+--------------------+--------------+
+| UV | XXX TBD | (0, 0, 0, 1) | (u, v, 1, 1) |
+| | | [#envmap-bumpmap]_ | |
++--------------------+--------------+--------------------+--------------+
+| Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) |
+| | | [#depth-tex-mode]_ | |
++--------------------+--------------+--------------------+--------------+
+
+.. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
+.. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
+ or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE.
#include "brw_context.h"
#include "brw_pipe_rast.h"
#include "brw_eu.h"
-#include "brw_util.h"
#include "brw_sf.h"
#include "brw_state.h"
static enum pipe_error upload_sf_prog(struct brw_context *brw)
{
const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
+ const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ;
struct brw_sf_prog_key key;
enum pipe_error ret;
unsigned i;
case PIPE_PRIM_TRIANGLES:
/* PIPE_NEW_RAST
*/
- if (brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL ||
- brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL)
+ if (rast->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ rast->fill_ccw != PIPE_POLYGON_MODE_FILL)
key.primitive = SF_UNFILLED_TRIS;
else
key.primitive = SF_TRIANGLES;
break;
}
- key.do_point_sprite = brw->curr.rast->templ.point_sprite;
- key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */
- key.do_flat_shading = brw->curr.rast->templ.flatshade;
- key.do_twoside_color = brw->curr.rast->templ.light_twoside;
+ key.do_point_sprite = rast->sprite_coord_enable ? 1 : 0;
+ key.sprite_origin_lower_left = (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
+ key.point_coord_replace_attrs = rast->sprite_coord_enable;
+ key.do_flat_shading = rast->flatshade;
+ key.do_twoside_color = rast->light_twoside;
if (key.do_twoside_color) {
- key.frontface_ccw = (brw->curr.rast->templ.front_winding ==
- PIPE_WINDING_CCW);
+ key.frontface_ccw = (rast->front_winding == PIPE_WINDING_CCW);
}
if (brw_search_cache(&brw->cache, BRW_SF_PROG,
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
#include "tgsi/tgsi_parse.h"
struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso));
struct nouveau_stateobj *so = so_new(5, 8, 0);
- if (cso->blend_enable) {
+ if (cso->rt[0].blend_enable) {
so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
so_data (so, 1);
- so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) |
- nvgl_blend_func(cso->rgb_src_factor));
- so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 |
- nvgl_blend_func(cso->rgb_dst_factor));
+ so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) |
+ nvgl_blend_func(cso->rt[0].rgb_src_factor));
+ so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 |
+ nvgl_blend_func(cso->rt[0].rgb_dst_factor));
/* FIXME: Gallium assumes GL_EXT_blend_func_separate.
It is not the case for NV30 */
so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1);
- so_data (so, nvgl_blend_eqn(cso->rgb_func));
+ so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
} else {
so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1);
so_data (so, 0);
}
so_method(so, rankine, NV34TCL_COLOR_MASK, 1);
- so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
- ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
- ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
- ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
+ so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
if (cso->logicop_enable) {
so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
}
so_method(so, rankine, NV34TCL_POINT_SPRITE, 1);
- if (cso->point_sprite) {
+ if (cso->point_quad_rasterization) {
unsigned psctl = (1 << 0), i;
for (i = 0; i < 8; i++) {
- if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ if ((cso->sprite_coord_enable >> i) & 1)
psctl |= (1 << (8 + i));
}
static void
nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv30_context *nv30 = nv30_context(pipe);
- nv30->constbuf[shader] = buf->buffer;
- nv30->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float));
+ nv30->constbuf[shader] = buf;
+ nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float));
if (shader == PIPE_SHADER_VERTEX) {
nv30->dirty |= NV30_NEW_VERTPROG;
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
#include "draw/draw_context.h"
struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso));
struct nouveau_stateobj *so = so_new(5, 8, 0);
- if (cso->blend_enable) {
+ if (cso->rt[0].blend_enable) {
so_method(so, curie, NV40TCL_BLEND_ENABLE, 3);
so_data (so, 1);
- so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) |
- nvgl_blend_func(cso->rgb_src_factor));
- so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 |
- nvgl_blend_func(cso->rgb_dst_factor));
+ so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) |
+ nvgl_blend_func(cso->rt[0].rgb_src_factor));
+ so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 |
+ nvgl_blend_func(cso->rt[0].rgb_dst_factor));
so_method(so, curie, NV40TCL_BLEND_EQUATION, 1);
- so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 |
- nvgl_blend_eqn(cso->rgb_func));
+ so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 |
+ nvgl_blend_eqn(cso->rt[0].rgb_func));
} else {
so_method(so, curie, NV40TCL_BLEND_ENABLE, 1);
so_data (so, 0);
}
so_method(so, curie, NV40TCL_COLOR_MASK, 1);
- so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
- ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
- ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
- ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
+ so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
if (cso->logicop_enable) {
so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2);
}
so_method(so, curie, NV40TCL_POINT_SPRITE, 1);
- if (cso->point_sprite) {
+ if (cso->point_quad_rasterization) {
unsigned psctl = (1 << 0), i;
for (i = 0; i < 8; i++) {
- if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ if ((cso->sprite_coord_enable >> i) & 1)
psctl |= (1 << (8 + i));
}
static void
nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv40_context *nv40 = nv40_context(pipe);
- nv40->constbuf[shader] = buf->buffer;
- nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float));
+ nv40->constbuf[shader] = buf;
+ nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float));
if (shader == PIPE_SHADER_VERTEX) {
nv40->dirty |= NV40_NEW_VERTPROG;
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
int rhw; /* result hw for FP outputs, or interpolant index */
int acc; /* instruction where this reg is last read (first insn == 1) */
+
+ int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */
+ int indirect[2]; /* index into pc->addr, or -1 */
+
+ ubyte buf_index; /* c{0 .. 15}[] or g{0 .. 15}[] */
};
#define NV50_MOD_NEG 1
int immd_nr;
struct nv50_reg **addr;
int addr_nr;
- uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */
+ struct nv50_reg *sysval;
+ int sysval_nr;
struct nv50_reg *temp_temp[16];
struct nv50_program_exec *temp_temp_exec[16];
uint8_t edgeflag_out;
};
+static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *);
+
static INLINE void
ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
{
reg->hw = hw;
reg->mod = 0;
reg->rhw = -1;
+ reg->vtx = -1;
reg->acc = 0;
+ reg->indirect[0] = reg->indirect[1] = -1;
+ reg->buf_index = (type == P_CONST) ? 1 : 0;
}
static INLINE unsigned
/* remove records of temporary address register values */
for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
- pc->r_addr[i].rhw = -1;
+ if (pc->r_addr[i].index < 0)
+ pc->r_addr[i].acc = 0;
}
static void
if (reg) {
alloc_reg(pc, reg);
*ri = *reg;
+ reg->indirect[0] = reg->indirect[1] = -1;
reg->mod = 0;
}
return ri;
return FALSE;
}
+/* Test bit 1 (mask 0x2) of the first instruction word of 'e'.
+ * Going by the name, that bit marks a control-flow instruction.
+ * The masked value is returned as-is rather than compared against zero,
+ * so callers should only test it for truth.
+ */
+static INLINE boolean
+is_control_flow(struct nv50_program_exec *e)
+{
+ return (e->inst[0] & 2);
+}
+
static INLINE void
set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
struct nv50_program_exec *e)
static INLINE void
set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
{
+ assert(a->type == P_ADDR);
+
assert(!(e->inst[0] & 0x0c000000));
assert(!(e->inst[1] & 0x00000004));
e->inst[0] |= (a->hw & 3) << 26;
- e->inst[1] |= (a->hw >> 2) << 2;
+ e->inst[1] |= a->hw & 4;
+}
+
+static void
+emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t);
+
+static void
+emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int);
+
+/* Emit one long instruction with 'dst' as destination and 'src' as its
+ * address-register operand; going by the name, this copies the value of
+ * address register 'src' into 'dst'.
+ *
+ * 'src' must be of type P_ADDR: set_addr() asserts it.
+ */
+static void
+emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ /* presumably the opcode selector for this move -- TODO confirm */
+ e->inst[1] = 0x40000000;
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+ set_addr(e, src);
+
+ emit(pc, e);
}
static void
emit(pc, e);
}
-static struct nv50_reg *
-alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
-{
- struct nv50_reg *a_tgsi = NULL, *a = NULL;
- int i;
- uint8_t avail = ~pc->addr_alloc;
-
- if (!ref) {
- /* allocate for TGSI_FILE_ADDRESS */
- while (avail) {
- i = ffs(avail) - 1;
-
- if (pc->r_addr[i].rhw < 0 ||
- pc->r_addr[i].acc != pc->insn_cur) {
- pc->addr_alloc |= (1 << i);
-
- pc->r_addr[i].rhw = -1;
- pc->r_addr[i].index = i;
- return &pc->r_addr[i];
- }
- avail &= ~(1 << i);
- }
- assert(0);
- return NULL;
- }
-
- /* Allocate and set an address reg so we can access 'ref'.
- *
- * If and r_addr->index will be -1 or the hw index the value
- * value in rhw is relative to. If rhw < 0, the reg has not
- * been initialized or is in use for TGSI_FILE_ADDRESS.
- */
- while (avail) { /* only consider regs that are not TGSI */
- i = ffs(avail) - 1;
- avail &= ~(1 << i);
-
- if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) {
- /* prefer an usused reg with low hw index */
- a = &pc->r_addr[i];
- continue;
- }
- if (!a && pc->r_addr[i].acc != pc->insn_cur)
- a = &pc->r_addr[i];
-
- if (ref->hw - pc->r_addr[i].rhw >= 128)
- continue;
-
- if ((ref->acc >= 0 && pc->r_addr[i].index < 0) ||
- (ref->acc < 0 && pc->r_addr[i].index == ref->index)) {
- pc->r_addr[i].acc = pc->insn_cur;
- return &pc->r_addr[i];
- }
- }
- assert(a);
-
- if (ref->acc < 0)
- a_tgsi = pc->addr[ref->index];
-
- emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4);
-
- a->rhw = ref->hw & ~0x7f;
- a->acc = pc->insn_cur;
- a->index = a_tgsi ? ref->index : -1;
- return a;
-}
-
#define INTERP_LINEAR 0
#define INTERP_FLAT 1
#define INTERP_PERSPECTIVE 2
e->param.shift = s;
e->param.mask = m << (s % 32);
- if (src->hw > 127)
- set_addr(e, alloc_addr(pc, src));
+ if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */
+ set_addr(e, get_address_reg(pc, src));
else
if (src->acc < 0) {
assert(src->type == P_CONST);
- set_addr(e, pc->addr[src->index]);
+ set_addr(e, pc->addr[src->indirect[0]]);
}
- e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
+ e->inst[1] |= (src->buf_index << 22);
}
/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */
if (src->type == P_ATTR) {
set_long(pc, e);
e->inst[1] |= 0x00200000;
+
+ if (src->vtx >= 0) {
+ /* indirect (vertex base + c) load from p[] */
+ e->inst[0] |= 0x01800000;
+ set_addr(e, get_address_reg(pc, src));
+ }
}
alloc_reg(pc, src);
if (src->type == P_ATTR) {
set_long(pc, e);
e->inst[1] |= 0x00200000;
+
+ if (src->vtx >= 0) {
+ e->inst[0] |= 0x01800000; /* src from p[] */
+ set_addr(e, get_address_reg(pc, src));
+ }
} else
if (src->type == P_CONST || src->type == P_IMMD) {
struct nv50_reg *temp = temp_temp(pc, e);
src = temp;
} else
if (src->type == P_CONST || src->type == P_IMMD) {
- assert(!(e->inst[0] & 0x00800000));
- if (e->inst[0] & 0x01000000) {
+ if (e->inst[0] & 0x01800000) {
struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
} else {
+ assert(!(e->inst[0] & 0x00800000));
set_data(pc, src, 0x7f, 16, e);
e->inst[0] |= 0x00800000;
}
src = temp;
} else
if (src->type == P_CONST || src->type == P_IMMD) {
- assert(!(e->inst[0] & 0x01000000));
- if (e->inst[0] & 0x00800000) {
+ if (e->inst[0] & 0x01800000) {
struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
} else {
+ assert(!(e->inst[0] & 0x01000000));
set_data(pc, src, 0x7f, 32+14, e);
e->inst[0] |= 0x01000000;
}
e->inst[0] |= dst->hw << 2;
e->inst[0] |= s << 16; /* shift left */
- set_src_0_restricted(pc, src, e);
+ set_src_0(pc, src, e);
emit(pc, e);
}
+/* Decide whether address register 'a' already holds a value that can be
+ * used to access 'r'.  A NULL 'r' never matches.
+ */
+static boolean
+address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r)
+{
+	/* no reference, or the two disagree on the vertex index */
+	if (!r || r->vtx != a->vtx)
+		return FALSE;
+
+	/* vertex-indexed access: only the second indirection has to match */
+	if (r->vtx >= 0)
+		return (a->indirect[1] == r->indirect[1]);
+
+	/* r->hw has to lie in the 128-entry window starting at a->rhw */
+	if (!(r->hw >= a->rhw && (r->hw - a->rhw) < 128))
+		return FALSE;
+
+	/* match against a->index when it is set, else against a->indirect[0] */
+	return (r->indirect[0] ==
+		((a->index >= 0) ? a->index : a->indirect[0]));
+}
+
+/* Load address register 'dst' from the P_ATTR location whose hw index is
+ * dst->vtx.
+ *
+ * pc:    compiler context
+ * dst:   address register to fill; must be of type P_ADDR (asserted)
+ * a:     optional address register supplying an indirect index, or NULL
+ *        for a direct load
+ * shift: adjustment applied to the value of 'a' before it is used; the
+ *        only caller visible here passes -2
+ */
+static void
+load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *a, int shift)
+{
+ struct nv50_reg mem, *temp;
+
+ /* stack-local register describing the attribute slot dst->vtx */
+ ctor_reg(&mem, P_ATTR, -1, dst->vtx);
+
+ assert(dst->type == P_ADDR);
+ if (!a) {
+ /* direct case: a single ARL from the attribute slot */
+ emit_arl(pc, dst, &mem, 0);
+ return;
+ }
+ temp = alloc_temp(pc, NULL);
+
+ if (shift) {
+ /* copy 'a' into a temp, adjust it, and load the result into dst */
+ emit_mov_from_addr(pc, temp, a);
+ if (shift < 0)
+ emit_shl_imm(pc, temp, temp, shift);
+ emit_arl(pc, dst, temp, MAX2(shift, 0));
+ }
+ /* NOTE(review): with a != NULL and shift == 0, dst is used as address
+ * operand below without having been loaded from 'a' -- confirm that
+ * this combination is never requested.
+ */
+ emit_mov(pc, temp, &mem);
+ /* attach dst as address operand to the last emitted instruction,
+ * presumably the mov just above
+ */
+ set_addr(pc->p->exec_tail, dst);
+
+ emit_arl(pc, dst, temp, 0);
+ free_temp(pc, temp);
+}
+
+/* Return an address register from pc->r_addr[] for accessing 'ref',
+ * reusing one that already holds a suitable value or setting up a new one.
+ * Aborts the process when no register is available.
+ *
+ * case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS
+ * case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX
+ * case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX
+ * case (vtx < 0, acc >= 0): memory address too high to encode
+ * case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS
+ */
+static struct nv50_reg *
+get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref)
+{
+ int i;
+ struct nv50_reg *a_ref, *a = NULL;
+
+ /* Scan all address regs: a suitable one is returned immediately;
+ * otherwise remember a candidate.  An unused reg (acc == 0) always
+ * replaces the current candidate; a reg with index < 0 that was last
+ * touched before the current instruction is only taken when no
+ * candidate exists yet.
+ */
+ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
+ if (pc->r_addr[i].acc == 0)
+ a = &pc->r_addr[i]; /* an unused address reg */
+ else
+ if (address_reg_suitable(&pc->r_addr[i], ref)) {
+ pc->r_addr[i].acc = pc->insn_cur;
+ return &pc->r_addr[i];
+ } else
+ if (!a && pc->r_addr[i].index < 0 &&
+ pc->r_addr[i].acc < pc->insn_cur)
+ a = &pc->r_addr[i];
+ }
+ if (!a) {
+ /* We'll be able to spill address regs when this
+ * mess is replaced with a proper compiler ...
+ */
+ NOUVEAU_ERR("out of address regs\n");
+ abort();
+ return NULL;
+ }
+
+ /* initialize and reserve for this TGSI instruction */
+ a->rhw = 0;
+ a->index = a->indirect[0] = a->indirect[1] = -1;
+ a->acc = pc->insn_cur;
+
+ if (!ref) {
+ /* plain allocation: nothing is loaded into the register here */
+ a->vtx = -1;
+ return a;
+ }
+ a->vtx = ref->vtx;
+
+ /* now put in the correct value ... */
+
+ if (ref->vtx >= 0) {
+ a->indirect[1] = ref->indirect[1];
+
+ /* For an indirect vertex index, we need to shift address right
+ * by 2, the address register will contain vtx * 16, we need to
+ * load from a[vtx * 4].
+ */
+ load_vertex_base(pc, a, (ref->acc < 0) ?
+ pc->addr[ref->indirect[1]] : NULL, -2);
+ } else {
+ assert(ref->acc < 0 || ref->indirect[0] < 0);
+
+ /* base = ref->hw rounded down to a multiple of 128; the add below
+ * receives that base times 4 plus, for indirect refs, the TGSI
+ * address register named by ref->indirect[0]
+ */
+ a->rhw = ref->hw & ~0x7f;
+ a->indirect[0] = ref->indirect[0];
+ a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL;
+
+ emit_add_addr_imm(pc, a, a_ref, a->rhw * 4);
+ }
+ return a;
+}
+
#define NV50_MAX_F32 0x880
#define NV50_MAX_S32 0x08c
#define NV50_MAX_U32 0x084
emit_control_flow(pc, 0x3, pred, cc);
}
+/* Emit a long instruction carrying primitive command 'cmd'.
+ * 'cmd' is placed at bit 9 of the first instruction word; in this file
+ * TGSI_OPCODE_EMIT passes 1 and TGSI_OPCODE_ENDPRIM passes 2.
+ */
+static void
+emit_prim_cmd(struct nv50_pc *pc, unsigned cmd)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ /* fixed bits of both instruction words; meaning not documented here */
+ e->inst[0] = 0xf0000000 | (cmd << 9);
+ e->inst[1] = 0xc0000000;
+ set_long(pc, e);
+
+ emit(pc, e);
+}
+
#define QOP_ADD 0
#define QOP_SUBR 1
#define QOP_SUB 2
{
struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c];
if (!r) {
- r = alloc_addr(pc, NULL);
- pc->addr[dst->Register.Index * 4 + c] = r;
+ r = get_address_reg(pc, NULL);
+ r->index = dst->Register.Index * 4 + c;
+ pc->addr[r->index] = r;
}
assert(r);
return r;
}
case TGSI_FILE_NULL:
return NULL;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(pc->sysval[dst->Register.Index].type == P_RESULT);
+ assert(c == 0);
+ return &pc->sysval[dst->Register.Index];
default:
break;
}
switch (src->Register.File) {
case TGSI_FILE_INPUT:
r = &pc->attr[src->Register.Index * 4 + c];
+
+ if (!src->Dimension.Dimension)
+ break;
+ r = reg_instance(pc, r);
+ r->vtx = src->Dimension.Index;
+
+ if (!src->Dimension.Indirect)
+ break;
+ swz = tgsi_util_get_src_register_swizzle(
+ &src->DimIndirect, 0);
+ r->acc = -1;
+ r->indirect[1] = src->DimIndirect.Index * 4 + swz;
break;
case TGSI_FILE_TEMPORARY:
r = &pc->temp[src->Register.Index * 4 + c];
* use the index field to select the address reg.
*/
r = reg_instance(pc, NULL);
+ ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c);
+
swz = tgsi_util_get_src_register_swizzle(
- &src->Indirect, 0);
- ctor_reg(r, P_CONST,
- src->Indirect.Index * 4 + swz,
- src->Register.Index * 4 + c);
+ &src->Indirect, 0);
r->acc = -1;
+ r->indirect[0] = src->Indirect.Index * 4 + swz;
break;
case TGSI_FILE_IMMEDIATE:
r = &pc->immd[src->Register.Index * 4 + c];
r = pc->addr[src->Register.Index * 4 + c];
assert(r);
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(c == 0);
+ r = &pc->sysval[src->Register.Index];
+ break;
default:
assert(0);
break;
r->mod |= mod & NV50_MOD_I32;
assert(r);
- if (r->acc >= 0 && r != temp)
+ if (r->acc >= 0 && r->vtx < 0 && r != temp)
return reg_instance(pc, r); /* will clear r->mod */
return r;
}
}
break;
case TGSI_OPCODE_ARL:
- assert(src[0][0]);
temp = temp_temp(pc, NULL);
- emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32);
- emit_arl(pc, dst[0], temp, 4);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, temp, src[0][c], -1,
+ CVT_FLOOR | CVT_S32_F32);
+ emit_arl(pc, dst[c], temp, 4);
+ }
break;
case TGSI_OPCODE_BGNLOOP:
pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc);
pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_EMIT:
+ emit_prim_cmd(pc, 1);
+ break;
case TGSI_OPCODE_ENDIF:
pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_ENDPRIM:
+ emit_prim_cmd(pc, 2);
+ break;
case TGSI_OPCODE_ENDSUB:
assert(pc->in_subroutine);
+ terminate_mbb(pc);
pc->in_subroutine = FALSE;
break;
case TGSI_OPCODE_EX2:
if (!is_long(pc->p->exec_tail))
convert_to_long(pc, pc->p->exec_tail);
else
- if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail))
+ if (is_immd(pc->p->exec_tail) ||
+ is_join(pc->p->exec_tail) ||
+ is_control_flow(pc->p->exec_tail))
emit_nop(pc);
pc->p->exec_tail->inst[1] |= 1; /* set exit bit */
+
+ terminate_mbb(pc);
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
static unsigned
nv50_revdep_reorder(unsigned m[4], unsigned rdep[4])
{
- unsigned i, c, x, unsafe;
+ unsigned i, c, x, unsafe = 0;
for (c = 0; c < 4; c++)
m[c] = c;
* value of 0 for back-facing, and 0xffffffff for front-facing.
*/
static void
-load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a)
+load_frontfacing(struct nv50_pc *pc, struct nv50_reg *sv)
{
- struct nv50_reg *one = alloc_immd(pc, 1.0f);
+ struct nv50_reg *temp = alloc_temp(pc, NULL);
+ int r_pred = 0;
- assert(a->rhw == -1);
- alloc_reg(pc, a); /* do this before rhw is set */
- a->rhw = 255;
- load_interpolant(pc, a);
- emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND);
+ temp->rhw = 255;
+ emit_interp(pc, temp, NULL, INTERP_FLAT);
- FREE(one);
+ emit_cvt(pc, sv, temp, r_pred, CVT_ABS | CVT_F32_S32);
+
+ emit_not(pc, temp, temp);
+ set_pred(pc, 0x2, r_pred, pc->p->exec_tail);
+ emit_cvt(pc, sv, temp, -1, CVT_F32_S32);
+ set_pred(pc, 0x2, r_pred, pc->p->exec_tail);
+
+ free_temp(pc, temp);
+}
+
+/* Rebase the instance ID system value.
+ *
+ * Emits a 32-bit add (emit_add_b32) of pc->sysval[index] and the constant
+ * at slot 24 of constant buffer 2 ("startInstance") into a freshly
+ * constructed P_TEMP register, then makes that register the new
+ * pc->sysval[index].
+ *
+ * pc:    compiler context
+ * index: index into pc->sysval[] of the instance ID system value
+ */
+static void
+load_instance_id(struct nv50_pc *pc, unsigned index)
+{
+	struct nv50_reg reg, mem;
+
+	/* "&reg" had been mangled into a (R) sign by entity decoding */
+	ctor_reg(&reg, P_TEMP, -1, -1);
+	ctor_reg(&mem, P_CONST, -1, 24); /* startInstance */
+	mem.buf_index = 2;
+
+	emit_add_b32(pc, &reg, &pc->sysval[index], &mem);
+	pc->sysval[index] = reg;
+}
+
+/* Fill in the semantic name (sn) and semantic index (si) of every entry
+ * in p->cfg.in[] and p->cfg.out[], looked up in p->info through the
+ * entry's id.
+ */
+static void
+copy_semantic_info(struct nv50_program *p)
+{
+	unsigned n, slot;
+
+	/* inputs */
+	n = 0;
+	while (n < p->cfg.in_nr) {
+		slot = p->cfg.in[n].id;
+		p->cfg.in[n].sn = p->info.input_semantic_name[slot];
+		p->cfg.in[n].si = p->info.input_semantic_index[slot];
+		n++;
+	}
+
+	/* outputs */
+	n = 0;
+	while (n < p->cfg.out_nr) {
+		slot = p->cfg.out[n].id;
+		p->cfg.out[n].sn = p->info.output_semantic_name[slot];
+		p->cfg.out[n].si = p->info.output_semantic_index[slot];
+		n++;
+	}
}
static boolean
struct tgsi_parse_context tp;
struct nv50_program *p = pc->p;
boolean ret = FALSE;
- unsigned i, c, flat_nr = 0;
+ unsigned i, c, instance_id, vertex_id, flat_nr = 0;
tgsi_parse_init(&tp, pc->p->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&tp)) {
switch (d->Semantic.Name) {
case TGSI_SEMANTIC_BCOLOR:
p->cfg.two_side[si].hw = first;
- if (p->cfg.io_nr > first)
- p->cfg.io_nr = first;
+ if (p->cfg.out_nr > first)
+ p->cfg.out_nr = first;
break;
case TGSI_SEMANTIC_PSIZE:
p->cfg.psiz = first;
- if (p->cfg.io_nr > first)
- p->cfg.io_nr = first;
+ if (p->cfg.out_nr > first)
+ p->cfg.out_nr = first;
break;
case TGSI_SEMANTIC_EDGEFLAG:
pc->edgeflag_out = first;
pc->interp_mode[i] = mode;
}
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(d->Declaration.Semantic);
+ switch (d->Semantic.Name) {
+ case TGSI_SEMANTIC_FACE:
+ assert(p->type == PIPE_SHADER_FRAGMENT);
+ load_frontfacing(pc,
+ &pc->sysval[first]);
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ assert(p->type == PIPE_SHADER_VERTEX);
+ instance_id = first;
+ p->cfg.regs[0] |= (1 << 4);
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ assert(p->type != PIPE_SHADER_VERTEX);
+ p->cfg.prim_id = first;
+ break;
+ /*
+ case TGSI_SEMANTIC_PRIMIDIN:
+ assert(p->type == PIPE_SHADER_GEOMETRY);
+ pc->sysval[first].hw = 6;
+ p->cfg.regs[0] |= (1 << 8);
+ break;
+ case TGSI_SEMANTIC_VERTEXID:
+ assert(p->type == PIPE_SHADER_VERTEX);
+ vertex_id = first;
+ p->cfg.regs[0] |= (1 << 12) | (1 << 0);
+ break;
+ */
+ }
+ break;
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_SAMPLER:
}
}
- if (p->type == PIPE_SHADER_VERTEX) {
+ if (p->type == PIPE_SHADER_VERTEX || p->type == PIPE_SHADER_GEOMETRY) {
int rid = 0;
- for (i = 0; i < pc->attr_nr * 4; ++i) {
- if (pc->attr[i].acc) {
- pc->attr[i].hw = rid++;
- p->cfg.attr[i / 32] |= 1 << (i % 32);
+ if (p->type == PIPE_SHADER_GEOMETRY) {
+ for (i = 0; i < pc->attr_nr; ++i) {
+ p->cfg.in[i].hw = rid;
+ p->cfg.in[i].id = i;
+
+ for (c = 0; c < 4; ++c) {
+ int n = i * 4 + c;
+ if (!pc->attr[n].acc)
+ continue;
+ pc->attr[n].hw = rid++;
+ p->cfg.in[i].mask |= 1 << c;
+ }
+ }
+ } else {
+ for (i = 0; i < pc->attr_nr * 4; ++i) {
+ if (pc->attr[i].acc) {
+ pc->attr[i].hw = rid++;
+ p->cfg.attr[i / 32] |= 1 << (i % 32);
+ }
+ }
+ if (p->cfg.regs[0] & (1 << 0))
+ pc->sysval[vertex_id].hw = rid++;
+ if (p->cfg.regs[0] & (1 << 4)) {
+ pc->sysval[instance_id].hw = rid++;
+ load_instance_id(pc, instance_id);
}
}
for (i = 0, rid = 0; i < pc->result_nr; ++i) {
- p->cfg.io[i].hw = rid;
- p->cfg.io[i].id = i;
+ p->cfg.out[i].hw = rid;
+ p->cfg.out[i].id = i;
for (c = 0; c < 4; ++c) {
int n = i * 4 + c;
if (!pc->result[n].acc)
continue;
pc->result[n].hw = rid++;
- p->cfg.io[i].mask |= 1 << c;
+ p->cfg.out[i].mask |= 1 << c;
}
}
+ if (p->cfg.prim_id < 0x40) {
+ /* GP has to write to PrimitiveID */
+ ctor_reg(&pc->sysval[p->cfg.prim_id],
+ P_RESULT, p->cfg.prim_id, rid);
+ p->cfg.prim_id = rid++;
+ }
for (c = 0; c < 2; ++c)
if (p->cfg.two_side[c].hw < 0x40)
- p->cfg.two_side[c] = p->cfg.io[
+ p->cfg.two_side[c] = p->cfg.out[
p->cfg.two_side[c].hw];
if (p->cfg.psiz < 0x40)
- p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw;
+ p->cfg.psiz = p->cfg.out[p->cfg.psiz].hw;
+
+ copy_semantic_info(p);
} else
if (p->type == PIPE_SHADER_FRAGMENT) {
int rid, aid;
pc->allow32 = TRUE;
- int base = (TGSI_SEMANTIC_POSITION ==
- p->info.input_semantic_name[0]) ? 0 : 1;
+ /* do we read FragCoord ? */
+ if (pc->attr_nr &&
+ p->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ /* select FCRD components we want accessible */
+ for (c = 0; c < 4; ++c)
+ if (pc->attr[c].acc)
+ p->cfg.regs[1] |= 1 << (24 + c);
+ aid = 0;
+ } else /* offset by 1 if FCRD.w is needed for pinterp */
+ aid = popcnt4(p->cfg.regs[1] >> 24);
/* non-flat interpolants have to be mapped to
* the lower hardware IDs, so sort them:
*/
for (i = 0; i < pc->attr_nr; i++) {
if (pc->interp_mode[i] == INTERP_FLAT)
- p->cfg.io[m++].id = i;
+ p->cfg.in[m++].id = i;
else {
if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE))
- p->cfg.io[n].linear = TRUE;
- p->cfg.io[n++].id = i;
+ p->cfg.in[n].linear = TRUE;
+ p->cfg.in[n++].id = i;
}
}
-
- if (!base) /* set w-coordinate mask from perspective interp */
- p->cfg.io[0].mask |= p->cfg.regs[1] >> 24;
-
- aid = popcnt4( /* if fcrd isn't contained in cfg.io */
- base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask);
+ copy_semantic_info(p);
for (n = 0; n < pc->attr_nr; ++n) {
- p->cfg.io[n].hw = rid = aid;
- i = p->cfg.io[n].id;
+ p->cfg.in[n].hw = rid = aid;
+ i = p->cfg.in[n].id;
if (p->info.input_semantic_name[n] ==
TGSI_SEMANTIC_FACE) {
if (!pc->attr[i * 4 + c].acc)
continue;
pc->attr[i * 4 + c].rhw = rid++;
- p->cfg.io[n].mask |= 1 << c;
+ p->cfg.in[n].mask |= 1 << c;
load_interpolant(pc, &pc->attr[i * 4 + c]);
}
- aid += popcnt4(p->cfg.io[n].mask);
+ aid += popcnt4(p->cfg.in[n].mask);
}
- if (!base)
- p->cfg.regs[1] |= p->cfg.io[0].mask << 24;
-
m = popcnt4(p->cfg.regs[1] >> 24);
/* set count of non-position inputs and of non-flat
p->cfg.regs[1] |= aid - m;
if (flat_nr) {
- i = p->cfg.io[pc->attr_nr - flat_nr].hw;
+ i = p->cfg.in[pc->attr_nr - flat_nr].hw;
p->cfg.regs[1] |= (i - m) << 16;
} else
p->cfg.regs[1] |= p->cfg.regs[1] << 16;
/* mark color semantic for light-twoside */
- n = 0x40;
- for (i = 0; i < pc->attr_nr; i++) {
- ubyte si, sn;
-
- sn = p->info.input_semantic_name[p->cfg.io[i].id];
- si = p->info.input_semantic_index[p->cfg.io[i].id];
-
- if (sn == TGSI_SEMANTIC_COLOR) {
- p->cfg.two_side[si] = p->cfg.io[i];
-
- /* increase colour count */
- p->cfg.regs[0] += popcnt4(
- p->cfg.two_side[si].mask) << 16;
-
- n = MIN2(n, p->cfg.io[i].hw - m);
+ n = 0x80;
+ for (i = 0; i < p->cfg.in_nr; i++) {
+ if (p->cfg.in[i].sn == TGSI_SEMANTIC_COLOR) {
+ n = MIN2(n, p->cfg.in[i].hw - m);
+ p->cfg.two_side[p->cfg.in[i].si] = p->cfg.in[i];
+
+ p->cfg.regs[0] += /* increase colour count */
+ popcnt4(p->cfg.in[i].mask) << 16;
}
}
- if (n < 0x40)
+ if (n < 0x80)
p->cfg.regs[0] += n;
+ if (p->cfg.prim_id < 0x40) {
+ pc->sysval[p->cfg.prim_id].rhw = rid++;
+ emit_interp(pc, &pc->sysval[p->cfg.prim_id], NULL,
+ INTERP_FLAT);
+ /* increase FP_INTERPOLANT_CTRL_COUNT */
+ p->cfg.regs[1] += 1;
+ }
+
/* Initialize FP results:
* FragDepth is always first TGSI and last hw output
*/
FREE(pc->attr);
if (pc->temp)
FREE(pc->temp);
+ if (pc->sysval)
+ FREE(pc->sysval);
+ if (pc->insn_pos)
+ FREE(pc->insn_pos);
FREE(pc);
}
+/* Translate a PIPE_PRIM_* geometry shader output primitive into the
+ * matching NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_* value.  Only points, line
+ * strips and triangle strips are accepted; anything else is reported
+ * through NOUVEAU_ERR and aborts the process.
+ */
+static INLINE uint32_t
+nv50_map_gs_output_prim(unsigned pprim)
+{
+	if (pprim == PIPE_PRIM_POINTS)
+		return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
+	if (pprim == PIPE_PRIM_LINE_STRIP)
+		return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
+	if (pprim == PIPE_PRIM_TRIANGLE_STRIP)
+		return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
+
+	/* unsupported primitive type */
+	NOUVEAU_ERR("invalid GS_OUTPUT_PRIMITIVE: %u\n", pprim);
+	abort();
+	return 0;
+}
+
static boolean
ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
{
pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1;
assert(pc->addr_nr <= 2);
+ pc->sysval_nr = p->info.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
p->cfg.high_temp = 4;
p->cfg.two_side[0].hw = 0x40;
p->cfg.two_side[1].hw = 0x40;
+ p->cfg.prim_id = 0x40;
p->cfg.edgeflag_in = pc->edgeflag_out = 0xff;
+ for (i = 0; i < p->info.num_properties; ++i) {
+ unsigned *data = &p->info.properties[i].data[0];
+
+ switch (p->info.properties[i].name) {
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ p->cfg.prim_type = nv50_map_gs_output_prim(data[0]);
+ break;
+ case TGSI_PROPERTY_GS_MAX_VERTICES:
+ p->cfg.vert_count = data[0];
+ break;
+ default:
+ break;
+ }
+ }
+
switch (p->type) {
case PIPE_SHADER_VERTEX:
p->cfg.psiz = 0x40;
p->cfg.clpd = 0x40;
- p->cfg.io_nr = pc->result_nr;
+ p->cfg.out_nr = pc->result_nr;
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ assert(p->cfg.prim_type);
+ assert(p->cfg.vert_count);
+
+ p->cfg.psiz = 0x80;
+ p->cfg.clpd = 0x80;
+ p->cfg.prim_id = 0x80;
+ p->cfg.out_nr = pc->result_nr;
+ p->cfg.in_nr = pc->attr_nr;
+
+ p->cfg.two_side[0].hw = 0x80;
+ p->cfg.two_side[1].hw = 0x80;
break;
case PIPE_SHADER_FRAGMENT:
rtype[0] = rtype[1] = P_TEMP;
p->cfg.regs[0] = 0x01000004;
- p->cfg.io_nr = pc->attr_nr;
+ p->cfg.in_nr = pc->attr_nr;
if (p->info.writes_z) {
p->cfg.regs[2] |= 0x00000100;
return FALSE;
}
for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
- ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1);
+ ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
+
+	if (pc->sysval_nr) {
+		/* pc->sysval is an array of struct nv50_reg (not of pointers):
+		 * size each element from the pointee, otherwise the ctor_reg()
+		 * calls below write past the end of the allocation.
+		 */
+		pc->sysval = CALLOC(pc->sysval_nr, sizeof(*pc->sysval));
+		if (!pc->sysval)
+			return FALSE;
+		/* will only ever use SYSTEM_VALUE[i].x (hopefully) */
+		for (i = 0; i < pc->sysval_nr; ++i)
+			ctor_reg(&pc->sysval[i], rtype[0], i, -1);
+	}
return TRUE;
}
if (p->param_nr) {
unsigned cb;
- uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+ uint32_t *map = pipe_buffer_map(pscreen,
+ nv50->constbuf[p->type],
PIPE_BUFFER_USAGE_CPU_READ);
-
- if (p->type == PIPE_SHADER_VERTEX)
+ switch (p->type) {
+ case PIPE_SHADER_GEOMETRY: cb = NV50_CB_PGP; break;
+ case PIPE_SHADER_FRAGMENT: cb = NV50_CB_PFP; break;
+ default:
cb = NV50_CB_PVP;
- else
- cb = NV50_CB_PFP;
+ assert(p->type == PIPE_SHADER_VERTEX);
+ break;
+ }
nv50_program_upload_data(nv50, map, 0, p->param_nr, cb);
pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]);
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(5, 8, 2);
+ so = so_new(5, 7, 2);
so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
+ NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
+ NOUVEAU_BO_LOW, 0, 0);
so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
so_data (so, p->cfg.attr[0]);
so_data (so, p->cfg.attr[1]);
so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
so_data (so, p->cfg.high_result);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2);
- so_data (so, p->cfg.high_result); //8);
+ so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
so_data (so, p->cfg.high_temp);
so_method(so, tesla, NV50TCL_VP_START_ID, 1);
so_data (so, 0); /* program start offset */
so_ref(NULL, &so);
}
+/* Validate the current geometry program (nv50->geomprog) and build the
+ * state object that programs it into the hardware.
+ *
+ * The program is translated first if that has not happened yet, its data
+ * and code are validated, and a new state object is stored in
+ * nv50->state.geomprog.
+ *
+ * NOTE(review): nv50->geomprog is dereferenced unconditionally here,
+ * while nv50_pntc_replace() treats it as possibly NULL -- the caller
+ * presumably only invokes this when a geometry program is bound; confirm.
+ */
+void
+nv50_geomprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *p = nv50->geomprog;
+ struct nouveau_stateobj *so;
+
+ if (!p->translated) {
+ nv50_program_validate(nv50, p);
+ /* translation failure is only caught by this assert */
+ if (!p->translated)
+ assert(0);
+ }
+
+ nv50_program_validate_data(nv50, p);
+ nv50_program_validate_code(nv50, p);
+
+ /* presumably sized for the 6 methods, 7 data words (5 so_data plus
+ * 2 relocs) and 2 relocations emitted below -- keep in sync
+ */
+ so = so_new(6, 7, 2);
+ /* address of the program code buffer, high then low part */
+ so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
+ so_data (so, p->cfg.high_temp);
+ so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
+ so_data (so, p->cfg.high_result);
+ /* values taken from the shader's GS_OUTPUT_PRIM / GS_MAX_VERTICES
+ * properties when the program context was constructed
+ */
+ so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
+ so_data (so, p->cfg.prim_type);
+ so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
+ so_data (so, p->cfg.vert_count);
+ so_method(so, tesla, NV50TCL_GP_START_ID, 1);
+ so_data (so, 0);
+ /* hand the object to the context state, then drop the local ref */
+ so_ref(so, &nv50->state.geomprog);
+ so_ref(NULL, &so);
+}
+
static uint32_t
nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
{
+ struct nv50_program *vp;
struct nv50_program *fp = nv50->fragprog;
- struct nv50_program *vp = nv50->vertprog;
unsigned i, c, m = base;
uint32_t origin = 0x00000010;
+ vp = nv50->geomprog ? nv50->geomprog : nv50->vertprog;
+
/* XXX: this might not work correctly in all cases yet - we'll
* just assume that an FP generic input that is not written in
* the VP is PointCoord.
*/
memset(pntc, 0, 8 * sizeof(uint32_t));
- for (i = 0; i < fp->cfg.io_nr; i++) {
- uint8_t sn, si;
- uint8_t j, k = fp->cfg.io[i].id;
- unsigned n = popcnt4(fp->cfg.io[i].mask);
+ for (i = 0; i < fp->cfg.in_nr; i++) {
+ unsigned j, n = popcnt4(fp->cfg.in[i].mask);
- if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) {
+ if (fp->cfg.in[i].sn != TGSI_SEMANTIC_GENERIC) {
m += n;
continue;
}
- for (j = 0; j < vp->info.num_outputs; ++j) {
- sn = vp->info.output_semantic_name[j];
- si = vp->info.output_semantic_index[j];
-
- if (sn == fp->info.input_semantic_name[k] &&
- si == fp->info.input_semantic_index[k])
+ for (j = 0; j < vp->cfg.out_nr; ++j)
+ if (vp->cfg.out[j].sn == fp->cfg.in[i].sn &&
+ vp->cfg.out[j].si == fp->cfg.in[i].si)
break;
- }
- if (j < vp->cfg.out_nr) {
- ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[
- vp->cfg.out[j].si];
+ if (j < vp->info.num_outputs) {
+ ubyte enable =
- (nv50->rasterizer->pipe.sprite_coord_enable >> si) & 1;
++ (nv50->rasterizer->pipe.sprite_coord_enable >> vp->cfg.out[j].si) & 1;
- if (mode == PIPE_SPRITE_COORD_NONE) {
+ if (enable == 0) {
m += n;
continue;
- } else
- if (mode == PIPE_SPRITE_COORD_LOWER_LEFT)
- origin = 0;
+ }
}
/* this is either PointCoord or replaced by sprite coords */
for (c = 0; c < 4; c++) {
- if (!(fp->cfg.io[i].mask & (1 << c)))
+ if (!(fp->cfg.in[i].mask & (1 << c)))
continue;
pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
++m;
}
}
- return origin;
+ return (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT ? 0 : origin);
}
static int
-nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4],
- struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo)
+nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4],
+ struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo)
{
int c;
uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw;
- uint8_t *map = (uint8_t *)p_map;
+ uint8_t *map = (uint8_t *)map32;
for (c = 0; c < 4; ++c) {
if (mf & 1) {
if (fpi->linear == TRUE)
lin[mid / 32] |= 1 << (mid % 32);
- map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40);
+ if (mv & 1)
+ map[mid] = oid;
+ else
+ map[mid] = (c == 3) ? (zval + 1) : zval;
+ ++mid;
}
oid += mv & 1;
}
void
-nv50_linkage_validate(struct nv50_context *nv50)
+nv50_fp_linkage_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program *vp = nv50->vertprog;
struct nv50_program *fp = nv50->fragprog;
struct nouveau_stateobj *so;
- struct nv50_sreg4 dummy, *vpo;
+ struct nv50_sreg4 dummy;
int i, n, c, m = 0;
- uint32_t map[16], lin[4], reg[5], pcrd[8];
+ uint32_t map[16], lin[4], reg[6], pcrd[8];
+ uint8_t zval = 0x40;
+ if (nv50->geomprog) {
+ vp = nv50->geomprog;
+ zval = 0x80;
+ }
memset(map, 0, sizeof(map));
memset(lin, 0, sizeof(lin));
reg[1] = 0x00000004; /* low and high clip distance map ids */
reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */
reg[3] = 0x00000000; /* point size map id & enable */
+ reg[5] = 0x00000000; /* primitive ID map slot */
reg[0] = fp->cfg.regs[0]; /* colour semantic reg */
reg[4] = fp->cfg.regs[1]; /* interpolant info */
dummy.linear = FALSE;
dummy.mask = 0xf; /* map all components of HPOS */
- m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]);
+ m = nv50_vec4_map(map, m, zval, lin, &dummy, &vp->cfg.out[0]);
dummy.mask = 0x0;
if (vp->cfg.clpd < 0x40) {
- for (c = 0; c < vp->cfg.clpd_nr; ++c)
- map[m++] = vp->cfg.clpd + c;
+ for (c = 0; c < vp->cfg.clpd_nr; ++c) {
+ map[m / 4] |= (vp->cfg.clpd + c) << ((m % 4) * 8);
+ ++m;
+ }
reg[1] = (m << 8);
}
/* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
if (nv50->rasterizer->pipe.light_twoside) {
- vpo = &vp->cfg.two_side[0];
+ struct nv50_sreg4 *vpo = &vp->cfg.two_side[0];
+ struct nv50_sreg4 *fpi = &fp->cfg.two_side[0];
- m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]);
- m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]);
+ m = nv50_vec4_map(map, m, zval, lin, &fpi[0], &vpo[0]);
+ m = nv50_vec4_map(map, m, zval, lin, &fpi[1], &vpo[1]);
}
reg[0] += m - 4; /* adjust FFC0 id */
reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */
- for (i = 0; i < fp->cfg.io_nr; i++) {
- ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id];
- ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id];
-
- /* position must be mapped first */
- assert(i == 0 || sn != TGSI_SEMANTIC_POSITION);
-
+ for (i = 0; i < fp->cfg.in_nr; i++) {
/* maybe even remove these from cfg.io */
- if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE)
+ if (fp->cfg.in[i].sn == TGSI_SEMANTIC_POSITION ||
+ fp->cfg.in[i].sn == TGSI_SEMANTIC_FACE)
continue;
- /* VP outputs and vp->cfg.io are in the same order */
- for (n = 0; n < vp->info.num_outputs; ++n) {
- if (vp->info.output_semantic_name[n] == sn &&
- vp->info.output_semantic_index[n] == si)
+ for (n = 0; n < vp->cfg.out_nr; ++n)
+ if (vp->cfg.out[n].sn == fp->cfg.in[i].sn &&
+ vp->cfg.out[n].si == fp->cfg.in[i].si)
break;
- }
- vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy;
- m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo);
+ m = nv50_vec4_map(map, m, zval, lin, &fp->cfg.in[i],
+ (n < vp->cfg.out_nr) ?
+ &vp->cfg.out[n] : &dummy);
+ }
+ /* PrimitiveID either is replaced by the system value, or
+ * written by the geometry shader into an output register
+ */
+ if (fp->cfg.prim_id < 0x40) {
+ map[m / 4] |= vp->cfg.prim_id << ((m % 4) * 8);
+ reg[5] = m++;
}
if (nv50->rasterizer->pipe.point_size_per_vertex) {
reg[3] = (m++ << 4) | 1;
}
- /* now fill the stateobj */
- so = so_new(7, 57, 0);
+ /* now fill the stateobj (at most 28 so_data) */
+ so = so_new(10, 54, 0);
n = (m + 3) / 4;
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
- so_data (so, m);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
- so_datap (so, map, n);
+ assert(m <= 32);
+ if (vp->type == PIPE_SHADER_GEOMETRY) {
+ so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+ so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
+ so_datap (so, map, n);
+ } else {
+ so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
+ so_data (so, vp->cfg.regs[0]);
+
+ so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
+ so_data (so, reg[5]);
+
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
+ so_datap (so, map, n);
+ }
so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
so_datap (so, reg, 4);
so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
so_datap (so, lin, 4);
- if (nv50->rasterizer->pipe.point_sprite) {
+ if (nv50->rasterizer->pipe.sprite_coord_enable) {
so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
so_data (so,
nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff));
so_datap (so, pcrd, 8);
}
- so_ref(so, &nv50->state.programs);
- so_ref(NULL, &so);
+ so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
+ so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
+
+ so_ref(so, &nv50->state.fp_linkage);
+ so_ref(NULL, &so);
+}
+
+/**
+ * Append the VP-output -> GP-input byte map to map32, starting at slot m.
+ *
+ * For each GP input the VP output with the same semantic name/index is
+ * looked up; every component the GP reads is mapped to the matching VP
+ * output register, or to 0x40 (0x41 for the 4th component) when the VP does
+ * not write it (presumably the hardware's constant slots - TODO confirm).
+ *
+ * \param map32  result map, addressed here as an array of bytes
+ * \param m      first free map slot
+ * \return the next free map slot
+ */
+static int
+construct_vp_gp_mapping(uint32_t *map32, int m,
+			struct nv50_program *vp, struct nv50_program *gp)
+{
+	uint8_t *map = (uint8_t *)map32;
+	int i, j, c;
+
+	for (i = 0; i < gp->cfg.in_nr; ++i) {
+		/* oid must start defined: with no matching VP output it is
+		 * still read by "oid += mv & 1" below
+		 */
+		uint8_t oid = 0, mv = 0, mg = gp->cfg.in[i].mask;
+
+		for (j = 0; j < vp->cfg.out_nr; ++j) {
+			if (vp->cfg.out[j].sn == gp->cfg.in[i].sn &&
+			    vp->cfg.out[j].si == gp->cfg.in[i].si) {
+				mv = vp->cfg.out[j].mask;
+				oid = vp->cfg.out[j].hw;
+				break;
+			}
+		}
+
+		/* walk the 4 components, consuming one mask bit per step */
+		for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
+			if (mg & mv & 1)
+				map[m++] = oid;
+			else
+			if (mg & 1)
+				map[m++] = (c == 3) ? 0x41 : 0x40;
+			/* VP output registers are packed: advance only for
+			 * components the VP actually writes
+			 */
+			oid += mv & 1;
+		}
+	}
+	return m;
+}
+
+/**
+ * Build the VP -> GP linkage state object (nv50->state.gp_linkage).
+ *
+ * Without a bound geometry program the state object is simply released.
+ * Otherwise the builtin attribute enables of VP and GP are merged and the
+ * VP result map produced by construct_vp_gp_mapping() is emitted.
+ */
+void
+nv50_gp_linkage_validate(struct nv50_context *nv50)
+{
+	struct nv50_program *gp = nv50->geomprog;
+	struct nv50_program *vp = nv50->vertprog;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so;
+	uint32_t map[16];
+	int size, words;
+
+	if (!gp) {
+		so_ref(NULL, &nv50->state.gp_linkage);
+		return;
+	}
+
+	memset(map, 0, sizeof(map));
+	size = construct_vp_gp_mapping(map, 0, vp, gp);
+
+	so = so_new(3, 24 - 3, 0);
+
+	so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
+	so_data  (so, vp->cfg.regs[0] | gp->cfg.regs[0]);
+
+	assert(size <= 32);
+	so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
+	so_data  (so, size);
+
+	/* the map holds one byte per slot and is emitted as 32-bit words */
+	words = (size + 3) / 4;
+	so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), words);
+	so_datap (so, map, words);
+
+	so_ref(so, &nv50->state.gp_linkage);
+	so_ref(NULL, &so);
}
void
nouveau_bo_ref(NULL, &p->bo);
+ FREE(p->immd);
nouveau_resource_free(&p->data[0]);
p->translated = 0;
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
#include "tgsi/tgsi_parse.h"
#include "nouveau/nouveau_stateobj.h"
+/* Convert a PIPE_MASK_* channel mask into the value written to
+ * NV50TCL_COLOR_MASK: one hex digit per channel, R in the lowest digit
+ * and A in the highest.
+ */
+static INLINE uint32_t
+nv50_colormask(unsigned mask)
+{
+	const uint32_t r = (mask & PIPE_MASK_R) ? 0x0001 : 0;
+	const uint32_t g = (mask & PIPE_MASK_G) ? 0x0010 : 0;
+	const uint32_t b = (mask & PIPE_MASK_B) ? 0x0100 : 0;
+	const uint32_t a = (mask & PIPE_MASK_A) ? 0x1000 : 0;
+
+	return r | g | b | a;
+}
+
static void *
nv50_blend_state_create(struct pipe_context *pipe,
const struct pipe_blend_state *cso)
struct nouveau_stateobj *so = so_new(5, 24, 0);
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj);
- unsigned cmask = 0, i;
+ unsigned i, blend_enabled = 0;
/*XXX ignored:
* - dither
*/
- if (cso->blend_enable == 0) {
- so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
+ so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
+ if (cso->independent_blend_enable) {
+ for (i = 0; i < 8; ++i) {
+ so_data(so, cso->rt[i].blend_enable);
+ if (cso->rt[i].blend_enable)
+ blend_enabled = 1;
+ }
+ } else
+ if (cso->rt[0].blend_enable) {
+ blend_enabled = 1;
for (i = 0; i < 8; i++)
- so_data(so, 0);
+ so_data(so, 1);
} else {
- so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
for (i = 0; i < 8; i++)
- so_data(so, 1);
+ so_data(so, 0);
+ }
+ if (blend_enabled) {
so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
- so_data (so, nvgl_blend_eqn(cso->rgb_func));
- so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor));
- so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor));
- so_data (so, nvgl_blend_eqn(cso->alpha_func));
- so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor));
+ so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_src_factor));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_dst_factor));
+ so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_src_factor));
so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
- so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_dst_factor));
}
if (cso->logicop_enable == 0 ) {
so_data (so, nvgl_logicop_func(cso->logicop_func));
}
- if (cso->colormask & PIPE_MASK_R)
- cmask |= (1 << 0);
- if (cso->colormask & PIPE_MASK_G)
- cmask |= (1 << 4);
- if (cso->colormask & PIPE_MASK_B)
- cmask |= (1 << 8);
- if (cso->colormask & PIPE_MASK_A)
- cmask |= (1 << 12);
so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8);
- for (i = 0; i < 8; i++)
- so_data(so, cmask);
+ if (cso->independent_blend_enable)
+ for (i = 0; i < 8; ++i)
+ so_data(so, nv50_colormask(cso->rt[i].colormask));
+ else {
+ uint32_t cmask = nv50_colormask(cso->rt[0].colormask);
+ for (i = 0; i < 8; i++)
+ so_data(so, cmask);
+ }
bso->pipe = *cso;
so_ref(so, &bso->so);
so_data (so, fui(cso->point_size));
so_method(so, tesla, NV50TCL_POINT_SPRITE_ENABLE, 1);
- so_data (so, cso->point_sprite);
+ so_data (so, cso->point_quad_rasterization ? 1 : 0);
so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3);
if (cso->front_winding == PIPE_WINDING_CCW) {
struct nv50_program *p = hwcso;
nv50_program_destroy(nv50, p);
- FREE((void*)p->pipe.tokens);
+ FREE((void *)p->pipe.tokens);
FREE(p);
}
struct nv50_program *p = hwcso;
nv50_program_destroy(nv50, p);
- FREE((void*)p->pipe.tokens);
+ FREE((void *)p->pipe.tokens);
+ FREE(p);
+}
+
+/**
+ * Create a geometry program CSO from a shader state template.
+ *
+ * The TGSI tokens are duplicated, so the caller keeps ownership of
+ * cso->tokens. Translation to hardware code is not done here.
+ *
+ * \return the new nv50_program, or NULL if an allocation failed
+ */
+static void *
+nv50_gp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv50_program *p = CALLOC_STRUCT(nv50_program);
+
+	if (!p)
+		return NULL;
+
+	p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+	if (!p->pipe.tokens) {
+		/* do not scan a token stream that could not be copied */
+		FREE(p);
+		return NULL;
+	}
+	p->type = PIPE_SHADER_GEOMETRY;
+	tgsi_scan_shader(p->pipe.tokens, &p->info);
+	return (void *)p;
+}
+
+/**
+ * Bind (or, with hwcso == NULL, unbind) the geometry program and flag
+ * NV50_NEW_GEOMPROG so the dependent state is revalidated.
+ */
+static void
+nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_context *nv50 = nv50_context(pipe);
+
+	/* must go into the geometry slot; storing it in fragprog would
+	 * replace the bound fragment program and leave geomprog unset
+	 */
+	nv50->geomprog = hwcso;
+	nv50->dirty |= NV50_NEW_GEOMPROG;
+}
+
+/* Delete a geometry program CSO: run nv50_program_destroy() on it, then
+ * free the duplicated token stream and the program structure itself.
+ */
+static void
+nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_program *p = hwcso;
+	struct nv50_context *nv50 = nv50_context(pipe);
+
+	nv50_program_destroy(nv50, p);
+	/* tokens are declared const in pipe_shader_state, hence the cast */
+	FREE((void *)p->pipe.tokens);
	FREE(p);
}
static void
nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
- const struct pipe_constant_buffer *buf )
+ struct pipe_buffer *buf )
{
struct nv50_context *nv50 = nv50_context(pipe);
if (shader == PIPE_SHADER_VERTEX) {
- nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer;
+ nv50->constbuf[PIPE_SHADER_VERTEX] = buf;
nv50->dirty |= NV50_NEW_VERTPROG_CB;
} else
if (shader == PIPE_SHADER_FRAGMENT) {
- nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer;
+ nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf;
nv50->dirty |= NV50_NEW_FRAGPROG_CB;
+ } else
+ if (shader == PIPE_SHADER_GEOMETRY) {
+ nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf;
+ nv50->dirty |= NV50_NEW_GEOMPROG_CB;
}
}
nv50->pipe.bind_fs_state = nv50_fp_state_bind;
nv50->pipe.delete_fs_state = nv50_fp_state_delete;
+ nv50->pipe.create_gs_state = nv50_gp_state_create;
+ nv50->pipe.bind_gs_state = nv50_gp_state_bind;
+ nv50->pipe.delete_gs_state = nv50_gp_state_delete;
+
nv50->pipe.set_blend_color = nv50_set_blend_color;
nv50->pipe.set_clip_state = nv50_set_clip_state;
nv50->pipe.set_constant_buffer = nv50_set_constant_buffer;
#include "tgsi/tgsi_parse.h"
#include "pipe/p_config.h"
-#include "pipe/internal/p_winsys_screen.h"
#include "r300_context.h"
#include "r300_reg.h"
dstA == PIPE_BLENDFACTOR_ONE);
}
+/* Reorder a Gallium channel mask for the R300 colour channel mask
+ * register: Gallium uses RGBA ordering while R300 expects BGRA, so the
+ * red and blue bits trade places and green/alpha stay where they are.
+ */
+static unsigned bgra_cmask(unsigned mask)
+{
+    const unsigned red_moved  = (mask & PIPE_MASK_R) << 2;
+    const unsigned blue_moved = (mask & PIPE_MASK_B) >> 2;
+    const unsigned unchanged  = mask & (PIPE_MASK_G | PIPE_MASK_A);
+
+    return red_moved | blue_moved | unchanged;
+}
+
/* Create a new blend state based on the CSO blend state.
*
* This encompasses alpha blending, logic/raster ops, and blend dithering. */
static void* r300_create_blend_state(struct pipe_context* pipe,
const struct pipe_blend_state* state)
{
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
- if (state->blend_enable)
+ if (state->rt[0].blend_enable)
{
- unsigned eqRGB = state->rgb_func;
- unsigned srcRGB = state->rgb_src_factor;
- unsigned dstRGB = state->rgb_dst_factor;
+ unsigned eqRGB = state->rt[0].rgb_func;
+ unsigned srcRGB = state->rt[0].rgb_src_factor;
+ unsigned dstRGB = state->rt[0].rgb_dst_factor;
- unsigned eqA = state->alpha_func;
- unsigned srcA = state->alpha_src_factor;
- unsigned dstA = state->alpha_dst_factor;
+ unsigned eqA = state->rt[0].alpha_func;
+ unsigned srcA = state->rt[0].alpha_src_factor;
+ unsigned dstA = state->rt[0].alpha_dst_factor;
/* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
* this is just the crappy D3D naming */
(state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
}
- /* Color Channel Mask */
- if (state->colormask & PIPE_MASK_R) {
- blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_RED_MASK0;
- }
- if (state->colormask & PIPE_MASK_G) {
- blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0;
- }
- if (state->colormask & PIPE_MASK_B) {
- blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0;
- }
- if (state->colormask & PIPE_MASK_A) {
- blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0;
+ /* Color channel masks for all MRTs. */
+ blend->color_channel_mask = bgra_cmask(state->rt[0].colormask);
+ if (r300screen->caps->is_r500 && state->independent_blend_enable) {
+ if (state->rt[1].blend_enable) {
+ blend->color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4;
+ }
+ if (state->rt[2].blend_enable) {
+ blend->color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8;
+ }
+ if (state->rt[3].blend_enable) {
+ blend->color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12;
+ }
}
if (state->dither) {
const struct pipe_blend_color* color)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
struct r300_blend_color_state* state =
(struct r300_blend_color_state*)r300->blend_color_state.state;
union util_color uc;
float_to_fixed10(color->color[2]) |
(float_to_fixed10(color->color[1]) << 16);
+ r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2;
r300->blend_color_state.dirty = TRUE;
}
if (r300_screen(pipe->screen)->caps->has_tcl) {
memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state));
- r300->clip_state.dirty = TRUE;
+ r300->clip_state.size = 29;
} else {
draw_flush(r300->draw);
draw_set_clip_state(r300->draw, state);
+ r300->clip_state.size = 2;
}
+
+ r300->clip_state.dirty = TRUE;
}
/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
(r300_translate_stencil_op(state->stencil[1].zfail_op) <<
R300_S_BACK_ZFAIL_OP_SHIFT);
- /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */
if (caps->is_r500)
{
dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
r300_translate_alpha_function(state->alpha.func) |
R300_FG_ALPHA_FUNC_ENABLE;
- /* XXX figure out why emitting 10bit alpha ref causes CS to dump */
- /* always use 8bit alpha ref */
+ /* We could use 10bit alpha ref but who needs that? */
dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
if (caps->is_r500)
void* state)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
r300->dsa_state.state = state;
+ r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6;
r300->dsa_state.dirty = TRUE;
}
FREE(state);
}
-static void r300_set_scissor_regs(const struct pipe_scissor_state* state,
- struct r300_scissor_regs *scissor,
- boolean is_r500)
-{
- if (is_r500) {
- scissor->top_left =
- (state->minx << R300_SCISSORS_X_SHIFT) |
- (state->miny << R300_SCISSORS_Y_SHIFT);
- scissor->bottom_right =
- ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) |
- ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT);
- } else {
- /* Offset of 1440 in non-R500 chipsets. */
- scissor->top_left =
- ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) |
- ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT);
- scissor->bottom_right =
- (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
- (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
- }
-}
-
static void
r300_set_framebuffer_state(struct pipe_context* pipe,
const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_scissor_state* scissor =
- (struct r300_scissor_state*)r300->scissor_state.state;
- struct pipe_scissor_state pscissor;
+ uint32_t zbuffer_bpp = 0;
+
+ r300->fb_state.size = (10 * state->nr_cbufs) +
+ (2 * (4 - state->nr_cbufs)) +
+ (state->zsbuf ? 10 : 0) + 6;
+
+ if (state->nr_cbufs > 4) {
+ debug_printf("r300: Implementation error: Too many MRTs in %s, "
+ "refusing to bind framebuffer state!\n", __FUNCTION__);
+ return;
+ }
if (r300->draw) {
draw_flush(r300->draw);
}
- r300->framebuffer_state = *state;
-
- /* XXX Arg. This is silly. */
- pscissor.minx = pscissor.miny = 0;
- pscissor.maxx = state->width;
- pscissor.maxy = state->height;
- r300_set_scissor_regs(&pscissor, &scissor->framebuffer,
- r300_screen(r300->context.screen)->caps->is_r500);
+ memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state));
/* Don't rely on the order of states being set for the first time. */
- r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
-
+ /* XXX wait what */
r300->blend_state.dirty = TRUE;
r300->dsa_state.dirty = TRUE;
+ r300->fb_state.dirty = TRUE;
r300->scissor_state.dirty = TRUE;
+
+ /* Polygon offset depends on the zbuffer bit depth. */
+ if (state->zsbuf && r300->polygon_offset_enabled) {
+ switch (util_format_get_blocksize(state->zsbuf->texture->format)) {
+ case 2:
+ zbuffer_bpp = 16;
+ break;
+ case 4:
+ zbuffer_bpp = 24;
+ break;
+ }
+
+ if (r300->zbuffer_bpp != zbuffer_bpp) {
+ r300->zbuffer_bpp = zbuffer_bpp;
+ r300->rs_state.dirty = TRUE;
+ }
+ }
}
/* Create fragment shader state. */
r300_pick_fragment_shader(r300);
if (r300->vs && r300_vertex_shader_setup_wpos(r300)) {
- r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
+ r300->vertex_format_state.dirty = TRUE;
}
r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS;
rs->point_size = pack_float_16_6x(state->point_size) |
(pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);
- rs->point_minmax =
- ((int)(state->point_size_min * 6.0) <<
+ /* set hw limits - clamping done by state tracker in vs or point_size
+ XXX always need to emit this? */
+ rs->point_minmax =
+ ((int)(0.0 * 6.0) <<
R300_GA_POINT_MINMAX_MIN_SHIFT) |
- ((int)(state->point_size_max * 6.0) <<
+ ((int)(4096.0 * 6.0) <<
R300_GA_POINT_MINMAX_MAX_SHIFT);
rs->line_control = pack_float_16_6x(state->line_width) |
R300_GA_LINE_CNTL_END_TYPE_COMP;
- /* XXX I think there is something wrong with the polygon mode,
- * XXX re-test when r300g is in a better shape */
-
/* Enable polygon mode */
if (state->fill_cw != PIPE_POLYGON_MODE_FILL ||
state->fill_ccw != PIPE_POLYGON_MODE_FILL) {
}
if (rs->polygon_offset_enable) {
- rs->depth_offset_front = rs->depth_offset_back =
- fui(state->offset_units);
- rs->depth_scale_front = rs->depth_scale_back =
- fui(state->offset_scale);
+ rs->depth_offset = state->offset_units;
+ rs->depth_scale = state->offset_scale;
}
if (state->line_stipple_enable) {
draw_set_rasterizer_state(r300->draw, &rs->rs);
}
- r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport;
+ if (rs) {
+ r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport;
+ r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw;
+ } else {
+ r300->tcl_bypass = FALSE;
+ r300->polygon_offset_enabled = FALSE;
+ }
r300->rs_state.state = rs;
r300->rs_state.dirty = TRUE;
r300->viewport_state.dirty = TRUE;
/* XXX Clean these up when we move to atom emits */
- r300->dirty_state |= R300_NEW_RS_BLOCK;
if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) {
r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
}
const struct pipe_scissor_state* state)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_scissor_state* scissor =
- (struct r300_scissor_state*)r300->scissor_state.state;
- r300_set_scissor_regs(state, &scissor->scissor,
- r300_screen(r300->context.screen)->caps->is_r500);
+ memcpy(r300->scissor_state.state, state,
+ sizeof(struct pipe_scissor_state));
r300->scissor_state.dirty = TRUE;
}
draw_set_vertex_buffers(r300->draw, count, buffers);
}
- r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
+ r300->vertex_format_state.dirty = TRUE;
+}
+
+/* Check that every vertex element can be fetched as whole DWORDs.
+ *
+ * Returns FALSE as soon as one element's vertex buffer stride or source
+ * format block size is not a multiple of 4 bytes, TRUE otherwise.
+ */
+static boolean r300_validate_aos(struct r300_context *r300)
+{
+    int i;
+
+    for (i = 0; i < r300->vertex_element_count; i++) {
+        const struct pipe_vertex_element *elem = &r300->vertex_element[i];
+        const struct pipe_vertex_buffer *buf =
+            &r300->vertex_buffer[elem->vertex_buffer_index];
+
+        /* the stride of the buffer feeding this element */
+        if (buf->stride % 4 != 0)
+            return FALSE;
+
+        /* the size of one element in its source format */
+        if (util_format_get_blocksize(elem->src_format) % 4 != 0)
+            return FALSE;
+    }
+
+    return TRUE;
}
static void r300_set_vertex_elements(struct pipe_context* pipe,
draw_flush(r300->draw);
draw_set_vertex_elements(r300->draw, count, elements);
}
+
+ if (!r300_validate_aos(r300)) {
+ /* XXX We should fallback using draw. */
+ assert(0);
+ abort();
+ }
}
static void* r300_create_vs_state(struct pipe_context* pipe,
r300_vertex_shader_setup_wpos(r300);
}
+ r300->vertex_format_state.dirty = TRUE;
+
r300->dirty_state |=
- R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS |
- R300_NEW_VERTEX_FORMAT;
+ R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS;
} else {
draw_flush(r300->draw);
draw_bind_vertex_shader(r300->draw,
static void r300_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ struct pipe_buffer *buf)
{
struct r300_context* r300 = r300_context(pipe);
void *mapped;
- if (buf == NULL || buf->buffer->size == 0 ||
- (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL)
+ if (buf == NULL || buf->size == 0 ||
+ (mapped = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_READ)) == NULL)
{
r300->shader_constants[shader].count = 0;
return;
}
- assert((buf->buffer->size % 4 * sizeof(float)) == 0);
- memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size);
- r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float));
- pipe_buffer_unmap(pipe->screen, buf->buffer);
+ assert((buf->size % 4 * sizeof(float)) == 0);
+ memcpy(r300->shader_constants[shader].constants, mapped, buf->size);
+ r300->shader_constants[shader].count = buf->size / (4 * sizeof(float));
+ pipe_buffer_unmap(pipe->screen, buf);
if (shader == PIPE_SHADER_VERTEX)
r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS;
**************************************************************************/
#include "sp_video_context.h"
-#include <pipe/p_inlines.h>
+#include <util/u_inlines.h>
#include <util/u_memory.h>
-#include "softpipe/sp_winsys.h"
#include "softpipe/sp_texture.h"
static void
rast.scissor = 0;
rast.poly_smooth = 0;
rast.poly_stipple_enable = 0;
- rast.point_sprite = 0;
+ rast.sprite_coord_enable = 0;
rast.point_size_per_vertex = 0;
rast.multisample = 0;
rast.line_smooth = 0;
rast.bypass_vs_clip_and_viewport = 0;
rast.line_width = 1;
rast.point_smooth = 0;
+ rast.point_quad_rasterization = 0;
rast.point_size = 1;
rast.offset_units = 1;
rast.offset_scale = 1;
- /*rast.sprite_coord_mode[i] = ;*/
ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
- blend.blend_enable = 0;
- blend.rgb_func = PIPE_BLEND_ADD;
- blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
- blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
- blend.alpha_func = PIPE_BLEND_ADD;
- blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
- blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+ blend.independent_blend_enable = 0;
+ blend.rt[0].blend_enable = 0;
+ blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+ blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+ blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+ blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
blend.logicop_enable = 0;
blend.logicop_func = PIPE_LOGICOP_CLEAR;
/* Needed to allow color writes to FB, even if blending disabled */
- blend.colormask = PIPE_MASK_RGBA;
+ blend.rt[0].colormask = PIPE_MASK_RGBA;
blend.dither = 0;
ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
ctx->base.set_decode_target = sp_mpeg12_set_decode_target;
ctx->base.set_csc_matrix = sp_mpeg12_set_csc_matrix;
- ctx->pipe = softpipe_create(screen);
+ ctx->pipe = screen->context_create(screen, NULL);
if (!ctx->pipe) {
FREE(ctx);
return NULL;
#define SVGA_TEX_UNITS 8
+ #define SVGA_MAX_POINTSIZE 80.0
struct draw_vertex_shader;
struct svga_shader_result;
float slopescaledepthbias;
float depthbias;
float pointsize;
- float pointsize_min;
- float pointsize_max;
unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */
unsigned need_pipeline:16; /* which prims do we need help for? */
unsigned texture_timestamp;
- /* Internally generated shaders:
- */
- unsigned white_fs_id;
-
/*
*/
struct svga_sw_state sw;
void svga_hwtnl_flush_retry( struct svga_context *svga );
+struct pipe_context *
+svga_context_create(struct pipe_screen *screen,
+ void *priv);
+
/***********************************************************************
* Inline conversion functions. These are better-typed than the
**********************************************************/
#include "draw/draw_context.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "svga_context.h"
-#include "svga_state.h"
#include "svga_hw_reg.h"
/* light_twoside - XXX: need fragment shader varient */
/* poly_smooth - XXX: no fallback available */
/* poly_stipple_enable - draw module */
- /* point_sprite - ? */
+ /* sprite_coord_enable - ? */
+ /* point_quad_rasterization - ? */
/* point_size_per_vertex - ? */
/* sprite_coord_mode - ??? */
/* bypass_vs_viewport_and_clip - handled by viewport setup */
rast->antialiasedlineenable = templ->line_smooth;
rast->lastpixel = templ->line_last_pixel;
rast->pointsize = templ->point_size;
- rast->pointsize_min = templ->point_size_min;
- rast->pointsize_max = templ->point_size_max;
rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
/* Use swtnl + decomposition implement these:
**********************************************************/
#include "util/u_memory.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
#include "util/u_string.h"
#include "util/u_math.h"
#include "svga_screen.h"
#include "svga_screen_texture.h"
#include "svga_screen_buffer.h"
-#include "svga_cmd.h"
#include "svga_debug.h"
-#include "svga_hw_reg.h"
#include "svga3d_shaderdefs.h"
/* Keep this to a reasonable size to avoid failures in
* conform/pntaa.c:
*/
- return 80.0;
+ return SVGA_MAX_POINTSIZE;
case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
return 4.0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+
default:
return 0;
}
screen->get_param = svga_get_param;
screen->get_paramf = svga_get_paramf;
screen->is_format_supported = svga_is_format_supported;
+ screen->context_create = svga_context_create;
screen->fence_reference = svga_fence_reference;
screen->fence_signalled = svga_fence_signalled;
screen->fence_finish = svga_fence_finish;
pipe_mutex_init(svgascreen->tex_mutex);
pipe_mutex_init(svgascreen->swc_mutex);
- LIST_INITHEAD(&svgascreen->cached_buffers);
-
svga_screen_cache_init(svgascreen);
return screen;
*
**********************************************************/
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "svga_state.h"
#include "svga_cmd.h"
-#include "svga_hw_reg.h"
-
-
struct rs_queue {
unsigned rs_count;
EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail );
EMIT_RS( svga, curr->linepattern, LINEPATTERN, fail );
EMIT_RS_FLOAT( svga, curr->pointsize, POINTSIZE, fail );
- EMIT_RS_FLOAT( svga, curr->pointsize_min, POINTSIZEMIN, fail );
- EMIT_RS_FLOAT( svga, curr->pointsize_max, POINTSIZEMAX, fail );
+ /* XXX still need to set this? */
+ EMIT_RS_FLOAT( svga, 0.0, POINTSIZEMIN, fail );
+ EMIT_RS_FLOAT( svga, SVGA_MAX_POINTSIZE, POINTSIZEMAX, fail );
}
if (dirty & (SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_NEED_PIPELINE))
return;
trace_dump_struct_begin("pipe_reference");
- trace_dump_member(int, &reference->count, count);
+ trace_dump_member(int, reference, count);
trace_dump_struct_end();
}
trace_dump_member(bool, state, poly_smooth);
trace_dump_member(bool, state, poly_stipple_enable);
trace_dump_member(bool, state, point_smooth);
- trace_dump_member(bool, state, point_sprite);
+ trace_dump_member(uint, state, sprite_coord_enable);
+ trace_dump_member(bool, state, sprite_coord_mode);
+ trace_dump_member(bool, state, point_quad_rasterization);
trace_dump_member(bool, state, point_size_per_vertex);
trace_dump_member(bool, state, multisample);
trace_dump_member(bool, state, line_smooth);
trace_dump_member(float, state, line_width);
trace_dump_member(float, state, point_size);
- trace_dump_member(float, state, point_size_min);
- trace_dump_member(float, state, point_size_max);
trace_dump_member(float, state, offset_units);
trace_dump_member(float, state, offset_scale);
- trace_dump_member_array(uint, state, sprite_coord_mode);
-
trace_dump_struct_end();
}
}
-void trace_dump_constant_buffer(const struct pipe_constant_buffer *state)
+void trace_dump_constant_buffer(const struct pipe_buffer *state)
{
if (!trace_dumping_enabled_locked())
return;
trace_dump_struct_begin("pipe_constant_buffer");
- trace_dump_member(buffer_ptr, state, buffer);
+ trace_dump_reference(&state->reference);
trace_dump_struct_end();
}
trace_dump_struct_end();
}
+/**
+ * Dump the members of one per-render-target blend state entry.
+ *
+ * Invoked once per entry of pipe_blend_state::rt through
+ * trace_dump_struct_array() in trace_dump_blend_state().
+ *
+ * blend_enable lives in pipe_rt_blend_state now, so it has to be dumped
+ * here; otherwise the enable bit is missing from the trace entirely.
+ *
+ * NOTE(review): unlike trace_dump_reference(), this does not wrap its
+ * members in trace_dump_struct_begin()/trace_dump_struct_end() -- confirm
+ * whether trace_dump_struct_array() expects the callee to do so.
+ */
+static void trace_dump_rt_blend_state(const struct pipe_rt_blend_state *state)
+{
+   trace_dump_member(bool, state, blend_enable);
+
+   trace_dump_member(uint, state, rgb_func);
+   trace_dump_member(uint, state, rgb_src_factor);
+   trace_dump_member(uint, state, rgb_dst_factor);
+
+   trace_dump_member(uint, state, alpha_func);
+   trace_dump_member(uint, state, alpha_src_factor);
+   trace_dump_member(uint, state, alpha_dst_factor);
+
+   trace_dump_member(uint, state, colormask);
+}
void trace_dump_blend_state(const struct pipe_blend_state *state)
{
+ unsigned valid_entries = 1;
if (!trace_dumping_enabled_locked())
return;
trace_dump_struct_begin("pipe_blend_state");
- trace_dump_member(bool, state, blend_enable);
-
- trace_dump_member(uint, state, rgb_func);
- trace_dump_member(uint, state, rgb_src_factor);
- trace_dump_member(uint, state, rgb_dst_factor);
-
- trace_dump_member(uint, state, alpha_func);
- trace_dump_member(uint, state, alpha_src_factor);
- trace_dump_member(uint, state, alpha_dst_factor);
+ trace_dump_member(bool, state, dither);
trace_dump_member(bool, state, logicop_enable);
trace_dump_member(uint, state, logicop_func);
- trace_dump_member(uint, state, colormask);
- trace_dump_member(bool, state, dither);
+ trace_dump_member(bool, state, independent_blend_enable);
+
+ if (state->independent_blend_enable)
+ valid_entries = PIPE_MAX_COLOR_BUFS;
+
+ trace_dump_struct_array(rt_blend_state, state->rt, valid_entries);
trace_dump_struct_end();
}
trace_dump_member(uint, state, compare_mode);
trace_dump_member(uint, state, compare_func);
trace_dump_member(bool, state, normalized_coords);
- trace_dump_member(uint, state, prefilter);
trace_dump_member(float, state, lod_bias);
trace_dump_member(float, state, min_lod);
trace_dump_member(float, state, max_lod);
/**
* Point sprite coord modes
*/
- #define PIPE_SPRITE_COORD_NONE 0
- #define PIPE_SPRITE_COORD_UPPER_LEFT 1
- #define PIPE_SPRITE_COORD_LOWER_LEFT 2
+ #define PIPE_SPRITE_COORD_UPPER_LEFT 0
+ #define PIPE_SPRITE_COORD_LOWER_LEFT 1
/**
#define PIPE_CAP_NPOT_TEXTURES 2
#define PIPE_CAP_TWO_SIDED_STENCIL 3
#define PIPE_CAP_GLSL 4 /* XXX need something better */
-#define PIPE_CAP_S3TC 5 /* XXX: deprecated; cap determined via supported sampler formats */
+#define PIPE_CAP_DUAL_SOURCE_BLEND 5
#define PIPE_CAP_ANISOTROPIC_FILTER 6
#define PIPE_CAP_POINT_SPRITE 7
#define PIPE_CAP_MAX_RENDER_TARGETS 8
#define PIPE_CAP_MAX_PREDICATE_REGISTERS 30
#define PIPE_CAP_MAX_COMBINED_SAMPLERS 31 /*< Maximum texture image units accessible from vertex
and fragment shaders combined */
+#define PIPE_CAP_MAX_CONST_BUFFERS 32
+#define PIPE_CAP_MAX_CONST_BUFFER_SIZE 33 /*< In bytes */
+#define PIPE_CAP_INDEP_BLEND_ENABLE 34 /*< blend enables and write masks per rendertarget */
+#define PIPE_CAP_INDEP_BLEND_FUNC 35 /*< different blend funcs per rendertarget */
+#define PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT 36
+#define PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT 37
+#define PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER 38
+#define PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER 39
/**
#include "p_compiler.h"
#include "p_defines.h"
#include "p_format.h"
-#include "p_refcnt.h"
#include "p_screen.h"
#define PIPE_MAX_ATTRIBS 32
#define PIPE_MAX_CLIP_PLANES 6
#define PIPE_MAX_COLOR_BUFS 8
-#define PIPE_MAX_CONSTANT 32
+#define PIPE_MAX_CONSTANT_BUFFERS 32
#define PIPE_MAX_SAMPLERS 16
#define PIPE_MAX_VERTEX_SAMPLERS 16
#define PIPE_MAX_SHADER_INPUTS 16
#define PIPE_MAX_TEXTURE_LEVELS 16
-/* fwd decls */
-struct pipe_surface;
+struct pipe_reference
+{
+ int32_t count; /* reference counter; flagged "atomic" by the author, i.e. presumably only to be modified through atomic helpers -- confirm against the refcount inlines */
+};
/**
unsigned poly_smooth:1;
unsigned poly_stipple_enable:1;
unsigned point_smooth:1;
- unsigned point_sprite:1;
+ unsigned sprite_coord_enable:PIPE_MAX_SHADER_OUTPUTS;
+ unsigned sprite_coord_mode:1; /**< PIPE_SPRITE_COORD_ */
+ unsigned point_quad_rasterization:1; /** points rasterized as quads or points */
unsigned point_size_per_vertex:1; /**< size computed in vertex shader */
unsigned multisample:1; /* XXX maybe more ms state in future */
unsigned line_smooth:1;
float line_width;
float point_size; /**< used when no per-vertex size */
- float point_size_min; /* XXX - temporary, will go away */
- float point_size_max; /* XXX - temporary, will go away */
float offset_units;
float offset_scale;
- ubyte sprite_coord_mode[PIPE_MAX_SHADER_OUTPUTS]; /**< PIPE_SPRITE_COORD_ */
};
};
-/**
- * Constants for vertex/fragment shaders
- */
-struct pipe_constant_buffer
-{
- struct pipe_buffer *buffer;
-};
-
-
struct pipe_shader_state
{
const struct tgsi_token *tokens;
};
-struct pipe_blend_state
+struct pipe_rt_blend_state
{
unsigned blend_enable:1;
unsigned alpha_src_factor:5; /**< PIPE_BLENDFACTOR_x */
unsigned alpha_dst_factor:5; /**< PIPE_BLENDFACTOR_x */
+ unsigned colormask:4; /**< bitmask of PIPE_MASK_R/G/B/A */
+};
+
+struct pipe_blend_state
+{
+ unsigned independent_blend_enable:1;
unsigned logicop_enable:1;
unsigned logicop_func:4; /**< PIPE_LOGICOP_x */
-
- unsigned colormask:4; /**< bitmask of PIPE_MASK_R/G/B/A */
unsigned dither:1;
+ struct pipe_rt_blend_state rt[PIPE_MAX_COLOR_BUFS];
};
unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */
unsigned compare_func:3; /**< PIPE_FUNC_x */
unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */
- unsigned prefilter:4; /**< Wierd sampling state exposed by some api's */
float lod_bias; /**< LOD/lambda bias */
float min_lod, max_lod; /**< LOD clamp range, after bias */
float border_color[4];
/** Offset of this attribute, in bytes, from the start of the vertex */
unsigned src_offset;
+ /** Instance data rate divisor. 0 means this is per-vertex data,
+ * n means per-instance data used for n consecutive instances (n > 0).
+ */
+ unsigned instance_divisor;
+
/** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does
* this attribute live in?
*/
};
-/* Reference counting helper functions */
-static INLINE void
-pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf)
-{
- struct pipe_buffer *old_buf = *ptr;
-
- if (pipe_reference(&(*ptr)->reference, &buf->reference))
- old_buf->screen->buffer_destroy(old_buf);
- *ptr = buf;
-}
-
-static INLINE void
-pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
-{
- struct pipe_surface *old_surf = *ptr;
-
- if (pipe_reference(&(*ptr)->reference, &surf->reference))
- old_surf->texture->screen->tex_surface_destroy(old_surf);
- *ptr = surf;
-}
-
-static INLINE void
-pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *tex)
-{
- struct pipe_texture *old_tex = *ptr;
-
- if (pipe_reference(&(*ptr)->reference, &tex->reference))
- old_tex->screen->texture_destroy(old_tex);
- *ptr = tex;
-}
-
-
#ifdef __cplusplus
}
#endif
"pipe_blend_color": gallium.BlendColor,
"pipe_blend_state": gallium.Blend,
#"pipe_clip_state": gallium.Clip,
- #"pipe_constant_buffer": gallium.ConstantBuffer,
+ #"pipe_buffer": gallium.Buffer,
"pipe_depth_state": gallium.Depth,
"pipe_stencil_state": gallium.Stencil,
"pipe_alpha_state": gallium.Alpha,
member_array_factories = {
- "pipe_rasterizer_state": {"sprite_coord_mode": gallium.ByteArray},
+ #"pipe_rasterizer_state": {"sprite_coord_mode": gallium.ByteArray},
"pipe_poly_stipple": {"stipple": gallium.UnsignedArray},
"pipe_viewport_state": {"scale": gallium.FloatArray, "translate": gallium.FloatArray},
#"pipe_clip_state": {"ucp": gallium.FloatArray},
sys.stdout.flush()
def set_constant_buffer(self, shader, index, buffer):
- if buffer is not None and buffer.buffer is not None:
- self.real.set_constant_buffer(shader, index, buffer.buffer)
+ if buffer is not None:
+ self.real.set_constant_buffer(shader, index, buffer)
- self.dump_constant_buffer(buffer.buffer)
+ self.dump_constant_buffer(buffer)
def set_framebuffer_state(self, state):
_state = gallium.Framebuffer()
gallium.PIPE_FORMAT_R32G32B32_FLOAT: '3f',
gallium.PIPE_FORMAT_R32G32B32A32_FLOAT: '4f',
gallium.PIPE_FORMAT_B8G8R8A8_UNORM: '4B',
+ gallium.PIPE_FORMAT_R8G8B8A8_UNORM: '4B',
+ gallium.PIPE_FORMAT_R16G16B16_SNORM: '3h',
}[velem.src_format]
data = vbuf.buffer.read()
#include "main/glheader.h"
#include "main/context.h"
-#include "main/hash.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/mtypes.h"
matrix = &ctx->_ModelProjectMatrix;
}
else if (mat == STATE_TEXTURE_MATRIX) {
+ ASSERT(index < Elements(ctx->TextureMatrixStack));
matrix = ctx->TextureMatrixStack[index].Top;
}
else if (mat == STATE_PROGRAM_MATRIX) {
+ ASSERT(index < Elements(ctx->ProgramMatrixStack));
matrix = ctx->ProgramMatrixStack[index].Top;
}
else if (mat == STATE_COLOR_MATRIX) {
value[3] = (GLfloat)(ctx->Fog.Density * ONE_DIV_SQRT_LN2);
return;
+ case STATE_POINT_SIZE_CLAMPED:
+ {
+ /* this includes implementation dependent limits, to avoid
+ * another potentially necessary clamp.
+ * Note: for sprites, point smooth (point AA) is ignored
+ * and we'll clamp to MinPointSizeAA and MaxPointSize, because we
+ * expect drivers will want to say their minimum for AA size is 0.0
+ * but for non-AA it's 1.0 (because normal points with size below 1.0
+ * need to get rounded up to 1.0, hence never disappear). GL does
+ * not specify max clamp size for sprites, other than it needs to be
+ * at least as large as max AA size, hence use non-AA size there.
+ */
+ GLfloat minImplSize;
+ GLfloat maxImplSize;
+ if (ctx->Point.PointSprite) {
+ minImplSize = ctx->Const.MinPointSizeAA;
+ maxImplSize = ctx->Const.MaxPointSize;
+ }
+ else if (ctx->Point.SmoothFlag || ctx->Multisample._Enabled) {
+ minImplSize = ctx->Const.MinPointSizeAA;
+ maxImplSize = ctx->Const.MaxPointSizeAA;
+ }
+ else {
+ minImplSize = ctx->Const.MinPointSize;
+ maxImplSize = ctx->Const.MaxPointSize;
+ }
+ value[0] = ctx->Point.Size;
+ value[1] = ctx->Point.MinSize >= minImplSize ? ctx->Point.MinSize : minImplSize;
+ value[2] = ctx->Point.MaxSize <= maxImplSize ? ctx->Point.MaxSize : maxImplSize;
+ value[3] = ctx->Point.Threshold;
+ }
+ return;
+ case STATE_POINT_SIZE_IMPL_CLAMP:
+ {
+ /* for implementation clamp only in vs */
+ GLfloat minImplSize;
+ GLfloat maxImplSize;
+ if (ctx->Point.PointSprite) {
+ minImplSize = ctx->Const.MinPointSizeAA;
+ maxImplSize = ctx->Const.MaxPointSize;
+ }
+ else if (ctx->Point.SmoothFlag || ctx->Multisample._Enabled) {
+ minImplSize = ctx->Const.MinPointSizeAA;
+ maxImplSize = ctx->Const.MaxPointSizeAA;
+ }
+ else {
+ minImplSize = ctx->Const.MinPointSize;
+ maxImplSize = ctx->Const.MaxPointSize;
+ }
+ value[0] = ctx->Point.Size;
+ value[1] = minImplSize;
+ value[2] = maxImplSize;
+ value[3] = ctx->Point.Threshold;
+ }
+ return;
case STATE_LIGHT_SPOT_DIR_NORMALIZED:
{
/* here, state[2] is the light number */
return _NEW_TEXTURE;
case STATE_FOG_PARAMS_OPTIMIZED:
return _NEW_FOG;
+ case STATE_POINT_SIZE_CLAMPED:
+ case STATE_POINT_SIZE_IMPL_CLAMP:
+ return _NEW_POINT | _NEW_MULTISAMPLE;
case STATE_LIGHT_SPOT_DIR_NORMALIZED:
case STATE_LIGHT_POSITION:
case STATE_LIGHT_POSITION_NORMALIZED:
case STATE_FOG_PARAMS_OPTIMIZED:
append(dst, "fogParamsOptimized");
break;
+ case STATE_POINT_SIZE_CLAMPED:
+ append(dst, "pointSizeClamped");
+ break;
+ case STATE_POINT_SIZE_IMPL_CLAMP:
+ append(dst, "pointSizeImplClamp");
+ break;
case STATE_LIGHT_SPOT_DIR_NORMALIZED:
append(dst, "lightSpotDirNormalized");
break;
mat = ctx->ProjectionMatrixStack.Top;
}
else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
- mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
+ GLuint unit = MIN2(ctx->Texture.CurrentUnit,
+ Elements(ctx->TextureMatrixStack) - 1);
+ mat = ctx->TextureMatrixStack[unit].Top;
}
else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
mat = ctx->ColorMatrixStack.Top;
else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
- ASSERT(n < MAX_PROGRAM_MATRICES);
+ ASSERT(n < Elements(ctx->ProgramMatrixStack));
mat = ctx->ProgramMatrixStack[n].Top;
}
else {
#include "main/imports.h"
#include "main/context.h"
-#include "main/extensions.h"
#include "main/macros.h"
#include "pipe/p_context.h"
= _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH));
c->MaxPointSizeAA
= _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH_AA));
+ /* called after _mesa_create_context/_mesa_init_point, fix default user
+ * settable max point size up
+ */
+ st->ctx->Point.MaxSize = MAX2(c->MaxPointSize, c->MaxPointSizeAA);
+ /* these are not queryable. Note that GL basically mandates a 1.0 minimum
+ * for non-aa sizes, but we can go down to 0.0 for aa points.
+ */
+ c->MinPointSize = 1.0f;
+ c->MinPointSizeAA = 0.0f;
c->MaxTextureMaxAnisotropy
= _maxf(2.0f, screen->get_paramf(screen, PIPE_CAP_MAX_TEXTURE_ANISOTROPY));
* Extensions that are supported by all Gallium drivers:
*/
ctx->Extensions.ARB_copy_buffer = GL_TRUE;
+ ctx->Extensions.ARB_fragment_coord_conventions = GL_TRUE;
ctx->Extensions.ARB_fragment_program = GL_TRUE;
ctx->Extensions.ARB_map_buffer_range = GL_TRUE;
ctx->Extensions.ARB_multisample = GL_TRUE;
ctx->Extensions.EXT_blend_subtract = GL_TRUE;
ctx->Extensions.EXT_framebuffer_blit = GL_TRUE;
ctx->Extensions.EXT_framebuffer_object = GL_TRUE;
+ ctx->Extensions.EXT_framebuffer_multisample = GL_TRUE;
ctx->Extensions.EXT_fog_coord = GL_TRUE;
ctx->Extensions.EXT_multi_draw_arrays = GL_TRUE;
ctx->Extensions.EXT_pixel_buffer_object = GL_TRUE;
ctx->Extensions.NV_texgen_reflection = GL_TRUE;
ctx->Extensions.NV_texture_env_combine4 = GL_TRUE;
+#if FEATURE_OES_draw_texture
+ ctx->Extensions.OES_draw_texture = GL_TRUE;
+#endif
+
ctx->Extensions.SGI_color_matrix = GL_TRUE;
ctx->Extensions.SGIS_generate_mipmap = GL_TRUE;
if (st->pipe->render_condition) {
ctx->Extensions.NV_conditional_render = GL_TRUE;
}
+
+ if (screen->get_param(screen, PIPE_CAP_INDEP_BLEND_ENABLE)) {
+ ctx->Extensions.EXT_draw_buffers2 = GL_TRUE;
+ }
+
+#if 0 /* not yet */
+ if (screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC)) {
+ ctx->Extensions.ARB_draw_buffers_blend = GL_TRUE;
+ }
+#endif
}
#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_state.h"
+#include "pipe/p_context.h"
#include "tgsi/tgsi_ureg.h"
#include "st_mesa_to_tgsi.h"
+#include "st_context.h"
#include "shader/prog_instruction.h"
#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
unsigned token;
};
+
+/**
+ * Intermediate state used during shader translation.
+ */
struct st_translate {
struct ureg_program *ureg;
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
struct ureg_dst address[1];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+ struct ureg_dst psizregreal;
+ struct ureg_src pointSizeConst;
+ GLint psizoutindex;
+ GLboolean prevInstWrotePsiz;
const GLuint *inputMapping;
const GLuint *outputMapping;
return t->temps[index];
case PROGRAM_OUTPUT:
+ if (index == t->psizoutindex)
+ t->prevInstWrotePsiz = GL_TRUE;
return t->outputs[t->outputMapping[index]];
case PROGRAM_ADDRESS:
t->temps[index] = ureg_DECL_temporary( t->ureg );
return ureg_src(t->temps[index]);
- case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
case PROGRAM_UNIFORM:
ASSERT(index >= 0);
return t->constants[index];
+ case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT: /* ie, immediate */
if (index < 0)
return ureg_DECL_constant( t->ureg, 0 );
}
}
+/**
+ * Emit the TGSI instructions to adjust the WPOS pixel center convention.
+ *
+ * Adds \p value to the X and Y components of the fragment position input,
+ * stores the result in a new temporary and redirects all later reads of
+ * WPOS to that temporary.
+ *
+ * Z and W are written too, with a bias of zero, so they pass through
+ * unchanged.  Writing only X|Y would leave Z/W of the temporary undefined
+ * for any shader that reads gl_FragCoord.z or .w, and emit_inverted_wpos()
+ * reuses a temporary-file WPOS input as-is without copying from the real
+ * input.
+ *
+ * \param t        translation state; t->inputs[] is updated in place
+ * \param program  unused, kept for signature compatibility with callers
+ * \param value    bias added to both X and Y
+ *
+ * NOTE(review): the same bias is applied to X and Y.  When the caller also
+ * inverts Y it passes the sign needed for Y, which looks wrong for X --
+ * confirm; fixing that needs separate X/Y biases at the call sites.
+ */
+static void
+emit_adjusted_wpos( struct st_translate *t,
+                    const struct gl_program *program, GLfloat value)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
+   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+
+   (void) program;
+
+   /* ADD wpos_temp, input[wpos], {value, value, 0, 0} */
+   ureg_ADD(ureg, wpos_temp, wpos_input,
+            ureg_imm4f(ureg, value, value, 0.0f, 0.0f));
+
+   /* Use wpos_temp as the position input from here on. */
+   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
+}
/**
* Emit the TGSI instructions for inverting the WPOS y coordinate.
winSizeState);
struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst );
- struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
+ struct ureg_dst wpos_temp;
struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
/* MOV wpos_temp, input[wpos]
*/
- ureg_MOV( ureg, wpos_temp, wpos_input );
+ if (wpos_input.File == TGSI_FILE_TEMPORARY)
+ wpos_temp = ureg_dst(wpos_input);
+ else {
+ wpos_temp = ureg_DECL_temporary( ureg );
+ ureg_MOV( ureg, wpos_temp, wpos_input );
+ }
/* SUB wpos_temp.y, winsize_const, wpos_input
*/
t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
}
+
static void
emit_edgeflags( struct st_translate *t,
const struct gl_program *program )
ureg_MOV( ureg, edge_dst, edge_src );
}
+
/**
* Translate Mesa program to TGSI format.
* \param program the program to translate
* \param outputSemanticIndex the semantic index (ex: which texcoord) for
* each output
*
- * \return array of translated tokens, caller's responsibility to free
+ * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
*/
enum pipe_error
st_translate_mesa_program(
{
struct st_translate translate, *t;
unsigned i;
+ enum pipe_error ret = PIPE_OK;
t = &translate;
memset(t, 0, sizeof *t);
t->inputMapping = inputMapping;
t->outputMapping = outputMapping;
t->ureg = ureg;
+ t->psizoutindex = -1;
+ t->prevInstWrotePsiz = GL_FALSE;
/*_mesa_print_program(program);*/
* Declare input attributes.
*/
if (procType == TGSI_PROCESSOR_FRAGMENT) {
+ struct gl_fragment_program* fp = (struct gl_fragment_program*)program;
for (i = 0; i < numInputs; i++) {
- t->inputs[i] = ureg_DECL_fs_input(ureg,
- inputSemanticName[i],
- inputSemanticIndex[i],
- interpMode[i]);
+ if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) {
+ t->inputs[i] = ureg_DECL_fs_input_cyl(ureg,
+ inputSemanticName[i],
+ inputSemanticIndex[i],
+ interpMode[i],
+ TGSI_CYLINDRICAL_WRAP_X);
+ }
+ else {
+ t->inputs[i] = ureg_DECL_fs_input(ureg,
+ inputSemanticName[i],
+ inputSemanticIndex[i],
+ interpMode[i]);
+ }
}
if (program->InputsRead & FRAG_BIT_WPOS) {
/* Must do this after setting up t->inputs, and before
* emitting constant references, below:
*/
- emit_inverted_wpos( t, program );
+ struct pipe_screen* pscreen = st_context(ctx)->pipe->screen;
+ boolean invert = FALSE;
+
+ if (fp->OriginUpperLeft) {
+ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
+ }
+ else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
+ ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+ invert = TRUE;
+ }
+ else
+ assert(0);
+ }
+ else {
+ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
+ ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+ else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
+ invert = TRUE;
+ else
+ assert(0);
+ }
+
+ if (fp->PixelCenterInteger) {
+ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
+ ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+ else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
+ emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
+ else
+ assert(0);
+ }
+ else {
+ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
+ }
+ else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
+ ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+ emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
+ }
+ else
+ assert(0);
+ }
+
+ /* we invert after adjustment so that we avoid the MOV to temporary,
+ * and reuse the adjustment ADD instead */
+ if (invert)
+ emit_inverted_wpos(t, program);
}
if (program->InputsRead & FRAG_BIT_FACE) {
t->outputs[i] = ureg_DECL_output( ureg,
outputSemanticName[i],
outputSemanticIndex[i] );
+ if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
+ static const gl_state_index pointSizeClampState[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
+ /* XXX: note we are modifying the incoming shader here! Need to
+ * do this before emitting the constant decls below, or this
+ * will be missed:
+ */
+ unsigned pointSizeClampConst = _mesa_add_state_reference(program->Parameters,
+ pointSizeClampState);
+ struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
+ t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+ t->psizregreal = t->outputs[i];
+ t->psizoutindex = i;
+ t->outputs[i] = psizregtemp;
+ }
}
if (passthrough_edgeflags)
emit_edgeflags( t, program );
t->constants = CALLOC( program->Parameters->NumParameters,
sizeof t->constants[0] );
- if (t->constants == NULL)
+ if (t->constants == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out;
+ }
for (i = 0; i < program->Parameters->NumParameters; i++) {
switch (program->Parameters->Parameters[i].Type) {
for (i = 0; i < program->NumInstructions; i++) {
set_insn_start( t, ureg_get_instruction_number( ureg ));
compile_instruction( t, &program->Instructions[i] );
+
+ /* note can't do that easily at the end of prog due to
+ possible early return */
+ if (t->prevInstWrotePsiz && program->Id) {
+ set_insn_start( t, ureg_get_instruction_number( ureg ));
+ ureg_MAX( t->ureg, ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X),
+ ureg_src(t->outputs[t->psizoutindex]),
+ ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+ ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X),
+ ureg_src(t->outputs[t->psizoutindex]),
+ ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+ }
+ t->prevInstWrotePsiz = GL_FALSE;
}
/* Fix up all emitted labels:
t->insn[t->labels[i].branch_target] );
}
- return PIPE_OK;
-
out:
FREE(t->insn);
FREE(t->labels);
debug_printf("%s: translate error flag set\n", __FUNCTION__);
}
- return PIPE_ERROR_OUT_OF_MEMORY;
+ return ret;
}