/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26 #include "common/gen_device_info.h"
27 #include "genxml/gen_macros.h"
29 #include "brw_context.h"
31 #include "brw_defines.h"
33 #include "brw_state.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffer_objects.h"
39 #include "intel_fbo.h"
41 #include "main/fbobject.h"
42 #include "main/framebuffer.h"
43 #include "main/stencil.h"
44 #include "main/transformfeedback.h"
47 emit_dwords(struct brw_context
*brw
, unsigned n
)
49 intel_batchbuffer_begin(brw
, n
, RENDER_RING
);
50 uint32_t *map
= brw
->batch
.map_next
;
51 brw
->batch
.map_next
+= n
;
52 intel_batchbuffer_advance(brw
);
58 uint32_t read_domains
;
59 uint32_t write_domain
;
64 emit_reloc(struct brw_context
*brw
,
65 void *location
, struct brw_address address
, uint32_t delta
)
67 uint32_t offset
= (char *) location
- (char *) brw
->batch
.map
;
69 return brw_emit_reloc(&brw
->batch
, offset
, address
.bo
,
70 address
.offset
+ delta
,
72 address
.write_domain
);
/* Type hooks consumed by the genxml-generated pack functions. */
#define __gen_address_type struct brw_address
#define __gen_user_data struct brw_context
79 __gen_combine_address(struct brw_context
*brw
, void *location
,
80 struct brw_address address
, uint32_t delta
)
82 if (address
.bo
== NULL
) {
83 return address
.offset
+ delta
;
85 return emit_reloc(brw
, location
, address
, delta
);
89 static inline struct brw_address
90 render_bo(struct brw_bo
*bo
, uint32_t offset
)
92 return (struct brw_address
) {
95 .read_domains
= I915_GEM_DOMAIN_RENDER
,
96 .write_domain
= I915_GEM_DOMAIN_RENDER
,
100 static inline struct brw_address
101 instruction_bo(struct brw_bo
*bo
, uint32_t offset
)
103 return (struct brw_address
) {
106 .read_domains
= I915_GEM_DOMAIN_INSTRUCTION
,
107 .write_domain
= I915_GEM_DOMAIN_INSTRUCTION
,
111 #include "genxml/genX_pack.h"
113 #define _brw_cmd_length(cmd) cmd ## _length
114 #define _brw_cmd_length_bias(cmd) cmd ## _length_bias
115 #define _brw_cmd_header(cmd) cmd ## _header
116 #define _brw_cmd_pack(cmd) cmd ## _pack
118 #define brw_batch_emit(brw, cmd, name) \
119 for (struct cmd name = { _brw_cmd_header(cmd) }, \
120 *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \
121 __builtin_expect(_dst != NULL, 1); \
122 _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \
125 #define brw_batch_emitn(brw, cmd, n, ...) ({ \
126 uint32_t *_dw = emit_dwords(brw, n); \
127 struct cmd template = { \
128 _brw_cmd_header(cmd), \
129 .DWordLength = n - _brw_cmd_length_bias(cmd), \
132 _brw_cmd_pack(cmd)(brw, _dw, &template); \
133 _dw + 1; /* Array starts at dw[1] */ \
136 #define brw_state_emit(brw, cmd, align, offset, name) \
137 for (struct cmd name = { 0, }, \
138 *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4, \
140 __builtin_expect(_dst != NULL, 1); \
141 _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \
146 * Determine the appropriate attribute override value to store into the
147 * 3DSTATE_SF structure for a given fragment shader attribute. The attribute
148 * override value contains two pieces of information: the location of the
149 * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
150 * flag indicating whether to "swizzle" the attribute based on the direction
151 * the triangle is facing.
153 * If an attribute is "swizzled", then the given VUE location is used for
154 * front-facing triangles, and the VUE location that immediately follows is
155 * used for back-facing triangles. We use this to implement the mapping from
156 * gl_FrontColor/gl_BackColor to gl_Color.
158 * urb_entry_read_offset is the offset into the VUE at which the SF unit is
159 * being instructed to begin reading attribute data. It can be set to a
160 * nonzero value to prevent the SF unit from wasting time reading elements of
161 * the VUE that are not needed by the fragment shader. It is measured in
162 * 256-bit increments.
165 genX(get_attr_override
)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) *attr
,
166 const struct brw_vue_map
*vue_map
,
167 int urb_entry_read_offset
, int fs_attr
,
168 bool two_side_color
, uint32_t *max_source_attr
)
170 /* Find the VUE slot for this attribute. */
171 int slot
= vue_map
->varying_to_slot
[fs_attr
];
173 /* Viewport and Layer are stored in the VUE header. We need to override
174 * them to zero if earlier stages didn't write them, as GL requires that
175 * they read back as zero when not explicitly set.
177 if (fs_attr
== VARYING_SLOT_VIEWPORT
|| fs_attr
== VARYING_SLOT_LAYER
) {
178 attr
->ComponentOverrideX
= true;
179 attr
->ComponentOverrideW
= true;
180 attr
->ConstantSource
= CONST_0000
;
182 if (!(vue_map
->slots_valid
& VARYING_BIT_LAYER
))
183 attr
->ComponentOverrideY
= true;
184 if (!(vue_map
->slots_valid
& VARYING_BIT_VIEWPORT
))
185 attr
->ComponentOverrideZ
= true;
190 /* If there was only a back color written but not front, use back
191 * as the color instead of undefined
193 if (slot
== -1 && fs_attr
== VARYING_SLOT_COL0
)
194 slot
= vue_map
->varying_to_slot
[VARYING_SLOT_BFC0
];
195 if (slot
== -1 && fs_attr
== VARYING_SLOT_COL1
)
196 slot
= vue_map
->varying_to_slot
[VARYING_SLOT_BFC1
];
199 /* This attribute does not exist in the VUE--that means that the vertex
200 * shader did not write to it. This means that either:
202 * (a) This attribute is a texture coordinate, and it is going to be
203 * replaced with point coordinates (as a consequence of a call to
204 * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
205 * hardware will ignore whatever attribute override we supply.
207 * (b) This attribute is read by the fragment shader but not written by
208 * the vertex shader, so its value is undefined. Therefore the
209 * attribute override we supply doesn't matter.
211 * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
212 * previous shader stage.
214 * Note that we don't have to worry about the cases where the attribute
215 * is gl_PointCoord or is undergoing point sprite coordinate
216 * replacement, because in those cases, this function isn't called.
218 * In case (c), we need to program the attribute overrides so that the
219 * primitive ID will be stored in this slot. In every other case, the
220 * attribute override we supply doesn't matter. So just go ahead and
221 * program primitive ID in every case.
223 attr
->ComponentOverrideW
= true;
224 attr
->ComponentOverrideX
= true;
225 attr
->ComponentOverrideY
= true;
226 attr
->ComponentOverrideZ
= true;
227 attr
->ConstantSource
= PRIM_ID
;
231 /* Compute the location of the attribute relative to urb_entry_read_offset.
232 * Each increment of urb_entry_read_offset represents a 256-bit value, so
233 * it counts for two 128-bit VUE slots.
235 int source_attr
= slot
- 2 * urb_entry_read_offset
;
236 assert(source_attr
>= 0 && source_attr
< 32);
238 /* If we are doing two-sided color, and the VUE slot following this one
239 * represents a back-facing color, then we need to instruct the SF unit to
240 * do back-facing swizzling.
242 bool swizzling
= two_side_color
&&
243 ((vue_map
->slot_to_varying
[slot
] == VARYING_SLOT_COL0
&&
244 vue_map
->slot_to_varying
[slot
+1] == VARYING_SLOT_BFC0
) ||
245 (vue_map
->slot_to_varying
[slot
] == VARYING_SLOT_COL1
&&
246 vue_map
->slot_to_varying
[slot
+1] == VARYING_SLOT_BFC1
));
248 /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
249 if (*max_source_attr
< source_attr
+ swizzling
)
250 *max_source_attr
= source_attr
+ swizzling
;
252 attr
->SourceAttribute
= source_attr
;
254 attr
->SwizzleSelect
= INPUTATTR_FACING
;
259 genX(calculate_attr_overrides
)(const struct brw_context
*brw
,
260 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) *attr_overrides
,
261 uint32_t *point_sprite_enables
,
262 uint32_t *urb_entry_read_length
,
263 uint32_t *urb_entry_read_offset
)
265 const struct gl_context
*ctx
= &brw
->ctx
;
268 const struct gl_point_attrib
*point
= &ctx
->Point
;
270 /* BRW_NEW_FS_PROG_DATA */
271 const struct brw_wm_prog_data
*wm_prog_data
=
272 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
273 uint32_t max_source_attr
= 0;
275 *point_sprite_enables
= 0;
277 /* BRW_NEW_FRAGMENT_PROGRAM
279 * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
280 * the full vertex header. Otherwise, we can program the SF to start
281 * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
282 * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
283 * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
286 bool fs_needs_vue_header
= brw
->fragment_program
->info
.inputs_read
&
287 (VARYING_BIT_LAYER
| VARYING_BIT_VIEWPORT
);
289 *urb_entry_read_offset
= fs_needs_vue_header
? 0 : 1;
291 /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
292 * description of dw10 Point Sprite Texture Coordinate Enable:
294 * "This field must be programmed to zero when non-point primitives
297 * The SandyBridge PRM doesn't explicitly say that point sprite enables
298 * must be programmed to zero when rendering non-point primitives, but
299 * the IvyBridge PRM does, and if we don't, we get garbage.
301 * This is not required on Haswell, as the hardware ignores this state
302 * when drawing non-points -- although we do still need to be careful to
303 * correctly set the attr overrides.
306 * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
308 bool drawing_points
= brw_is_drawing_points(brw
);
310 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
311 int input_index
= wm_prog_data
->urb_setup
[attr
];
317 bool point_sprite
= false;
318 if (drawing_points
) {
319 if (point
->PointSprite
&&
320 (attr
>= VARYING_SLOT_TEX0
&& attr
<= VARYING_SLOT_TEX7
) &&
321 (point
->CoordReplace
& (1u << (attr
- VARYING_SLOT_TEX0
)))) {
325 if (attr
== VARYING_SLOT_PNTC
)
329 *point_sprite_enables
|= (1 << input_index
);
332 /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
333 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) attribute
= { 0 };
336 genX(get_attr_override
)(&attribute
,
337 &brw
->vue_map_geom_out
,
338 *urb_entry_read_offset
, attr
,
339 brw
->ctx
.VertexProgram
._TwoSideEnabled
,
343 /* The hardware can only do the overrides on 16 overrides at a
344 * time, and the other up to 16 have to be lined up so that the
345 * input index = the output index. We'll need to do some
346 * tweaking to make sure that's the case.
348 if (input_index
< 16)
349 attr_overrides
[input_index
] = attribute
;
351 assert(attribute
.SourceAttribute
== input_index
);
354 /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
355 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
357 * "This field should be set to the minimum length required to read the
358 * maximum source attribute. The maximum source attribute is indicated
359 * by the maximum value of the enabled Attribute # Source Attribute if
360 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
362 * read_length = ceiling((max_source_attr + 1) / 2)
364 * [errata] Corruption/Hang possible if length programmed larger than
367 * Similar text exists for Ivy Bridge.
369 *urb_entry_read_length
= DIV_ROUND_UP(max_source_attr
+ 1, 2);
372 /* ---------------------------------------------------------------------- */
375 genX(upload_depth_stencil_state
)(struct brw_context
*brw
)
377 struct gl_context
*ctx
= &brw
->ctx
;
380 struct intel_renderbuffer
*depth_irb
=
381 intel_get_renderbuffer(ctx
->DrawBuffer
, BUFFER_DEPTH
);
384 struct gl_depthbuffer_attrib
*depth
= &ctx
->Depth
;
387 struct gl_stencil_attrib
*stencil
= &ctx
->Stencil
;
388 const int b
= stencil
->_BackFace
;
391 brw_batch_emit(brw
, GENX(3DSTATE_WM_DEPTH_STENCIL
), wmds
) {
394 brw_state_emit(brw
, GENX(DEPTH_STENCIL_STATE
), 64, &ds_offset
, wmds
) {
396 if (depth
->Test
&& depth_irb
) {
397 wmds
.DepthTestEnable
= true;
398 wmds
.DepthBufferWriteEnable
= brw_depth_writes_enabled(brw
);
399 wmds
.DepthTestFunction
= intel_translate_compare_func(depth
->Func
);
402 if (stencil
->_Enabled
) {
403 wmds
.StencilTestEnable
= true;
404 wmds
.StencilWriteMask
= stencil
->WriteMask
[0] & 0xff;
405 wmds
.StencilTestMask
= stencil
->ValueMask
[0] & 0xff;
407 wmds
.StencilTestFunction
=
408 intel_translate_compare_func(stencil
->Function
[0]);
410 intel_translate_stencil_op(stencil
->FailFunc
[0]);
411 wmds
.StencilPassDepthPassOp
=
412 intel_translate_stencil_op(stencil
->ZPassFunc
[0]);
413 wmds
.StencilPassDepthFailOp
=
414 intel_translate_stencil_op(stencil
->ZFailFunc
[0]);
416 wmds
.StencilBufferWriteEnable
= stencil
->_WriteEnabled
;
418 if (stencil
->_TestTwoSide
) {
419 wmds
.DoubleSidedStencilEnable
= true;
420 wmds
.BackfaceStencilWriteMask
= stencil
->WriteMask
[b
] & 0xff;
421 wmds
.BackfaceStencilTestMask
= stencil
->ValueMask
[b
] & 0xff;
423 wmds
.BackfaceStencilTestFunction
=
424 intel_translate_compare_func(stencil
->Function
[b
]);
425 wmds
.BackfaceStencilFailOp
=
426 intel_translate_stencil_op(stencil
->FailFunc
[b
]);
427 wmds
.BackfaceStencilPassDepthPassOp
=
428 intel_translate_stencil_op(stencil
->ZPassFunc
[b
]);
429 wmds
.BackfaceStencilPassDepthFailOp
=
430 intel_translate_stencil_op(stencil
->ZFailFunc
[b
]);
434 wmds
.StencilReferenceValue
= _mesa_get_stencil_ref(ctx
, 0);
435 wmds
.BackfaceStencilReferenceValue
= _mesa_get_stencil_ref(ctx
, b
);
441 brw_batch_emit(brw
, GENX(3DSTATE_CC_STATE_POINTERS
), ptr
) {
442 ptr
.PointertoDEPTH_STENCIL_STATE
= ds_offset
;
443 ptr
.DEPTH_STENCIL_STATEChange
= true;
446 brw_batch_emit(brw
, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS
), ptr
) {
447 ptr
.PointertoDEPTH_STENCIL_STATE
= ds_offset
;
452 static const struct brw_tracked_state
genX(depth_stencil_state
) = {
454 .mesa
= _NEW_BUFFERS
|
457 .brw
= BRW_NEW_BLORP
|
458 (GEN_GEN
>= 8 ? BRW_NEW_CONTEXT
460 BRW_NEW_STATE_BASE_ADDRESS
),
462 .emit
= genX(upload_depth_stencil_state
),
465 /* ---------------------------------------------------------------------- */
468 genX(upload_clip_state
)(struct brw_context
*brw
)
470 struct gl_context
*ctx
= &brw
->ctx
;
473 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
475 /* BRW_NEW_FS_PROG_DATA */
476 struct brw_wm_prog_data
*wm_prog_data
=
477 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
479 brw_batch_emit(brw
, GENX(3DSTATE_CLIP
), clip
) {
480 clip
.StatisticsEnable
= !brw
->meta_in_progress
;
482 if (wm_prog_data
->barycentric_interp_modes
&
483 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS
)
484 clip
.NonPerspectiveBarycentricEnable
= true;
487 clip
.EarlyCullEnable
= true;
491 clip
.FrontWinding
= ctx
->Polygon
._FrontBit
== _mesa_is_user_fbo(fb
);
493 if (ctx
->Polygon
.CullFlag
) {
494 switch (ctx
->Polygon
.CullFaceMode
) {
496 clip
.CullMode
= CULLMODE_FRONT
;
499 clip
.CullMode
= CULLMODE_BACK
;
501 case GL_FRONT_AND_BACK
:
502 clip
.CullMode
= CULLMODE_BOTH
;
505 unreachable("Should not get here: invalid CullFlag");
508 clip
.CullMode
= CULLMODE_NONE
;
513 clip
.UserClipDistanceCullTestEnableBitmask
=
514 brw_vue_prog_data(brw
->vs
.base
.prog_data
)->cull_distance_mask
;
516 clip
.ViewportZClipTestEnable
= !ctx
->Transform
.DepthClamp
;
520 if (ctx
->Light
.ProvokingVertex
== GL_FIRST_VERTEX_CONVENTION
) {
521 clip
.TriangleStripListProvokingVertexSelect
= 0;
522 clip
.TriangleFanProvokingVertexSelect
= 1;
523 clip
.LineStripListProvokingVertexSelect
= 0;
525 clip
.TriangleStripListProvokingVertexSelect
= 2;
526 clip
.TriangleFanProvokingVertexSelect
= 2;
527 clip
.LineStripListProvokingVertexSelect
= 1;
531 clip
.UserClipDistanceClipTestEnableBitmask
=
532 ctx
->Transform
.ClipPlanesEnabled
;
535 clip
.ForceUserClipDistanceClipTestEnableBitmask
= true;
538 if (ctx
->Transform
.ClipDepthMode
== GL_ZERO_TO_ONE
)
539 clip
.APIMode
= APIMODE_D3D
;
541 clip
.APIMode
= APIMODE_OGL
;
543 clip
.GuardbandClipTestEnable
= true;
545 /* BRW_NEW_VIEWPORT_COUNT */
546 const unsigned viewport_count
= brw
->clip
.viewport_count
;
548 if (ctx
->RasterDiscard
) {
549 clip
.ClipMode
= CLIPMODE_REJECT_ALL
;
551 perf_debug("Rasterizer discard is currently implemented via the "
552 "clipper; having the GS not write primitives would "
553 "likely be faster.\n");
556 clip
.ClipMode
= CLIPMODE_NORMAL
;
559 clip
.ClipEnable
= brw
->primitive
!= _3DPRIM_RECTLIST
;
562 * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
564 if (!brw_is_drawing_points(brw
) && !brw_is_drawing_lines(brw
))
565 clip
.ViewportXYClipTestEnable
= true;
567 clip
.MinimumPointWidth
= 0.125;
568 clip
.MaximumPointWidth
= 255.875;
569 clip
.MaximumVPIndex
= viewport_count
- 1;
570 if (_mesa_geometric_layers(fb
) == 0)
571 clip
.ForceZeroRTAIndexEnable
= true;
575 static const struct brw_tracked_state
genX(clip_state
) = {
577 .mesa
= _NEW_BUFFERS
|
581 .brw
= BRW_NEW_BLORP
|
583 BRW_NEW_FS_PROG_DATA
|
584 BRW_NEW_GS_PROG_DATA
|
585 BRW_NEW_VS_PROG_DATA
|
586 BRW_NEW_META_IN_PROGRESS
|
588 BRW_NEW_RASTERIZER_DISCARD
|
589 BRW_NEW_TES_PROG_DATA
|
590 BRW_NEW_VIEWPORT_COUNT
,
592 .emit
= genX(upload_clip_state
),
595 /* ---------------------------------------------------------------------- */
598 genX(upload_sf
)(struct brw_context
*brw
)
600 struct gl_context
*ctx
= &brw
->ctx
;
605 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
606 const bool multisampled_fbo
= _mesa_geometric_samples(ctx
->DrawBuffer
) > 1;
609 brw_batch_emit(brw
, GENX(3DSTATE_SF
), sf
) {
610 sf
.StatisticsEnable
= true;
611 sf
.ViewportTransformEnable
= brw
->sf
.viewport_transform_enable
;
615 sf
.DepthBufferSurfaceFormat
= brw_depthbuffer_format(brw
);
620 sf
.FrontWinding
= ctx
->Polygon
._FrontBit
== render_to_fbo
;
621 sf
.GlobalDepthOffsetEnableSolid
= ctx
->Polygon
.OffsetFill
;
622 sf
.GlobalDepthOffsetEnableWireframe
= ctx
->Polygon
.OffsetLine
;
623 sf
.GlobalDepthOffsetEnablePoint
= ctx
->Polygon
.OffsetPoint
;
625 switch (ctx
->Polygon
.FrontMode
) {
627 sf
.FrontFaceFillMode
= FILL_MODE_SOLID
;
630 sf
.FrontFaceFillMode
= FILL_MODE_WIREFRAME
;
633 sf
.FrontFaceFillMode
= FILL_MODE_POINT
;
636 unreachable("not reached");
639 switch (ctx
->Polygon
.BackMode
) {
641 sf
.BackFaceFillMode
= FILL_MODE_SOLID
;
644 sf
.BackFaceFillMode
= FILL_MODE_WIREFRAME
;
647 sf
.BackFaceFillMode
= FILL_MODE_POINT
;
650 unreachable("not reached");
653 sf
.ScissorRectangleEnable
= true;
655 if (ctx
->Polygon
.CullFlag
) {
656 switch (ctx
->Polygon
.CullFaceMode
) {
658 sf
.CullMode
= CULLMODE_FRONT
;
661 sf
.CullMode
= CULLMODE_BACK
;
663 case GL_FRONT_AND_BACK
:
664 sf
.CullMode
= CULLMODE_BOTH
;
667 unreachable("not reached");
670 sf
.CullMode
= CULLMODE_NONE
;
674 sf
.LineStippleEnable
= ctx
->Line
.StippleFlag
;
677 if (multisampled_fbo
&& ctx
->Multisample
.Enabled
)
678 sf
.MultisampleRasterizationMode
= MSRASTMODE_ON_PATTERN
;
680 sf
.GlobalDepthOffsetConstant
= ctx
->Polygon
.OffsetUnits
* 2;
681 sf
.GlobalDepthOffsetScale
= ctx
->Polygon
.OffsetFactor
;
682 sf
.GlobalDepthOffsetClamp
= ctx
->Polygon
.OffsetClamp
;
686 sf
.LineWidth
= brw_get_line_width_float(brw
);
688 if (ctx
->Line
.SmoothFlag
) {
689 sf
.LineEndCapAntialiasingRegionWidth
= _10pixels
;
691 sf
.AntiAliasingEnable
= true;
695 /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
696 point_size
= CLAMP(ctx
->Point
.Size
, ctx
->Point
.MinSize
, ctx
->Point
.MaxSize
);
697 /* Clamp to the hardware limits */
698 sf
.PointWidth
= CLAMP(point_size
, 0.125f
, 255.875f
);
700 /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
701 if (use_state_point_size(brw
))
702 sf
.PointWidthSource
= State
;
705 /* _NEW_POINT | _NEW_MULTISAMPLE */
706 if ((ctx
->Point
.SmoothFlag
|| _mesa_is_multisample_enabled(ctx
)) &&
707 !ctx
->Point
.PointSprite
)
708 sf
.SmoothPointEnable
= true;
711 sf
.AALineDistanceMode
= AALINEDISTANCE_TRUE
;
714 if (ctx
->Light
.ProvokingVertex
!= GL_FIRST_VERTEX_CONVENTION
) {
715 sf
.TriangleStripListProvokingVertexSelect
= 2;
716 sf
.TriangleFanProvokingVertexSelect
= 2;
717 sf
.LineStripListProvokingVertexSelect
= 1;
719 sf
.TriangleFanProvokingVertexSelect
= 1;
723 /* BRW_NEW_FS_PROG_DATA */
724 const struct brw_wm_prog_data
*wm_prog_data
=
725 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
727 sf
.AttributeSwizzleEnable
= true;
728 sf
.NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
;
731 * Window coordinates in an FBO are inverted, which means point
732 * sprite origin must be inverted, too.
734 if ((ctx
->Point
.SpriteOrigin
== GL_LOWER_LEFT
) != render_to_fbo
) {
735 sf
.PointSpriteTextureCoordinateOrigin
= LOWERLEFT
;
737 sf
.PointSpriteTextureCoordinateOrigin
= UPPERLEFT
;
740 /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
741 * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
743 uint32_t urb_entry_read_length
;
744 uint32_t urb_entry_read_offset
;
745 uint32_t point_sprite_enables
;
746 genX(calculate_attr_overrides
)(brw
, sf
.Attribute
, &point_sprite_enables
,
747 &urb_entry_read_length
,
748 &urb_entry_read_offset
);
749 sf
.VertexURBEntryReadLength
= urb_entry_read_length
;
750 sf
.VertexURBEntryReadOffset
= urb_entry_read_offset
;
751 sf
.PointSpriteTextureCoordinateEnable
= point_sprite_enables
;
752 sf
.ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
;
757 static const struct brw_tracked_state
genX(sf_state
) = {
764 (GEN_GEN
<= 7 ? _NEW_BUFFERS
| _NEW_POLYGON
: 0),
765 .brw
= BRW_NEW_BLORP
|
767 BRW_NEW_VUE_MAP_GEOM_OUT
|
768 (GEN_GEN
<= 7 ? BRW_NEW_GS_PROG_DATA
|
770 BRW_NEW_TES_PROG_DATA
772 (GEN_GEN
== 6 ? BRW_NEW_FS_PROG_DATA
|
773 BRW_NEW_FRAGMENT_PROGRAM
776 .emit
= genX(upload_sf
),
779 /* ---------------------------------------------------------------------- */
782 genX(upload_wm
)(struct brw_context
*brw
)
784 struct gl_context
*ctx
= &brw
->ctx
;
786 /* BRW_NEW_FS_PROG_DATA */
787 const struct brw_wm_prog_data
*wm_prog_data
=
788 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
790 UNUSED
bool writes_depth
=
791 wm_prog_data
->computed_depth_mode
!= BRW_PSCDEPTH_OFF
;
794 const struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
795 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
797 /* We can't fold this into gen6_upload_wm_push_constants(), because
798 * according to the SNB PRM, vol 2 part 1 section 7.2.2
799 * (3DSTATE_CONSTANT_PS [DevSNB]):
801 * "[DevSNB]: This packet must be followed by WM_STATE."
803 brw_batch_emit(brw
, GENX(3DSTATE_CONSTANT_PS
), wmcp
) {
804 if (wm_prog_data
->base
.nr_params
!= 0) {
805 wmcp
.Buffer0Valid
= true;
806 /* Pointer to the WM constant buffer. Covered by the set of
807 * state flags from gen6_upload_wm_push_constants.
809 wmcp
.PointertoPSConstantBuffer0
= stage_state
->push_const_offset
;
810 wmcp
.PSConstantBuffer0ReadLength
= stage_state
->push_const_size
- 1;
815 brw_batch_emit(brw
, GENX(3DSTATE_WM
), wm
) {
816 wm
.StatisticsEnable
= true;
817 wm
.LineAntialiasingRegionWidth
= _10pixels
;
818 wm
.LineEndCapAntialiasingRegionWidth
= _05pixels
;
821 if (wm_prog_data
->base
.use_alt_mode
)
822 wm
.FloatingPointMode
= Alternate
;
824 wm
.SamplerCount
= DIV_ROUND_UP(stage_state
->sampler_count
, 4);
825 wm
.BindingTableEntryCount
= wm_prog_data
->base
.binding_table
.size_bytes
/ 4;
826 wm
.MaximumNumberofThreads
= devinfo
->max_wm_threads
- 1;
827 wm
._8PixelDispatchEnable
= wm_prog_data
->dispatch_8
;
828 wm
._16PixelDispatchEnable
= wm_prog_data
->dispatch_16
;
829 wm
.DispatchGRFStartRegisterForConstantSetupData0
=
830 wm_prog_data
->base
.dispatch_grf_start_reg
;
831 wm
.DispatchGRFStartRegisterForConstantSetupData2
=
832 wm_prog_data
->dispatch_grf_start_reg_2
;
833 wm
.KernelStartPointer0
= stage_state
->prog_offset
;
834 wm
.KernelStartPointer2
= stage_state
->prog_offset
+
835 wm_prog_data
->prog_offset_2
;
836 wm
.DualSourceBlendEnable
=
837 wm_prog_data
->dual_src_blend
&& (ctx
->Color
.BlendEnabled
& 1) &&
838 ctx
->Color
.Blend
[0]._UsesDualSrc
;
839 wm
.oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
;
840 wm
.NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
;
842 /* From the SNB PRM, volume 2 part 1, page 281:
843 * "If the PS kernel does not need the Position XY Offsets
844 * to compute a Position XY value, then this field should be
845 * programmed to POSOFFSET_NONE."
847 * "SW Recommendation: If the PS kernel needs the Position Offsets
848 * to compute a Position XY value, this field should match Position
849 * ZW Interpolation Mode to ensure a consistent position.xyzw
851 * We only require XY sample offsets. So, this recommendation doesn't
852 * look useful at the moment. We might need this in future.
854 if (wm_prog_data
->uses_pos_offset
)
855 wm
.PositionXYOffsetSelect
= POSOFFSET_SAMPLE
;
857 wm
.PositionXYOffsetSelect
= POSOFFSET_NONE
;
859 if (wm_prog_data
->base
.total_scratch
) {
860 wm
.ScratchSpaceBasePointer
=
861 render_bo(stage_state
->scratch_bo
,
862 ffs(stage_state
->per_thread_scratch
) - 11);
865 wm
.PixelShaderComputedDepth
= writes_depth
;
868 wm
.PointRasterizationRule
= RASTRULE_UPPER_RIGHT
;
871 wm
.LineStippleEnable
= ctx
->Line
.StippleFlag
;
874 wm
.PolygonStippleEnable
= ctx
->Polygon
.StippleFlag
;
875 wm
.BarycentricInterpolationMode
= wm_prog_data
->barycentric_interp_modes
;
879 const bool multisampled_fbo
= _mesa_geometric_samples(ctx
->DrawBuffer
) > 1;
881 wm
.PixelShaderUsesSourceDepth
= wm_prog_data
->uses_src_depth
;
882 wm
.PixelShaderUsesSourceW
= wm_prog_data
->uses_src_w
;
883 if (wm_prog_data
->uses_kill
||
884 _mesa_is_alpha_test_enabled(ctx
) ||
885 _mesa_is_alpha_to_coverage_enabled(ctx
) ||
886 wm_prog_data
->uses_omask
) {
887 wm
.PixelShaderKillsPixel
= true;
890 /* _NEW_BUFFERS | _NEW_COLOR */
891 if (brw_color_buffer_write_enabled(brw
) || writes_depth
||
892 wm_prog_data
->has_side_effects
|| wm
.PixelShaderKillsPixel
) {
893 wm
.ThreadDispatchEnable
= true;
895 if (multisampled_fbo
) {
896 /* _NEW_MULTISAMPLE */
897 if (ctx
->Multisample
.Enabled
)
898 wm
.MultisampleRasterizationMode
= MSRASTMODE_ON_PATTERN
;
900 wm
.MultisampleRasterizationMode
= MSRASTMODE_OFF_PIXEL
;
902 if (wm_prog_data
->persample_dispatch
)
903 wm
.MultisampleDispatchMode
= MSDISPMODE_PERSAMPLE
;
905 wm
.MultisampleDispatchMode
= MSDISPMODE_PERPIXEL
;
907 wm
.MultisampleRasterizationMode
= MSRASTMODE_OFF_PIXEL
;
908 wm
.MultisampleDispatchMode
= MSDISPMODE_PERSAMPLE
;
912 wm
.PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
;
913 wm
.PixelShaderUsesInputCoverageMask
= wm_prog_data
->uses_sample_mask
;
916 /* The "UAV access enable" bits are unnecessary on HSW because they only
917 * seem to have an effect on the HW-assisted coherency mechanism which we
918 * don't need, and the rasterization-related UAV_ONLY flag and the
919 * DISPATCH_ENABLE bit can be set independently from it.
920 * C.f. gen8_upload_ps_extra().
922 * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS |
926 if (!(brw_color_buffer_write_enabled(brw
) || writes_depth
) &&
927 wm_prog_data
->has_side_effects
)
933 /* BRW_NEW_FS_PROG_DATA */
934 if (wm_prog_data
->early_fragment_tests
)
935 wm
.EarlyDepthStencilControl
= EDSC_PREPS
;
936 else if (wm_prog_data
->has_side_effects
)
937 wm
.EarlyDepthStencilControl
= EDSC_PSEXEC
;
942 static const struct brw_tracked_state
genX(wm_state
) = {
946 (GEN_GEN
< 8 ? _NEW_BUFFERS
|
950 (GEN_GEN
< 7 ? _NEW_PROGRAM_CONSTANTS
: 0),
951 .brw
= BRW_NEW_BLORP
|
952 BRW_NEW_FS_PROG_DATA
|
953 (GEN_GEN
< 7 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION
|
957 .emit
= genX(upload_wm
),
960 /* ---------------------------------------------------------------------- */
/* Fill in the thread-dispatch fields common to the 3DSTATE_VS/GS/HS/DS
 * packets.  Relies on `stage_state`, `stage_prog_data` and `vue_prog_data`
 * being in scope at the expansion site.
 *
 * NOTE(review): several continuation lines of this macro were missing from
 * the garbled listing; reconstructed — verify against git history.
 */
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
   pkt.KernelStartPointer = stage_state->prog_offset;             \
   pkt.SamplerCount       =                                       \
      DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);  \
   pkt.BindingTableEntryCount =                                   \
      stage_prog_data->binding_table.size_bytes / 4;              \
   pkt.FloatingPointMode = stage_prog_data->use_alt_mode;         \
                                                                  \
   if (stage_prog_data->total_scratch) {                          \
      pkt.ScratchSpaceBasePointer =                               \
         render_bo(stage_state->scratch_bo, 0);                   \
      pkt.PerThreadScratchSpace =                                 \
         ffs(stage_state->per_thread_scratch) - 11;               \
   }                                                              \
                                                                  \
   pkt.DispatchGRFStartRegisterForURBData =                       \
      stage_prog_data->dispatch_grf_start_reg;                    \
   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \
   pkt.prefix##URBEntryReadOffset = 0;                            \
                                                                  \
   pkt.StatisticsEnable = true;                                   \
   pkt.Enable           = true;
987 genX(upload_vs_state
)(struct brw_context
*brw
)
989 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
990 const struct brw_stage_state
*stage_state
= &brw
->vs
.base
;
992 /* BRW_NEW_VS_PROG_DATA */
993 const struct brw_vue_prog_data
*vue_prog_data
=
994 brw_vue_prog_data(brw
->vs
.base
.prog_data
);
995 const struct brw_stage_prog_data
*stage_prog_data
= &vue_prog_data
->base
;
997 assert(vue_prog_data
->dispatch_mode
== DISPATCH_MODE_SIMD8
||
998 vue_prog_data
->dispatch_mode
== DISPATCH_MODE_4X2_DUAL_OBJECT
);
1000 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
1001 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
1003 * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
1004 * command that causes the VS Function Enable to toggle. Pipeline
1005 * flush can be executed by sending a PIPE_CONTROL command with CS
1006 * stall bit set and a post sync operation.
1008 * We've already done such a flush at the start of state upload, so we
1009 * don't need to do another one here.
1013 brw_batch_emit(brw
, GENX(3DSTATE_CONSTANT_VS
), cvs
) {
1014 if (stage_state
->push_const_size
!= 0) {
1015 cvs
.Buffer0Valid
= true;
1016 cvs
.PointertoVSConstantBuffer0
= stage_state
->push_const_offset
;
1017 cvs
.VSConstantBuffer0ReadLength
= stage_state
->push_const_size
- 1;
1022 if (GEN_GEN
== 7 && devinfo
->is_ivybridge
)
1023 gen7_emit_vs_workaround_flush(brw
);
1025 brw_batch_emit(brw
, GENX(3DSTATE_VS
), vs
) {
1026 INIT_THREAD_DISPATCH_FIELDS(vs
, Vertex
);
1028 vs
.MaximumNumberofThreads
= devinfo
->max_vs_threads
- 1;
1031 vs
.SIMD8DispatchEnable
=
1032 vue_prog_data
->dispatch_mode
== DISPATCH_MODE_SIMD8
;
1034 vs
.UserClipDistanceCullTestEnableBitmask
=
1035 vue_prog_data
->cull_distance_mask
;
1040 /* Based on my reading of the simulator, the VS constants don't get
1041 * pulled into the VS FF unit until an appropriate pipeline flush
1042 * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
1043 * references to them into a little FIFO. The flushes are common,
1044 * but don't reliably happen between this and a 3DPRIMITIVE, causing
1045 * the primitive to use the wrong constants. Then the FIFO
1046 * containing the constant setup gets added to again on the next
1047 * constants change, and eventually when a flush does happen the
1048 * unit is overwhelmed by constant changes and dies.
1050 * To avoid this, send a PIPE_CONTROL down the line that will
1051 * update the unit immediately loading the constants. The flush
1052 * type bits here were those set by the STATE_BASE_ADDRESS whose
1053 * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
1054 * bug reports that led to this workaround, and may be more than
1055 * what is strictly required to avoid the issue.
1057 brw_emit_pipe_control_flush(brw
,
1058 PIPE_CONTROL_DEPTH_STALL
|
1059 PIPE_CONTROL_INSTRUCTION_INVALIDATE
|
1060 PIPE_CONTROL_STATE_CACHE_INVALIDATE
);
1064 static const struct brw_tracked_state
genX(vs_state
) = {
1066 .mesa
= (GEN_GEN
< 7 ? (_NEW_PROGRAM_CONSTANTS
| _NEW_TRANSFORM
) : 0),
1067 .brw
= BRW_NEW_BATCH
|
1070 BRW_NEW_VS_PROG_DATA
|
1071 (GEN_GEN
< 7 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION
|
1072 BRW_NEW_VERTEX_PROGRAM
1075 .emit
= genX(upload_vs_state
),
1080 /* ---------------------------------------------------------------------- */
1084 genX(upload_sbe
)(struct brw_context
*brw
)
1086 struct gl_context
*ctx
= &brw
->ctx
;
1087 /* BRW_NEW_FS_PROG_DATA */
1088 const struct brw_wm_prog_data
*wm_prog_data
=
1089 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1091 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) attr_overrides
[16] = { { 0 } };
1093 #define attr_overrides sbe.Attribute
1095 uint32_t urb_entry_read_length
;
1096 uint32_t urb_entry_read_offset
;
1097 uint32_t point_sprite_enables
;
1099 brw_batch_emit(brw
, GENX(3DSTATE_SBE
), sbe
) {
1100 sbe
.AttributeSwizzleEnable
= true;
1101 sbe
.NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
;
1104 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
1108 * Window coordinates in an FBO are inverted, which means point
1109 * sprite origin must be inverted.
1111 if ((ctx
->Point
.SpriteOrigin
== GL_LOWER_LEFT
) != render_to_fbo
)
1112 sbe
.PointSpriteTextureCoordinateOrigin
= LOWERLEFT
;
1114 sbe
.PointSpriteTextureCoordinateOrigin
= UPPERLEFT
;
1116 /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM,
1117 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM |
1118 * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA |
1119 * BRW_NEW_VUE_MAP_GEOM_OUT
1121 genX(calculate_attr_overrides
)(brw
,
1123 &point_sprite_enables
,
1124 &urb_entry_read_length
,
1125 &urb_entry_read_offset
);
1127 /* Typically, the URB entry read length and offset should be programmed
1128 * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active
1129 * stage which produces geometry. However, we don't know the proper
1130 * value until we call calculate_attr_overrides().
1132 * To fit with our existing code, we override the inherited values and
1133 * specify it here directly, as we did on previous generations.
1135 sbe
.VertexURBEntryReadLength
= urb_entry_read_length
;
1136 sbe
.VertexURBEntryReadOffset
= urb_entry_read_offset
;
1137 sbe
.PointSpriteTextureCoordinateEnable
= point_sprite_enables
;
1138 sbe
.ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
;
1141 sbe
.ForceVertexURBEntryReadLength
= true;
1142 sbe
.ForceVertexURBEntryReadOffset
= true;
1146 /* prepare the active component dwords */
1147 int input_index
= 0;
1148 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
1149 if (!(brw
->fragment_program
->info
.inputs_read
&
1150 BITFIELD64_BIT(attr
))) {
1154 assert(input_index
< 32);
1156 sbe
.AttributeActiveComponentFormat
[input_index
] = ACTIVE_COMPONENT_XYZW
;
1163 brw_batch_emit(brw
, GENX(3DSTATE_SBE_SWIZ
), sbes
) {
1164 for (int i
= 0; i
< 16; i
++)
1165 sbes
.Attribute
[i
] = attr_overrides
[i
];
1169 #undef attr_overrides
1172 static const struct brw_tracked_state
genX(sbe_state
) = {
1174 .mesa
= _NEW_BUFFERS
|
1179 .brw
= BRW_NEW_BLORP
|
1181 BRW_NEW_FRAGMENT_PROGRAM
|
1182 BRW_NEW_FS_PROG_DATA
|
1183 BRW_NEW_GS_PROG_DATA
|
1184 BRW_NEW_TES_PROG_DATA
|
1185 BRW_NEW_VUE_MAP_GEOM_OUT
|
1186 (GEN_GEN
== 7 ? BRW_NEW_PRIMITIVE
1189 .emit
= genX(upload_sbe
),
1192 /* ---------------------------------------------------------------------- */
1195 * Outputs the 3DSTATE_SO_DECL_LIST command.
1197 * The data output is a series of 64-bit entries containing a SO_DECL per
1198 * stream. We only have one stream of rendering coming out of the GS unit, so
1199 * we only emit stream 0 (low 16 bits) SO_DECLs.
1202 genX(upload_3dstate_so_decl_list
)(struct brw_context
*brw
,
1203 const struct brw_vue_map
*vue_map
)
1205 struct gl_context
*ctx
= &brw
->ctx
;
1206 /* BRW_NEW_TRANSFORM_FEEDBACK */
1207 struct gl_transform_feedback_object
*xfb_obj
=
1208 ctx
->TransformFeedback
.CurrentObject
;
1209 const struct gl_transform_feedback_info
*linked_xfb_info
=
1210 xfb_obj
->program
->sh
.LinkedTransformFeedback
;
1211 struct GENX(SO_DECL
) so_decl
[MAX_VERTEX_STREAMS
][128];
1212 int buffer_mask
[MAX_VERTEX_STREAMS
] = {0, 0, 0, 0};
1213 int next_offset
[MAX_VERTEX_STREAMS
] = {0, 0, 0, 0};
1214 int decls
[MAX_VERTEX_STREAMS
] = {0, 0, 0, 0};
1216 STATIC_ASSERT(ARRAY_SIZE(so_decl
[0]) >= MAX_PROGRAM_OUTPUTS
);
1218 memset(so_decl
, 0, sizeof(so_decl
));
1220 /* Construct the list of SO_DECLs to be emitted. The formatting of the
1221 * command feels strange -- each dword pair contains a SO_DECL per stream.
1223 for (unsigned i
= 0; i
< linked_xfb_info
->NumOutputs
; i
++) {
1224 int buffer
= linked_xfb_info
->Outputs
[i
].OutputBuffer
;
1225 struct GENX(SO_DECL
) decl
= {0};
1226 int varying
= linked_xfb_info
->Outputs
[i
].OutputRegister
;
1227 const unsigned components
= linked_xfb_info
->Outputs
[i
].NumComponents
;
1228 unsigned component_mask
= (1 << components
) - 1;
1229 unsigned stream_id
= linked_xfb_info
->Outputs
[i
].StreamId
;
1230 unsigned decl_buffer_slot
= buffer
;
1231 assert(stream_id
< MAX_VERTEX_STREAMS
);
1233 /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
1234 * gl_Layer is stored in VARYING_SLOT_PSIZ.y
1235 * gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
1237 if (varying
== VARYING_SLOT_PSIZ
) {
1238 assert(components
== 1);
1239 component_mask
<<= 3;
1240 } else if (varying
== VARYING_SLOT_LAYER
) {
1241 assert(components
== 1);
1242 component_mask
<<= 1;
1243 } else if (varying
== VARYING_SLOT_VIEWPORT
) {
1244 assert(components
== 1);
1245 component_mask
<<= 2;
1247 component_mask
<<= linked_xfb_info
->Outputs
[i
].ComponentOffset
;
1250 buffer_mask
[stream_id
] |= 1 << buffer
;
1252 decl
.OutputBufferSlot
= decl_buffer_slot
;
1253 if (varying
== VARYING_SLOT_LAYER
|| varying
== VARYING_SLOT_VIEWPORT
) {
1254 decl
.RegisterIndex
= vue_map
->varying_to_slot
[VARYING_SLOT_PSIZ
];
1256 assert(vue_map
->varying_to_slot
[varying
] >= 0);
1257 decl
.RegisterIndex
= vue_map
->varying_to_slot
[varying
];
1259 decl
.ComponentMask
= component_mask
;
1261 /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
1262 * array. Instead, it simply increments DstOffset for the following
1263 * input by the number of components that should be skipped.
1265 * Our hardware is unusual in that it requires us to program SO_DECLs
1266 * for fake "hole" components, rather than simply taking the offset
1267 * for each real varying. Each hole can have size 1, 2, 3, or 4; we
1268 * program as many size = 4 holes as we can, then a final hole to
1269 * accommodate the final 1, 2, or 3 remaining.
1271 int skip_components
=
1272 linked_xfb_info
->Outputs
[i
].DstOffset
- next_offset
[buffer
];
1274 next_offset
[buffer
] += skip_components
;
1276 while (skip_components
>= 4) {
1277 struct GENX(SO_DECL
) *d
= &so_decl
[stream_id
][decls
[stream_id
]++];
1279 d
->OutputBufferSlot
= decl_buffer_slot
;
1280 d
->ComponentMask
= 0xf;
1281 skip_components
-= 4;
1284 if (skip_components
> 0) {
1285 struct GENX(SO_DECL
) *d
= &so_decl
[stream_id
][decls
[stream_id
]++];
1287 d
->OutputBufferSlot
= decl_buffer_slot
;
1288 d
->ComponentMask
= (1 << skip_components
) - 1;
1291 assert(linked_xfb_info
->Outputs
[i
].DstOffset
== next_offset
[buffer
]);
1293 next_offset
[buffer
] += components
;
1295 so_decl
[stream_id
][decls
[stream_id
]++] = decl
;
1297 if (decls
[stream_id
] > max_decls
)
1298 max_decls
= decls
[stream_id
];
1302 dw
= brw_batch_emitn(brw
, GENX(3DSTATE_SO_DECL_LIST
), 3 + 2 * max_decls
,
1303 .StreamtoBufferSelects0
= buffer_mask
[0],
1304 .StreamtoBufferSelects1
= buffer_mask
[1],
1305 .StreamtoBufferSelects2
= buffer_mask
[2],
1306 .StreamtoBufferSelects3
= buffer_mask
[3],
1307 .NumEntries0
= decls
[0],
1308 .NumEntries1
= decls
[1],
1309 .NumEntries2
= decls
[2],
1310 .NumEntries3
= decls
[3]);
1312 for (int i
= 0; i
< max_decls
; i
++) {
1313 GENX(SO_DECL_ENTRY_pack
)(
1314 brw
, dw
+ 2 + i
* 2,
1315 &(struct GENX(SO_DECL_ENTRY
)) {
1316 .Stream0Decl
= so_decl
[0][i
],
1317 .Stream1Decl
= so_decl
[1][i
],
1318 .Stream2Decl
= so_decl
[2][i
],
1319 .Stream3Decl
= so_decl
[3][i
],
1325 genX(upload_3dstate_so_buffers
)(struct brw_context
*brw
)
1327 struct gl_context
*ctx
= &brw
->ctx
;
1328 /* BRW_NEW_TRANSFORM_FEEDBACK */
1329 struct gl_transform_feedback_object
*xfb_obj
=
1330 ctx
->TransformFeedback
.CurrentObject
;
1332 const struct gl_transform_feedback_info
*linked_xfb_info
=
1333 xfb_obj
->program
->sh
.LinkedTransformFeedback
;
1335 struct brw_transform_feedback_object
*brw_obj
=
1336 (struct brw_transform_feedback_object
*) xfb_obj
;
1337 uint32_t mocs_wb
= brw
->gen
>= 9 ? SKL_MOCS_WB
: BDW_MOCS_WB
;
1340 /* Set up the up to 4 output buffers. These are the ranges defined in the
1341 * gl_transform_feedback_object.
1343 for (int i
= 0; i
< 4; i
++) {
1344 struct intel_buffer_object
*bufferobj
=
1345 intel_buffer_object(xfb_obj
->Buffers
[i
]);
1348 brw_batch_emit(brw
, GENX(3DSTATE_SO_BUFFER
), sob
) {
1349 sob
.SOBufferIndex
= i
;
1354 uint32_t start
= xfb_obj
->Offset
[i
];
1355 assert(start
% 4 == 0);
1356 uint32_t end
= ALIGN(start
+ xfb_obj
->Size
[i
], 4);
1358 intel_bufferobj_buffer(brw
, bufferobj
, start
, end
- start
);
1359 assert(end
<= bo
->size
);
1361 brw_batch_emit(brw
, GENX(3DSTATE_SO_BUFFER
), sob
) {
1362 sob
.SOBufferIndex
= i
;
1364 sob
.SurfaceBaseAddress
= render_bo(bo
, start
);
1366 sob
.SurfacePitch
= linked_xfb_info
->Buffers
[i
].Stride
* 4;
1367 sob
.SurfaceEndAddress
= render_bo(bo
, end
);
1369 sob
.SOBufferEnable
= true;
1370 sob
.StreamOffsetWriteEnable
= true;
1371 sob
.StreamOutputBufferOffsetAddressEnable
= true;
1372 sob
.SOBufferMOCS
= mocs_wb
;
1374 sob
.SurfaceSize
= MAX2(xfb_obj
->Size
[i
] / 4, 1) - 1;
1375 sob
.StreamOutputBufferOffsetAddress
=
1376 instruction_bo(brw_obj
->offset_bo
, i
* sizeof(uint32_t));
1378 if (brw_obj
->zero_offsets
) {
1379 /* Zero out the offset and write that to offset_bo */
1380 sob
.StreamOffset
= 0;
1382 /* Use offset_bo as the "Stream Offset." */
1383 sob
.StreamOffset
= 0xFFFFFFFF;
1390 brw_obj
->zero_offsets
= false;
1395 query_active(struct gl_query_object
*q
)
1397 return q
&& q
->Active
;
1401 genX(upload_3dstate_streamout
)(struct brw_context
*brw
, bool active
,
1402 const struct brw_vue_map
*vue_map
)
1404 struct gl_context
*ctx
= &brw
->ctx
;
1405 /* BRW_NEW_TRANSFORM_FEEDBACK */
1406 struct gl_transform_feedback_object
*xfb_obj
=
1407 ctx
->TransformFeedback
.CurrentObject
;
1409 brw_batch_emit(brw
, GENX(3DSTATE_STREAMOUT
), sos
) {
1411 int urb_entry_read_offset
= 0;
1412 int urb_entry_read_length
= (vue_map
->num_slots
+ 1) / 2 -
1413 urb_entry_read_offset
;
1415 sos
.SOFunctionEnable
= true;
1416 sos
.SOStatisticsEnable
= true;
1418 /* BRW_NEW_RASTERIZER_DISCARD */
1419 if (ctx
->RasterDiscard
) {
1420 if (!query_active(ctx
->Query
.PrimitivesGenerated
[0])) {
1421 sos
.RenderingDisable
= true;
1423 perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
1424 "query active relies on the clipper.");
1429 if (ctx
->Light
.ProvokingVertex
!= GL_FIRST_VERTEX_CONVENTION
)
1430 sos
.ReorderMode
= TRAILING
;
1433 sos
.SOBufferEnable0
= xfb_obj
->Buffers
[0] != NULL
;
1434 sos
.SOBufferEnable1
= xfb_obj
->Buffers
[1] != NULL
;
1435 sos
.SOBufferEnable2
= xfb_obj
->Buffers
[2] != NULL
;
1436 sos
.SOBufferEnable3
= xfb_obj
->Buffers
[3] != NULL
;
1438 const struct gl_transform_feedback_info
*linked_xfb_info
=
1439 xfb_obj
->program
->sh
.LinkedTransformFeedback
;
1440 /* Set buffer pitches; 0 means unbound. */
1441 if (xfb_obj
->Buffers
[0])
1442 sos
.Buffer0SurfacePitch
= linked_xfb_info
->Buffers
[0].Stride
* 4;
1443 if (xfb_obj
->Buffers
[1])
1444 sos
.Buffer1SurfacePitch
= linked_xfb_info
->Buffers
[1].Stride
* 4;
1445 if (xfb_obj
->Buffers
[2])
1446 sos
.Buffer2SurfacePitch
= linked_xfb_info
->Buffers
[2].Stride
* 4;
1447 if (xfb_obj
->Buffers
[3])
1448 sos
.Buffer3SurfacePitch
= linked_xfb_info
->Buffers
[3].Stride
* 4;
1451 /* We always read the whole vertex. This could be reduced at some
1452 * point by reading less and offsetting the register index in the
1455 sos
.Stream0VertexReadOffset
= urb_entry_read_offset
;
1456 sos
.Stream0VertexReadLength
= urb_entry_read_length
- 1;
1457 sos
.Stream1VertexReadOffset
= urb_entry_read_offset
;
1458 sos
.Stream1VertexReadLength
= urb_entry_read_length
- 1;
1459 sos
.Stream2VertexReadOffset
= urb_entry_read_offset
;
1460 sos
.Stream2VertexReadLength
= urb_entry_read_length
- 1;
1461 sos
.Stream3VertexReadOffset
= urb_entry_read_offset
;
1462 sos
.Stream3VertexReadLength
= urb_entry_read_length
- 1;
1468 genX(upload_sol
)(struct brw_context
*brw
)
1470 struct gl_context
*ctx
= &brw
->ctx
;
1471 /* BRW_NEW_TRANSFORM_FEEDBACK */
1472 bool active
= _mesa_is_xfb_active_and_unpaused(ctx
);
1475 genX(upload_3dstate_so_buffers
)(brw
);
1477 /* BRW_NEW_VUE_MAP_GEOM_OUT */
1478 genX(upload_3dstate_so_decl_list
)(brw
, &brw
->vue_map_geom_out
);
1481 /* Finally, set up the SOL stage. This command must always follow updates to
1482 * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
1483 * MMIO register updates (current performed by the kernel at each batch
1486 genX(upload_3dstate_streamout
)(brw
, active
, &brw
->vue_map_geom_out
);
1489 static const struct brw_tracked_state
genX(sol_state
) = {
1492 .brw
= BRW_NEW_BATCH
|
1494 BRW_NEW_RASTERIZER_DISCARD
|
1495 BRW_NEW_VUE_MAP_GEOM_OUT
|
1496 BRW_NEW_TRANSFORM_FEEDBACK
,
1498 .emit
= genX(upload_sol
),
1501 /* ---------------------------------------------------------------------- */
1504 genX(upload_ps
)(struct brw_context
*brw
)
1506 UNUSED
const struct gl_context
*ctx
= &brw
->ctx
;
1507 UNUSED
const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1509 /* BRW_NEW_FS_PROG_DATA */
1510 const struct brw_wm_prog_data
*prog_data
=
1511 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1512 const struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
1517 brw_batch_emit(brw
, GENX(3DSTATE_PS
), ps
) {
1518 /* Initialize the execution mask with VMask. Otherwise, derivatives are
1519 * incorrect for subspans where some of the pixels are unlit. We believe
1520 * the bit just didn't take effect in previous generations.
1522 ps
.VectorMaskEnable
= GEN_GEN
>= 8;
1525 DIV_ROUND_UP(CLAMP(stage_state
->sampler_count
, 0, 16), 4);
1527 /* BRW_NEW_FS_PROG_DATA */
1528 ps
.BindingTableEntryCount
= prog_data
->base
.binding_table
.size_bytes
/ 4;
1530 if (prog_data
->base
.use_alt_mode
)
1531 ps
.FloatingPointMode
= Alternate
;
1533 /* Haswell requires the sample mask to be set in this packet as well as
1534 * in 3DSTATE_SAMPLE_MASK; the values should match.
1537 /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
1539 ps
.SampleMask
= gen6_determine_sample_mask(brw
);
1542 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
1543 * it implicitly scales for different GT levels (which have some # of
1546 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
1549 ps
.MaximumNumberofThreadsPerPSD
= 64 - 1;
1551 ps
.MaximumNumberofThreadsPerPSD
= 64 - 2;
1553 ps
.MaximumNumberofThreads
= devinfo
->max_wm_threads
- 1;
1556 if (prog_data
->base
.nr_params
> 0)
1557 ps
.PushConstantEnable
= true;
1560 /* From the IVB PRM, volume 2 part 1, page 287:
1561 * "This bit is inserted in the PS payload header and made available to
1562 * the DataPort (either via the message header or via header bypass) to
1563 * indicate that oMask data (one or two phases) is included in Render
1564 * Target Write messages. If present, the oMask data is used to mask off
1567 ps
.oMaskPresenttoRenderTarget
= prog_data
->uses_omask
;
1569 /* The hardware wedges if you have this bit set but don't turn on any
1570 * dual source blend factors.
1572 * BRW_NEW_FS_PROG_DATA | _NEW_COLOR
1574 ps
.DualSourceBlendEnable
= prog_data
->dual_src_blend
&&
1575 (ctx
->Color
.BlendEnabled
& 1) &&
1576 ctx
->Color
.Blend
[0]._UsesDualSrc
;
1578 /* BRW_NEW_FS_PROG_DATA */
1579 ps
.AttributeEnable
= (prog_data
->num_varying_inputs
!= 0);
1582 /* From the documentation for this packet:
1583 * "If the PS kernel does not need the Position XY Offsets to
1584 * compute a Position Value, then this field should be programmed
1585 * to POSOFFSET_NONE."
1587 * "SW Recommendation: If the PS kernel needs the Position Offsets
1588 * to compute a Position XY value, this field should match Position
1589 * ZW Interpolation Mode to ensure a consistent position.xyzw
1592 * We only require XY sample offsets. So, this recommendation doesn't
1593 * look useful at the moment. We might need this in future.
1595 if (prog_data
->uses_pos_offset
)
1596 ps
.PositionXYOffsetSelect
= POSOFFSET_SAMPLE
;
1598 ps
.PositionXYOffsetSelect
= POSOFFSET_NONE
;
1600 ps
.RenderTargetFastClearEnable
= brw
->wm
.fast_clear_op
;
1601 ps
._8PixelDispatchEnable
= prog_data
->dispatch_8
;
1602 ps
._16PixelDispatchEnable
= prog_data
->dispatch_16
;
1603 ps
.DispatchGRFStartRegisterForConstantSetupData0
=
1604 prog_data
->base
.dispatch_grf_start_reg
;
1605 ps
.DispatchGRFStartRegisterForConstantSetupData2
=
1606 prog_data
->dispatch_grf_start_reg_2
;
1608 ps
.KernelStartPointer0
= stage_state
->prog_offset
;
1609 ps
.KernelStartPointer2
= stage_state
->prog_offset
+
1610 prog_data
->prog_offset_2
;
1612 if (prog_data
->base
.total_scratch
) {
1613 ps
.ScratchSpaceBasePointer
=
1614 render_bo(stage_state
->scratch_bo
,
1615 ffs(stage_state
->per_thread_scratch
) - 11);
1620 static const struct brw_tracked_state
genX(ps_state
) = {
1622 .mesa
= _NEW_MULTISAMPLE
|
1623 (GEN_GEN
< 8 ? _NEW_BUFFERS
|
1626 .brw
= BRW_NEW_BATCH
|
1628 BRW_NEW_FS_PROG_DATA
,
1630 .emit
= genX(upload_ps
),
1635 /* ---------------------------------------------------------------------- */
1639 genX(upload_raster
)(struct brw_context
*brw
)
1641 struct gl_context
*ctx
= &brw
->ctx
;
1644 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
1647 struct gl_polygon_attrib
*polygon
= &ctx
->Polygon
;
1650 struct gl_point_attrib
*point
= &ctx
->Point
;
1652 brw_batch_emit(brw
, GENX(3DSTATE_RASTER
), raster
) {
1653 if (polygon
->_FrontBit
== render_to_fbo
)
1654 raster
.FrontWinding
= CounterClockwise
;
1656 if (polygon
->CullFlag
) {
1657 switch (polygon
->CullFaceMode
) {
1659 raster
.CullMode
= CULLMODE_FRONT
;
1662 raster
.CullMode
= CULLMODE_BACK
;
1664 case GL_FRONT_AND_BACK
:
1665 raster
.CullMode
= CULLMODE_BOTH
;
1668 unreachable("not reached");
1671 raster
.CullMode
= CULLMODE_NONE
;
1674 point
->SmoothFlag
= raster
.SmoothPointEnable
;
1676 raster
.DXMultisampleRasterizationEnable
=
1677 _mesa_is_multisample_enabled(ctx
);
1679 raster
.GlobalDepthOffsetEnableSolid
= polygon
->OffsetFill
;
1680 raster
.GlobalDepthOffsetEnableWireframe
= polygon
->OffsetLine
;
1681 raster
.GlobalDepthOffsetEnablePoint
= polygon
->OffsetPoint
;
1683 switch (polygon
->FrontMode
) {
1685 raster
.FrontFaceFillMode
= FILL_MODE_SOLID
;
1688 raster
.FrontFaceFillMode
= FILL_MODE_WIREFRAME
;
1691 raster
.FrontFaceFillMode
= FILL_MODE_POINT
;
1694 unreachable("not reached");
1697 switch (polygon
->BackMode
) {
1699 raster
.BackFaceFillMode
= FILL_MODE_SOLID
;
1702 raster
.BackFaceFillMode
= FILL_MODE_WIREFRAME
;
1705 raster
.BackFaceFillMode
= FILL_MODE_POINT
;
1708 unreachable("not reached");
1712 raster
.AntialiasingEnable
= ctx
->Line
.SmoothFlag
;
1715 raster
.ScissorRectangleEnable
= ctx
->Scissor
.EnableFlags
;
1717 /* _NEW_TRANSFORM */
1718 if (!ctx
->Transform
.DepthClamp
) {
1720 raster
.ViewportZFarClipTestEnable
= true;
1721 raster
.ViewportZNearClipTestEnable
= true;
1723 raster
.ViewportZClipTestEnable
= true;
1727 /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
1729 raster
.ConservativeRasterizationEnable
=
1730 ctx
->IntelConservativeRasterization
;
1733 raster
.GlobalDepthOffsetClamp
= polygon
->OffsetClamp
;
1734 raster
.GlobalDepthOffsetScale
= polygon
->OffsetFactor
;
1736 raster
.GlobalDepthOffsetConstant
= polygon
->OffsetUnits
* 2;
1740 static const struct brw_tracked_state
genX(raster_state
) = {
1742 .mesa
= _NEW_BUFFERS
|
1749 .brw
= BRW_NEW_BLORP
|
1751 BRW_NEW_CONSERVATIVE_RASTERIZATION
,
1753 .emit
= genX(upload_raster
),
1756 /* ---------------------------------------------------------------------- */
1759 genX(upload_ps_extra
)(struct brw_context
*brw
)
1761 UNUSED
struct gl_context
*ctx
= &brw
->ctx
;
1763 const struct brw_wm_prog_data
*prog_data
=
1764 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1766 brw_batch_emit(brw
, GENX(3DSTATE_PS_EXTRA
), psx
) {
1767 psx
.PixelShaderValid
= true;
1768 psx
.PixelShaderComputedDepthMode
= prog_data
->computed_depth_mode
;
1769 psx
.PixelShaderKillsPixel
= prog_data
->uses_kill
;
1770 psx
.AttributeEnable
= prog_data
->num_varying_inputs
!= 0;
1771 psx
.PixelShaderUsesSourceDepth
= prog_data
->uses_src_depth
;
1772 psx
.PixelShaderUsesSourceW
= prog_data
->uses_src_w
;
1773 psx
.PixelShaderIsPerSample
= prog_data
->persample_dispatch
;
1775 /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
1776 if (prog_data
->uses_sample_mask
) {
1778 if (prog_data
->post_depth_coverage
)
1779 psx
.InputCoverageMaskState
= ICMS_DEPTH_COVERAGE
;
1780 else if (prog_data
->inner_coverage
&& ctx
->IntelConservativeRasterization
)
1781 psx
.InputCoverageMaskState
= ICMS_INNER_CONSERVATIVE
;
1783 psx
.InputCoverageMaskState
= ICMS_NORMAL
;
1785 psx
.PixelShaderUsesInputCoverageMask
= true;
1789 psx
.oMaskPresenttoRenderTarget
= prog_data
->uses_omask
;
1791 psx
.PixelShaderPullsBary
= prog_data
->pulls_bary
;
1792 psx
.PixelShaderComputesStencil
= prog_data
->computed_stencil
;
1795 /* The stricter cross-primitive coherency guarantees that the hardware
1796 * gives us with the "Accesses UAV" bit set for at least one shader stage
1797 * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
1798 * are redundant within the current image, atomic counter and SSBO GL
1799 * APIs, which all have very loose ordering and coherency requirements
1800 * and generally rely on the application to insert explicit barriers when
1801 * a shader invocation is expected to see the memory writes performed by
1802 * the invocations of some previous primitive. Regardless of the value
1803 * of "UAV coherency required", the "Accesses UAV" bits will implicitly
1804 * cause an in most cases useless DC flush when the lowermost stage with
1805 * the bit set finishes execution.
1807 * It would be nice to disable it, but in some cases we can't because on
1808 * Gen8+ it also has an influence on rasterization via the PS UAV-only
1809 * signal (which could be set independently from the coherency mechanism
1810 * in the 3DSTATE_WM command on Gen7), and because in some cases it will
1811 * determine whether the hardware skips execution of the fragment shader
1812 * or not via the ThreadDispatchEnable signal. However if we know that
1813 * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
1814 * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
1815 * difference so we may just disable it here.
1817 * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
1818 * take into account KillPixels when no depth or stencil writes are
1819 * enabled. In order for occlusion queries to work correctly with no
1820 * attachments, we need to force-enable here.
1822 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
1825 if ((prog_data
->has_side_effects
|| prog_data
->uses_kill
) &&
1826 !brw_color_buffer_write_enabled(brw
))
1827 psx
.PixelShaderHasUAV
= true;
1831 const struct brw_tracked_state
genX(ps_extra
) = {
1833 .mesa
= _NEW_BUFFERS
| _NEW_COLOR
,
1834 .brw
= BRW_NEW_BLORP
|
1836 BRW_NEW_FRAGMENT_PROGRAM
|
1837 BRW_NEW_FS_PROG_DATA
|
1838 BRW_NEW_CONSERVATIVE_RASTERIZATION
,
1840 .emit
= genX(upload_ps_extra
),
1844 /* ---------------------------------------------------------------------- */
1847 genX(init_atoms
)(struct brw_context
*brw
)
1850 static const struct brw_tracked_state
*render_atoms
[] =
1852 /* Once all the programs are done, we know how large urb entry
1853 * sizes need to be and can decide if we need to change the urb
1857 &brw_recalculate_urb_fence
,
1862 /* Surface state setup. Must come before the VS/WM unit. The binding
1863 * table upload must be last.
1865 &brw_vs_pull_constants
,
1866 &brw_wm_pull_constants
,
1867 &brw_renderbuffer_surfaces
,
1868 &brw_renderbuffer_read_surfaces
,
1869 &brw_texture_surfaces
,
1870 &brw_vs_binding_table
,
1871 &brw_wm_binding_table
,
1876 /* These set up state for brw_psp_urb_cbs */
1880 &brw_vs_unit
, /* always required, enabled or not */
1886 &brw_invariant_state
,
1888 &brw_binding_table_pointers
,
1889 &brw_blend_constant_color
,
1893 &brw_polygon_stipple
,
1894 &brw_polygon_stipple_offset
,
1901 &brw_indices
, /* must come before brw_vertices */
1905 &brw_constant_buffer
1908 static const struct brw_tracked_state
*render_atoms
[] =
1910 &gen6_sf_and_clip_viewports
,
1912 /* Command packets: */
1915 &gen6_viewport_state
, /* must do after *_vp stages */
1918 &gen6_blend_state
, /* must do before cc unit */
1919 &gen6_color_calc_state
, /* must do before cc unit */
1920 &gen6_depth_stencil_state
, /* must do before cc unit */
1922 &gen6_vs_push_constants
, /* Before vs_state */
1923 &gen6_gs_push_constants
, /* Before gs_state */
1924 &gen6_wm_push_constants
, /* Before wm_state */
1926 /* Surface state setup. Must come before the VS/WM unit. The binding
1927 * table upload must be last.
1929 &brw_vs_pull_constants
,
1930 &brw_vs_ubo_surfaces
,
1931 &brw_gs_pull_constants
,
1932 &brw_gs_ubo_surfaces
,
1933 &brw_wm_pull_constants
,
1934 &brw_wm_ubo_surfaces
,
1935 &gen6_renderbuffer_surfaces
,
1936 &brw_renderbuffer_read_surfaces
,
1937 &brw_texture_surfaces
,
1939 &brw_vs_binding_table
,
1940 &gen6_gs_binding_table
,
1941 &brw_wm_binding_table
,
1946 &gen6_sampler_state
,
1947 &gen6_multisample_state
,
1955 &gen6_scissor_state
,
1957 &gen6_binding_table_pointers
,
1961 &brw_polygon_stipple
,
1962 &brw_polygon_stipple_offset
,
1968 &brw_indices
, /* must come before brw_vertices */
1973 static const struct brw_tracked_state
*render_atoms
[] =
1975 /* Command packets: */
1978 &gen7_sf_clip_viewport
,
1981 &gen7_push_constant_space
,
1983 &gen6_blend_state
, /* must do before cc unit */
1984 &gen6_color_calc_state
, /* must do before cc unit */
1985 &genX(depth_stencil_state
), /* must do before cc unit */
1987 &brw_vs_image_surfaces
, /* Before vs push/pull constants and binding table */
1988 &brw_tcs_image_surfaces
, /* Before tcs push/pull constants and binding table */
1989 &brw_tes_image_surfaces
, /* Before tes push/pull constants and binding table */
1990 &brw_gs_image_surfaces
, /* Before gs push/pull constants and binding table */
1991 &brw_wm_image_surfaces
, /* Before wm push/pull constants and binding table */
1993 &gen6_vs_push_constants
, /* Before vs_state */
1994 &gen7_tcs_push_constants
,
1995 &gen7_tes_push_constants
,
1996 &gen6_gs_push_constants
, /* Before gs_state */
1997 &gen6_wm_push_constants
, /* Before wm_surfaces and constant_buffer */
1999 /* Surface state setup. Must come before the VS/WM unit. The binding
2000 * table upload must be last.
2002 &brw_vs_pull_constants
,
2003 &brw_vs_ubo_surfaces
,
2004 &brw_vs_abo_surfaces
,
2005 &brw_tcs_pull_constants
,
2006 &brw_tcs_ubo_surfaces
,
2007 &brw_tcs_abo_surfaces
,
2008 &brw_tes_pull_constants
,
2009 &brw_tes_ubo_surfaces
,
2010 &brw_tes_abo_surfaces
,
2011 &brw_gs_pull_constants
,
2012 &brw_gs_ubo_surfaces
,
2013 &brw_gs_abo_surfaces
,
2014 &brw_wm_pull_constants
,
2015 &brw_wm_ubo_surfaces
,
2016 &brw_wm_abo_surfaces
,
2017 &gen6_renderbuffer_surfaces
,
2018 &brw_renderbuffer_read_surfaces
,
2019 &brw_texture_surfaces
,
2020 &brw_vs_binding_table
,
2021 &brw_tcs_binding_table
,
2022 &brw_tes_binding_table
,
2023 &brw_gs_binding_table
,
2024 &brw_wm_binding_table
,
2031 &gen6_multisample_state
,
2045 &gen6_scissor_state
,
2049 &brw_polygon_stipple
,
2050 &brw_polygon_stipple_offset
,
2056 &brw_indices
, /* must come before brw_vertices */
2063 static const struct brw_tracked_state
*render_atoms
[] =
2066 &gen8_sf_clip_viewport
,
2069 &gen7_push_constant_space
,
2072 &gen6_color_calc_state
,
2074 &brw_vs_image_surfaces
, /* Before vs push/pull constants and binding table */
2075 &brw_tcs_image_surfaces
, /* Before tcs push/pull constants and binding table */
2076 &brw_tes_image_surfaces
, /* Before tes push/pull constants and binding table */
2077 &brw_gs_image_surfaces
, /* Before gs push/pull constants and binding table */
2078 &brw_wm_image_surfaces
, /* Before wm push/pull constants and binding table */
2080 &gen6_vs_push_constants
, /* Before vs_state */
2081 &gen7_tcs_push_constants
,
2082 &gen7_tes_push_constants
,
2083 &gen6_gs_push_constants
, /* Before gs_state */
2084 &gen6_wm_push_constants
, /* Before wm_surfaces and constant_buffer */
2086 /* Surface state setup. Must come before the VS/WM unit. The binding
2087 * table upload must be last.
2089 &brw_vs_pull_constants
,
2090 &brw_vs_ubo_surfaces
,
2091 &brw_vs_abo_surfaces
,
2092 &brw_tcs_pull_constants
,
2093 &brw_tcs_ubo_surfaces
,
2094 &brw_tcs_abo_surfaces
,
2095 &brw_tes_pull_constants
,
2096 &brw_tes_ubo_surfaces
,
2097 &brw_tes_abo_surfaces
,
2098 &brw_gs_pull_constants
,
2099 &brw_gs_ubo_surfaces
,
2100 &brw_gs_abo_surfaces
,
2101 &brw_wm_pull_constants
,
2102 &brw_wm_ubo_surfaces
,
2103 &brw_wm_abo_surfaces
,
2104 &gen6_renderbuffer_surfaces
,
2105 &brw_renderbuffer_read_surfaces
,
2106 &brw_texture_surfaces
,
2107 &brw_vs_binding_table
,
2108 &brw_tcs_binding_table
,
2109 &brw_tes_binding_table
,
2110 &brw_gs_binding_table
,
2111 &brw_wm_binding_table
,
2118 &gen8_multisample_state
,
2127 &genX(raster_state
),
2133 &genX(depth_stencil_state
),
2136 &gen6_scissor_state
,
2140 &brw_polygon_stipple
,
2141 &brw_polygon_stipple_offset
,
2158 STATIC_ASSERT(ARRAY_SIZE(render_atoms
) <= ARRAY_SIZE(brw
->render_atoms
));
2159 brw_copy_pipeline_atoms(brw
, BRW_RENDER_PIPELINE
,
2160 render_atoms
, ARRAY_SIZE(render_atoms
));
2163 static const struct brw_tracked_state
*compute_atoms
[] =
2166 &brw_cs_image_surfaces
,
2167 &gen7_cs_push_constants
,
2168 &brw_cs_pull_constants
,
2169 &brw_cs_ubo_surfaces
,
2170 &brw_cs_abo_surfaces
,
2171 &brw_cs_texture_surfaces
,
2172 &brw_cs_work_groups_surface
,
2177 STATIC_ASSERT(ARRAY_SIZE(compute_atoms
) <= ARRAY_SIZE(brw
->compute_atoms
));
2178 brw_copy_pipeline_atoms(brw
, BRW_COMPUTE_PIPELINE
,
2179 compute_atoms
, ARRAY_SIZE(compute_atoms
));