/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26 #include "common/gen_device_info.h"
27 #include "genxml/gen_macros.h"
29 #include "brw_context.h"
30 #include "brw_state.h"
34 #include "intel_batchbuffer.h"
35 #include "intel_buffer_objects.h"
36 #include "intel_fbo.h"
38 #include "main/fbobject.h"
39 #include "main/framebuffer.h"
40 #include "main/stencil.h"
41 #include "main/transformfeedback.h"
/* Reserve n dwords in the batch on RENDER_RING.
 *
 * The reservation is bracketed by intel_batchbuffer_begin() and
 * intel_batchbuffer_advance(); in between, `map` captures
 * brw->batch.map_next as it was before being advanced by n.
 *
 * NOTE(review): no return-type line, braces or return statement are visible
 * in this listing -- they appear to have been dropped.  Confirm against the
 * original file before relying on what the function hands back.
 */
44 emit_dwords(struct brw_context
*brw
, unsigned n
)
46 intel_batchbuffer_begin(brw
, n
, RENDER_RING
);
47 uint32_t *map
= brw
->batch
.map_next
;
48 brw
->batch
.map_next
+= n
;
49 intel_batchbuffer_advance(brw
);
/* Read/write domain members of an address record.  Elsewhere in this file
 * they are initialised with I915_GEM_DOMAIN_* values and write_domain is
 * forwarded to brw_emit_reloc().
 * NOTE(review): the enclosing struct header and its other members are not
 * visible in this listing -- presumably struct brw_address; confirm.
 */
55 uint32_t read_domains
;
56 uint32_t write_domain
;
/* Emit a relocation for the batch location `location`.
 *
 * The batch offset passed on is the byte distance between `location` and
 * brw->batch.map.  The relocation targets address.bo at
 * address.offset + delta, and the result of brw_emit_reloc() is returned.
 *
 * NOTE(review): the argument between the target offset and
 * address.write_domain is not visible in this listing -- confirm.
 */
61 emit_reloc(struct brw_context
*brw
,
62 void *location
, struct brw_address address
, uint32_t delta
)
64 uint32_t offset
= (char *) location
- (char *) brw
->batch
.map
;
66 return brw_emit_reloc(&brw
->batch
, offset
, address
.bo
,
67 address
.offset
+ delta
,
69 address
.write_domain
);
/* Address hooks: the address type is struct brw_address and the user-data
 * type is struct brw_context (presumably consumed by genxml/genX_pack.h,
 * which is included further down -- confirm).
 *
 * __gen_combine_address() resolves an address field.  When address.bo is
 * NULL it returns address.offset + delta directly; the other visible path
 * emits a relocation through emit_reloc() and returns its result.
 */
72 #define __gen_address_type struct brw_address
73 #define __gen_user_data struct brw_context
76 __gen_combine_address(struct brw_context
*brw
, void *location
,
77 struct brw_address address
, uint32_t delta
)
79 if (address
.bo
== NULL
) {
80 return address
.offset
+ delta
;
82 return emit_reloc(brw
, location
, address
, delta
);
/* Build a struct brw_address whose read and write domains are both
 * I915_GEM_DOMAIN_RENDER.
 * NOTE(review): the initialisers for the remaining members are not visible
 * in this listing -- presumably they take the bo and offset parameters;
 * confirm.
 */
86 static inline struct brw_address
87 render_bo(struct brw_bo
*bo
, uint32_t offset
)
89 return (struct brw_address
) {
92 .read_domains
= I915_GEM_DOMAIN_RENDER
,
93 .write_domain
= I915_GEM_DOMAIN_RENDER
,
/* Build a struct brw_address whose read and write domains are both
 * I915_GEM_DOMAIN_INSTRUCTION.
 * NOTE(review): the initialisers for the remaining members are not visible
 * in this listing -- presumably they take the bo and offset parameters;
 * confirm.
 */
97 static inline struct brw_address
98 instruction_bo(struct brw_bo
*bo
, uint32_t offset
)
100 return (struct brw_address
) {
103 .read_domains
= I915_GEM_DOMAIN_INSTRUCTION
,
104 .write_domain
= I915_GEM_DOMAIN_INSTRUCTION
,
108 #include "genxml/genX_pack.h"
/* Token-pasting helpers that map a command name to its _length,
 * _length_bias, _header and _pack symbols, followed by the emit macros
 * built on them:
 *
 *  - brw_batch_emit(brw, cmd, name): a for-loop header that declares `name`
 *    initialised from the command header, reserves _brw_cmd_length(cmd)
 *    dwords with emit_dwords(), and packs `name` into them in the loop's
 *    iteration expression.
 *  - brw_batch_emitn(brw, cmd, n, ...): a statement expression that reserves
 *    n dwords, packs a template whose DWordLength is n minus the length
 *    bias, and evaluates to a pointer to dw[1].
 *  - brw_state_emit(brw, cmd, align, offset, name): same loop shape, but
 *    `name` starts zeroed and storage comes from brw_state_batch().
 *
 * NOTE(review): the tail of each macro (loop termination, closing of the
 * statement expression, remaining brw_state_batch() arguments) is not
 * visible in this listing -- confirm against the original file.
 */
110 #define _brw_cmd_length(cmd) cmd ## _length
111 #define _brw_cmd_length_bias(cmd) cmd ## _length_bias
112 #define _brw_cmd_header(cmd) cmd ## _header
113 #define _brw_cmd_pack(cmd) cmd ## _pack
115 #define brw_batch_emit(brw, cmd, name) \
116 for (struct cmd name = { _brw_cmd_header(cmd) }, \
117 *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \
118 __builtin_expect(_dst != NULL, 1); \
119 _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \
122 #define brw_batch_emitn(brw, cmd, n, ...) ({ \
123 uint32_t *_dw = emit_dwords(brw, n); \
124 struct cmd template = { \
125 _brw_cmd_header(cmd), \
126 .DWordLength = n - _brw_cmd_length_bias(cmd), \
129 _brw_cmd_pack(cmd)(brw, _dw, &template); \
130 _dw + 1; /* Array starts at dw[1] */ \
133 #define brw_state_emit(brw, cmd, align, offset, name) \
134 for (struct cmd name = { 0, }, \
135 *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4, \
137 __builtin_expect(_dst != NULL, 1); \
138 _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \
/**
 * Determine the appropriate attribute override value to store into the
 * 3DSTATE_SF structure for a given fragment shader attribute.  The attribute
 * override value contains two pieces of information: the location of the
 * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
 * flag indicating whether to "swizzle" the attribute based on the direction
 * the triangle is facing.
 *
 * If an attribute is "swizzled", then the given VUE location is used for
 * front-facing triangles, and the VUE location that immediately follows is
 * used for back-facing triangles.  We use this to implement the mapping from
 * gl_FrontColor/gl_BackColor to gl_Color.
 *
 * urb_entry_read_offset is the offset into the VUE at which the SF unit is
 * being instructed to begin reading attribute data.  It can be set to a
 * nonzero value to prevent the SF unit from wasting time reading elements of
 * the VUE that are not needed by the fragment shader.  It is measured in
 * 256-bit increments.
 */
/* Fill in *attr, the SF attribute override for fragment input fs_attr.
 *
 * Visible steps: look fs_attr up in vue_map->varying_to_slot; for
 * VARYING_SLOT_VIEWPORT / VARYING_SLOT_LAYER program a CONST_0000 override
 * on X and W, plus Y when LAYER is not in slots_valid and Z when VIEWPORT
 * is not; fall back from COL0/COL1 to the BFC0/BFC1 slot when the front
 * colour has no slot; program a PRIM_ID constant with all four component
 * overrides; otherwise compute source_attr = slot - 2 *
 * urb_entry_read_offset (asserted to lie in [0, 32)) and store it in
 * attr->SourceAttribute.  *max_source_attr is raised to source_attr, plus
 * one when two-sided colour swizzling applies.
 *
 * NOTE(review): return statements, braces and the conditions guarding the
 * PRIM_ID and INPUTATTR_FACING assignments are not visible in this listing
 * -- confirm the control flow against the original file.
 */
162 genX(get_attr_override
)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) *attr
,
163 const struct brw_vue_map
*vue_map
,
164 int urb_entry_read_offset
, int fs_attr
,
165 bool two_side_color
, uint32_t *max_source_attr
)
167 /* Find the VUE slot for this attribute. */
168 int slot
= vue_map
->varying_to_slot
[fs_attr
];
170 /* Viewport and Layer are stored in the VUE header. We need to override
171 * them to zero if earlier stages didn't write them, as GL requires that
172 * they read back as zero when not explicitly set.
174 if (fs_attr
== VARYING_SLOT_VIEWPORT
|| fs_attr
== VARYING_SLOT_LAYER
) {
175 attr
->ComponentOverrideX
= true;
176 attr
->ComponentOverrideW
= true;
177 attr
->ConstantSource
= CONST_0000
;
179 if (!(vue_map
->slots_valid
& VARYING_BIT_LAYER
))
180 attr
->ComponentOverrideY
= true;
181 if (!(vue_map
->slots_valid
& VARYING_BIT_VIEWPORT
))
182 attr
->ComponentOverrideZ
= true;
187 /* If there was only a back color written but not front, use back
188 * as the color instead of undefined
190 if (slot
== -1 && fs_attr
== VARYING_SLOT_COL0
)
191 slot
= vue_map
->varying_to_slot
[VARYING_SLOT_BFC0
];
192 if (slot
== -1 && fs_attr
== VARYING_SLOT_COL1
)
193 slot
= vue_map
->varying_to_slot
[VARYING_SLOT_BFC1
];
196 /* This attribute does not exist in the VUE--that means that the vertex
197 * shader did not write to it. This means that either:
199 * (a) This attribute is a texture coordinate, and it is going to be
200 * replaced with point coordinates (as a consequence of a call to
201 * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
202 * hardware will ignore whatever attribute override we supply.
204 * (b) This attribute is read by the fragment shader but not written by
205 * the vertex shader, so its value is undefined. Therefore the
206 * attribute override we supply doesn't matter.
208 * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
209 * previous shader stage.
211 * Note that we don't have to worry about the cases where the attribute
212 * is gl_PointCoord or is undergoing point sprite coordinate
213 * replacement, because in those cases, this function isn't called.
215 * In case (c), we need to program the attribute overrides so that the
216 * primitive ID will be stored in this slot. In every other case, the
217 * attribute override we supply doesn't matter. So just go ahead and
218 * program primitive ID in every case.
220 attr
->ComponentOverrideW
= true;
221 attr
->ComponentOverrideX
= true;
222 attr
->ComponentOverrideY
= true;
223 attr
->ComponentOverrideZ
= true;
224 attr
->ConstantSource
= PRIM_ID
;
228 /* Compute the location of the attribute relative to urb_entry_read_offset.
229 * Each increment of urb_entry_read_offset represents a 256-bit value, so
230 * it counts for two 128-bit VUE slots.
232 int source_attr
= slot
- 2 * urb_entry_read_offset
;
233 assert(source_attr
>= 0 && source_attr
< 32);
235 /* If we are doing two-sided color, and the VUE slot following this one
236 * represents a back-facing color, then we need to instruct the SF unit to
237 * do back-facing swizzling.
239 bool swizzling
= two_side_color
&&
240 ((vue_map
->slot_to_varying
[slot
] == VARYING_SLOT_COL0
&&
241 vue_map
->slot_to_varying
[slot
+1] == VARYING_SLOT_BFC0
) ||
242 (vue_map
->slot_to_varying
[slot
] == VARYING_SLOT_COL1
&&
243 vue_map
->slot_to_varying
[slot
+1] == VARYING_SLOT_BFC1
));
245 /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
246 if (*max_source_attr
< source_attr
+ swizzling
)
247 *max_source_attr
= source_attr
+ swizzling
;
249 attr
->SourceAttribute
= source_attr
;
251 attr
->SwizzleSelect
= INPUTATTR_FACING
;
/* Compute the SF attribute overrides for the current fragment program.
 *
 * Outputs:
 *  - attr_overrides[]: per-input override; written for input indices
 *    below 16.  The other visible path only asserts that
 *    SourceAttribute == input_index.
 *  - *point_sprite_enables: starts at 0 and gets bit input_index OR-ed in
 *    from the point-sprite handling in the loop.
 *  - *urb_entry_read_offset: 0 when the fragment program reads
 *    VARYING_BIT_LAYER or VARYING_BIT_VIEWPORT, otherwise 1.
 *  - *urb_entry_read_length: DIV_ROUND_UP(max_source_attr + 1, 2).
 *
 * The loop walks every varying slot below VARYING_SLOT_MAX, taking the
 * input index from wm_prog_data->urb_setup[attr] and the override from
 * genX(get_attr_override).
 *
 * NOTE(review): the statements that skip unused inputs, set point_sprite,
 * and guard the get_attr_override call are not visible in this listing --
 * confirm against the original file.
 */
256 genX(calculate_attr_overrides
)(const struct brw_context
*brw
,
257 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) *attr_overrides
,
258 uint32_t *point_sprite_enables
,
259 uint32_t *urb_entry_read_length
,
260 uint32_t *urb_entry_read_offset
)
262 const struct gl_context
*ctx
= &brw
->ctx
;
265 const struct gl_point_attrib
*point
= &ctx
->Point
;
267 /* BRW_NEW_FS_PROG_DATA */
268 const struct brw_wm_prog_data
*wm_prog_data
=
269 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
270 uint32_t max_source_attr
= 0;
272 *point_sprite_enables
= 0;
274 /* BRW_NEW_FRAGMENT_PROGRAM
276 * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
277 * the full vertex header. Otherwise, we can program the SF to start
278 * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
279 * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
280 * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
283 bool fs_needs_vue_header
= brw
->fragment_program
->info
.inputs_read
&
284 (VARYING_BIT_LAYER
| VARYING_BIT_VIEWPORT
);
286 *urb_entry_read_offset
= fs_needs_vue_header
? 0 : 1;
288 /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
289 * description of dw10 Point Sprite Texture Coordinate Enable:
291 * "This field must be programmed to zero when non-point primitives
294 * The SandyBridge PRM doesn't explicitly say that point sprite enables
295 * must be programmed to zero when rendering non-point primitives, but
296 * the IvyBridge PRM does, and if we don't, we get garbage.
298 * This is not required on Haswell, as the hardware ignores this state
299 * when drawing non-points -- although we do still need to be careful to
300 * correctly set the attr overrides.
303 * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
305 bool drawing_points
= brw_is_drawing_points(brw
);
307 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
308 int input_index
= wm_prog_data
->urb_setup
[attr
];
314 bool point_sprite
= false;
315 if (drawing_points
) {
316 if (point
->PointSprite
&&
317 (attr
>= VARYING_SLOT_TEX0
&& attr
<= VARYING_SLOT_TEX7
) &&
318 (point
->CoordReplace
& (1u << (attr
- VARYING_SLOT_TEX0
)))) {
322 if (attr
== VARYING_SLOT_PNTC
)
326 *point_sprite_enables
|= (1 << input_index
);
329 /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
330 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) attribute
= { 0 };
333 genX(get_attr_override
)(&attribute
,
334 &brw
->vue_map_geom_out
,
335 *urb_entry_read_offset
, attr
,
336 brw
->ctx
.VertexProgram
._TwoSideEnabled
,
340 /* The hardware can only do the overrides on 16 overrides at a
341 * time, and the other up to 16 have to be lined up so that the
342 * input index = the output index. We'll need to do some
343 * tweaking to make sure that's the case.
345 if (input_index
< 16)
346 attr_overrides
[input_index
] = attribute
;
348 assert(attribute
.SourceAttribute
== input_index
);
351 /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
352 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
354 * "This field should be set to the minimum length required to read the
355 * maximum source attribute. The maximum source attribute is indicated
356 * by the maximum value of the enabled Attribute # Source Attribute if
357 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
359 * read_length = ceiling((max_source_attr + 1) / 2)
361 * [errata] Corruption/Hang possible if length programmed larger than
364 * Similar text exists for Ivy Bridge.
366 *urb_entry_read_length
= DIV_ROUND_UP(max_source_attr
+ 1, 2);
369 /* ---------------------------------------------------------------------- */
/* Emit depth/stencil state derived from ctx->Depth and ctx->Stencil.
 *
 * Depth-test fields are set only when depth->Test is on and a depth
 * renderbuffer is attached.  Stencil fields are set only when
 * stencil->_Enabled; the back-face fields use index b = stencil->_BackFace
 * and are filled in only when stencil->_TestTwoSide is set.  Write and
 * test masks are truncated to 8 bits.
 *
 * Two emit forms are visible: 3DSTATE_WM_DEPTH_STENCIL through
 * brw_batch_emit, and DEPTH_STENCIL_STATE through brw_state_emit, whose
 * ds_offset is then published via 3DSTATE_CC_STATE_POINTERS or
 * 3DSTATE_DEPTH_STENCIL_STATE_POINTERS.
 *
 * NOTE(review): the preprocessor conditionals selecting between these
 * forms, and the declaration of ds_offset, are not visible in this listing
 * -- presumably keyed on GEN_GEN; confirm against the original file.
 */
372 genX(upload_depth_stencil_state
)(struct brw_context
*brw
)
374 struct gl_context
*ctx
= &brw
->ctx
;
377 struct intel_renderbuffer
*depth_irb
=
378 intel_get_renderbuffer(ctx
->DrawBuffer
, BUFFER_DEPTH
);
381 struct gl_depthbuffer_attrib
*depth
= &ctx
->Depth
;
384 struct gl_stencil_attrib
*stencil
= &ctx
->Stencil
;
385 const int b
= stencil
->_BackFace
;
388 brw_batch_emit(brw
, GENX(3DSTATE_WM_DEPTH_STENCIL
), wmds
) {
391 brw_state_emit(brw
, GENX(DEPTH_STENCIL_STATE
), 64, &ds_offset
, wmds
) {
393 if (depth
->Test
&& depth_irb
) {
394 wmds
.DepthTestEnable
= true;
395 wmds
.DepthBufferWriteEnable
= brw_depth_writes_enabled(brw
);
396 wmds
.DepthTestFunction
= intel_translate_compare_func(depth
->Func
);
399 if (stencil
->_Enabled
) {
400 wmds
.StencilTestEnable
= true;
401 wmds
.StencilWriteMask
= stencil
->WriteMask
[0] & 0xff;
402 wmds
.StencilTestMask
= stencil
->ValueMask
[0] & 0xff;
404 wmds
.StencilTestFunction
=
405 intel_translate_compare_func(stencil
->Function
[0]);
407 intel_translate_stencil_op(stencil
->FailFunc
[0]);
408 wmds
.StencilPassDepthPassOp
=
409 intel_translate_stencil_op(stencil
->ZPassFunc
[0]);
410 wmds
.StencilPassDepthFailOp
=
411 intel_translate_stencil_op(stencil
->ZFailFunc
[0]);
413 wmds
.StencilBufferWriteEnable
= stencil
->_WriteEnabled
;
415 if (stencil
->_TestTwoSide
) {
416 wmds
.DoubleSidedStencilEnable
= true;
417 wmds
.BackfaceStencilWriteMask
= stencil
->WriteMask
[b
] & 0xff;
418 wmds
.BackfaceStencilTestMask
= stencil
->ValueMask
[b
] & 0xff;
420 wmds
.BackfaceStencilTestFunction
=
421 intel_translate_compare_func(stencil
->Function
[b
]);
422 wmds
.BackfaceStencilFailOp
=
423 intel_translate_stencil_op(stencil
->FailFunc
[b
]);
424 wmds
.BackfaceStencilPassDepthPassOp
=
425 intel_translate_stencil_op(stencil
->ZPassFunc
[b
]);
426 wmds
.BackfaceStencilPassDepthFailOp
=
427 intel_translate_stencil_op(stencil
->ZFailFunc
[b
]);
431 wmds
.StencilReferenceValue
= _mesa_get_stencil_ref(ctx
, 0);
432 wmds
.BackfaceStencilReferenceValue
= _mesa_get_stencil_ref(ctx
, b
);
438 brw_batch_emit(brw
, GENX(3DSTATE_CC_STATE_POINTERS
), ptr
) {
439 ptr
.PointertoDEPTH_STENCIL_STATE
= ds_offset
;
440 ptr
.DEPTH_STENCIL_STATEChange
= true;
443 brw_batch_emit(brw
, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS
), ptr
) {
444 ptr
.PointertoDEPTH_STENCIL_STATE
= ds_offset
;
/* Tracked-state entry for depth/stencil: the .mesa and .brw flag sets
 * (presumably the dirty bits on which .emit is re-run -- confirm) and the
 * emit callback genX(upload_depth_stencil_state).
 * NOTE(review): part of each flag list is not visible in this listing.
 */
449 static const struct brw_tracked_state
genX(depth_stencil_state
) = {
451 .mesa
= _NEW_BUFFERS
|
454 .brw
= BRW_NEW_BLORP
|
455 (GEN_GEN
>= 8 ? BRW_NEW_CONTEXT
457 BRW_NEW_STATE_BASE_ADDRESS
),
459 .emit
= genX(upload_depth_stencil_state
),
462 /* ---------------------------------------------------------------------- */
/* Emit 3DSTATE_CLIP from the current GL state.
 *
 * Visible programming:
 *  - StatisticsEnable is the inverse of brw->meta_in_progress.
 *  - NonPerspectiveBarycentricEnable follows the fragment program's
 *    barycentric modes; EarlyCullEnable is set.
 *  - FrontWinding compares ctx->Polygon._FrontBit with whether the draw
 *    buffer is a user FBO; CullMode follows CullFlag / CullFaceMode.
 *  - The cull-distance mask comes from the VS prog data and the clip-test
 *    mask from ctx->Transform.ClipPlanesEnabled.
 *  - ViewportZClipTestEnable is the inverse of DepthClamp.
 *  - Provoking-vertex selects follow ctx->Light.ProvokingVertex.
 *  - APIMode is APIMODE_D3D for GL_ZERO_TO_ONE clip depth; an APIMODE_OGL
 *    assignment is also present.
 *  - ClipMode is CLIPMODE_REJECT_ALL under ctx->RasterDiscard (with a
 *    perf_debug message); a CLIPMODE_NORMAL assignment is also present.
 *  - ClipEnable is off for _3DPRIM_RECTLIST.
 *  - Point width limits are 0.125 and 255.875; MaximumVPIndex is
 *    viewport_count - 1.
 *  - ForceZeroRTAIndexEnable is set when the framebuffer has zero
 *    geometric layers.
 *
 * NOTE(review): else arms, case labels, breaks and GEN_GEN conditionals are
 * not visible in this listing -- confirm against the original file.
 */
465 genX(upload_clip_state
)(struct brw_context
*brw
)
467 struct gl_context
*ctx
= &brw
->ctx
;
470 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
472 /* BRW_NEW_FS_PROG_DATA */
473 struct brw_wm_prog_data
*wm_prog_data
=
474 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
476 brw_batch_emit(brw
, GENX(3DSTATE_CLIP
), clip
) {
477 clip
.StatisticsEnable
= !brw
->meta_in_progress
;
479 if (wm_prog_data
->barycentric_interp_modes
&
480 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS
)
481 clip
.NonPerspectiveBarycentricEnable
= true;
484 clip
.EarlyCullEnable
= true;
488 clip
.FrontWinding
= ctx
->Polygon
._FrontBit
== _mesa_is_user_fbo(fb
);
490 if (ctx
->Polygon
.CullFlag
) {
491 switch (ctx
->Polygon
.CullFaceMode
) {
493 clip
.CullMode
= CULLMODE_FRONT
;
496 clip
.CullMode
= CULLMODE_BACK
;
498 case GL_FRONT_AND_BACK
:
499 clip
.CullMode
= CULLMODE_BOTH
;
502 unreachable("Should not get here: invalid CullFlag");
505 clip
.CullMode
= CULLMODE_NONE
;
510 clip
.UserClipDistanceCullTestEnableBitmask
=
511 brw_vue_prog_data(brw
->vs
.base
.prog_data
)->cull_distance_mask
;
513 clip
.ViewportZClipTestEnable
= !ctx
->Transform
.DepthClamp
;
517 if (ctx
->Light
.ProvokingVertex
== GL_FIRST_VERTEX_CONVENTION
) {
518 clip
.TriangleStripListProvokingVertexSelect
= 0;
519 clip
.TriangleFanProvokingVertexSelect
= 1;
520 clip
.LineStripListProvokingVertexSelect
= 0;
522 clip
.TriangleStripListProvokingVertexSelect
= 2;
523 clip
.TriangleFanProvokingVertexSelect
= 2;
524 clip
.LineStripListProvokingVertexSelect
= 1;
528 clip
.UserClipDistanceClipTestEnableBitmask
=
529 ctx
->Transform
.ClipPlanesEnabled
;
532 clip
.ForceUserClipDistanceClipTestEnableBitmask
= true;
535 if (ctx
->Transform
.ClipDepthMode
== GL_ZERO_TO_ONE
)
536 clip
.APIMode
= APIMODE_D3D
;
538 clip
.APIMode
= APIMODE_OGL
;
540 clip
.GuardbandClipTestEnable
= true;
542 /* BRW_NEW_VIEWPORT_COUNT */
543 const unsigned viewport_count
= brw
->clip
.viewport_count
;
545 if (ctx
->RasterDiscard
) {
546 clip
.ClipMode
= CLIPMODE_REJECT_ALL
;
548 perf_debug("Rasterizer discard is currently implemented via the "
549 "clipper; having the GS not write primitives would "
550 "likely be faster.\n");
553 clip
.ClipMode
= CLIPMODE_NORMAL
;
556 clip
.ClipEnable
= brw
->primitive
!= _3DPRIM_RECTLIST
;
559 * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
561 if (!brw_is_drawing_points(brw
) && !brw_is_drawing_lines(brw
))
562 clip
.ViewportXYClipTestEnable
= true;
564 clip
.MinimumPointWidth
= 0.125;
565 clip
.MaximumPointWidth
= 255.875;
566 clip
.MaximumVPIndex
= viewport_count
- 1;
567 if (_mesa_geometric_layers(fb
) == 0)
568 clip
.ForceZeroRTAIndexEnable
= true;
/* Tracked-state entry for the clipper: the .mesa and .brw flag sets
 * (presumably the dirty bits on which .emit is re-run -- confirm) and the
 * emit callback genX(upload_clip_state).
 * NOTE(review): part of each flag list is not visible in this listing.
 */
572 static const struct brw_tracked_state
genX(clip_state
) = {
574 .mesa
= _NEW_BUFFERS
|
578 .brw
= BRW_NEW_BLORP
|
580 BRW_NEW_FS_PROG_DATA
|
581 BRW_NEW_GS_PROG_DATA
|
582 BRW_NEW_VS_PROG_DATA
|
583 BRW_NEW_META_IN_PROGRESS
|
585 BRW_NEW_RASTERIZER_DISCARD
|
586 BRW_NEW_TES_PROG_DATA
|
587 BRW_NEW_VIEWPORT_COUNT
,
589 .emit
= genX(upload_clip_state
),
592 /* ---------------------------------------------------------------------- */
/* Emit 3DSTATE_SF from the current GL state.
 *
 * Visible programming: statistics and viewport-transform enables, the
 * depth-buffer surface format, front winding (Polygon._FrontBit compared
 * with render_to_fbo), polygon-offset enables and values
 * (GlobalDepthOffsetConstant is OffsetUnits * 2), front/back fill modes,
 * scissor enable, cull mode, line stipple, multisample rasterization mode,
 * line width and antialiasing, point width (clamped first to the context's
 * MinSize/MaxSize and then to 0.125..255.875), point-width source, smooth
 * points, AA line distance mode, provoking-vertex selects and point-sprite
 * texture-coordinate origin.
 *
 * It also calls genX(calculate_attr_overrides) to fill sf.Attribute and
 * uses its outputs for the URB read length/offset and point-sprite
 * enables; ConstantInterpolationEnable comes from wm_prog_data->flat_inputs.
 *
 * NOTE(review): case labels, else arms, the point_size declaration and the
 * GEN_GEN conditionals are not visible in this listing -- confirm against
 * the original file.
 */
595 genX(upload_sf
)(struct brw_context
*brw
)
597 struct gl_context
*ctx
= &brw
->ctx
;
602 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
603 const bool multisampled_fbo
= _mesa_geometric_samples(ctx
->DrawBuffer
) > 1;
606 brw_batch_emit(brw
, GENX(3DSTATE_SF
), sf
) {
607 sf
.StatisticsEnable
= true;
608 sf
.ViewportTransformEnable
= brw
->sf
.viewport_transform_enable
;
612 sf
.DepthBufferSurfaceFormat
= brw_depthbuffer_format(brw
);
617 sf
.FrontWinding
= ctx
->Polygon
._FrontBit
== render_to_fbo
;
618 sf
.GlobalDepthOffsetEnableSolid
= ctx
->Polygon
.OffsetFill
;
619 sf
.GlobalDepthOffsetEnableWireframe
= ctx
->Polygon
.OffsetLine
;
620 sf
.GlobalDepthOffsetEnablePoint
= ctx
->Polygon
.OffsetPoint
;
622 switch (ctx
->Polygon
.FrontMode
) {
624 sf
.FrontFaceFillMode
= FILL_MODE_SOLID
;
627 sf
.FrontFaceFillMode
= FILL_MODE_WIREFRAME
;
630 sf
.FrontFaceFillMode
= FILL_MODE_POINT
;
633 unreachable("not reached");
636 switch (ctx
->Polygon
.BackMode
) {
638 sf
.BackFaceFillMode
= FILL_MODE_SOLID
;
641 sf
.BackFaceFillMode
= FILL_MODE_WIREFRAME
;
644 sf
.BackFaceFillMode
= FILL_MODE_POINT
;
647 unreachable("not reached");
650 sf
.ScissorRectangleEnable
= true;
652 if (ctx
->Polygon
.CullFlag
) {
653 switch (ctx
->Polygon
.CullFaceMode
) {
655 sf
.CullMode
= CULLMODE_FRONT
;
658 sf
.CullMode
= CULLMODE_BACK
;
660 case GL_FRONT_AND_BACK
:
661 sf
.CullMode
= CULLMODE_BOTH
;
664 unreachable("not reached");
667 sf
.CullMode
= CULLMODE_NONE
;
671 sf
.LineStippleEnable
= ctx
->Line
.StippleFlag
;
674 if (multisampled_fbo
&& ctx
->Multisample
.Enabled
)
675 sf
.MultisampleRasterizationMode
= MSRASTMODE_ON_PATTERN
;
677 sf
.GlobalDepthOffsetConstant
= ctx
->Polygon
.OffsetUnits
* 2;
678 sf
.GlobalDepthOffsetScale
= ctx
->Polygon
.OffsetFactor
;
679 sf
.GlobalDepthOffsetClamp
= ctx
->Polygon
.OffsetClamp
;
683 sf
.LineWidth
= brw_get_line_width_float(brw
);
685 if (ctx
->Line
.SmoothFlag
) {
686 sf
.LineEndCapAntialiasingRegionWidth
= _10pixels
;
688 sf
.AntiAliasingEnable
= true;
692 /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
693 point_size
= CLAMP(ctx
->Point
.Size
, ctx
->Point
.MinSize
, ctx
->Point
.MaxSize
);
694 /* Clamp to the hardware limits */
695 sf
.PointWidth
= CLAMP(point_size
, 0.125f
, 255.875f
);
697 /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
698 if (use_state_point_size(brw
))
699 sf
.PointWidthSource
= State
;
702 /* _NEW_POINT | _NEW_MULTISAMPLE */
703 if ((ctx
->Point
.SmoothFlag
|| _mesa_is_multisample_enabled(ctx
)) &&
704 !ctx
->Point
.PointSprite
)
705 sf
.SmoothPointEnable
= true;
708 sf
.AALineDistanceMode
= AALINEDISTANCE_TRUE
;
711 if (ctx
->Light
.ProvokingVertex
!= GL_FIRST_VERTEX_CONVENTION
) {
712 sf
.TriangleStripListProvokingVertexSelect
= 2;
713 sf
.TriangleFanProvokingVertexSelect
= 2;
714 sf
.LineStripListProvokingVertexSelect
= 1;
716 sf
.TriangleFanProvokingVertexSelect
= 1;
720 /* BRW_NEW_FS_PROG_DATA */
721 const struct brw_wm_prog_data
*wm_prog_data
=
722 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
724 sf
.AttributeSwizzleEnable
= true;
725 sf
.NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
;
728 * Window coordinates in an FBO are inverted, which means point
729 * sprite origin must be inverted, too.
731 if ((ctx
->Point
.SpriteOrigin
== GL_LOWER_LEFT
) != render_to_fbo
) {
732 sf
.PointSpriteTextureCoordinateOrigin
= LOWERLEFT
;
734 sf
.PointSpriteTextureCoordinateOrigin
= UPPERLEFT
;
737 /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
738 * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
740 uint32_t urb_entry_read_length
;
741 uint32_t urb_entry_read_offset
;
742 uint32_t point_sprite_enables
;
743 genX(calculate_attr_overrides
)(brw
, sf
.Attribute
, &point_sprite_enables
,
744 &urb_entry_read_length
,
745 &urb_entry_read_offset
);
746 sf
.VertexURBEntryReadLength
= urb_entry_read_length
;
747 sf
.VertexURBEntryReadOffset
= urb_entry_read_offset
;
748 sf
.PointSpriteTextureCoordinateEnable
= point_sprite_enables
;
749 sf
.ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
;
// Tracked-state entry for the SF unit: the .brw flag set (presumably the
// dirty bits on which .emit is re-run -- confirm) and the emit callback
// genX(upload_sf).  Some flags are selected by GEN_GEN ternaries.
// NOTE(review): the .mesa designator and part of each flag list are not
// visible in this listing -- confirm against the original file.
754 static const struct brw_tracked_state
genX(sf_state
) = {
761 (GEN_GEN
<= 7 ? _NEW_BUFFERS
| _NEW_POLYGON
: 0),
762 .brw
= BRW_NEW_BLORP
|
764 BRW_NEW_VUE_MAP_GEOM_OUT
|
765 (GEN_GEN
<= 7 ? BRW_NEW_GS_PROG_DATA
|
767 BRW_NEW_TES_PROG_DATA
769 (GEN_GEN
== 6 ? BRW_NEW_FS_PROG_DATA
|
770 BRW_NEW_FRAGMENT_PROGRAM
773 .emit
= genX(upload_sf
),
776 /* ---------------------------------------------------------------------- */
/* Emit the pixel-shader (WM) state for the current fragment program.
 *
 * Visible steps:
 *  - 3DSTATE_CONSTANT_PS: when wm_prog_data->base.nr_params is non-zero,
 *    mark buffer 0 valid and point it at the stage's push constants, with
 *    read length push_const_size - 1.
 *  - 3DSTATE_WM: statistics, line antialiasing widths, floating-point mode,
 *    sampler and binding-table counts, maximum thread count
 *    (max_wm_threads - 1), 8/16-pixel dispatch enables, GRF start registers
 *    and kernel start pointers, dual-source blend, oMask, number of SF
 *    output attributes, position XY offset select, scratch space, computed
 *    depth, point rasterization rule, line/polygon stipple, barycentric
 *    modes, source depth/W usage, pixel-kill, thread dispatch enable,
 *    multisample rasterization/dispatch modes, computed-depth mode, input
 *    coverage mask and early depth/stencil control.
 *
 * writes_depth is true when computed_depth_mode is not BRW_PSCDEPTH_OFF.
 *
 * NOTE(review): else arms and the GEN_GEN conditionals that select which
 * fields apply are not visible in this listing -- confirm against the
 * original file.
 */
779 genX(upload_wm
)(struct brw_context
*brw
)
781 struct gl_context
*ctx
= &brw
->ctx
;
783 /* BRW_NEW_FS_PROG_DATA */
784 const struct brw_wm_prog_data
*wm_prog_data
=
785 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
787 UNUSED
bool writes_depth
=
788 wm_prog_data
->computed_depth_mode
!= BRW_PSCDEPTH_OFF
;
791 const struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
792 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
794 /* We can't fold this into gen6_upload_wm_push_constants(), because
795 * according to the SNB PRM, vol 2 part 1 section 7.2.2
796 * (3DSTATE_CONSTANT_PS [DevSNB]):
798 * "[DevSNB]: This packet must be followed by WM_STATE."
800 brw_batch_emit(brw
, GENX(3DSTATE_CONSTANT_PS
), wmcp
) {
801 if (wm_prog_data
->base
.nr_params
!= 0) {
802 wmcp
.Buffer0Valid
= true;
803 /* Pointer to the WM constant buffer. Covered by the set of
804 * state flags from gen6_upload_wm_push_constants.
806 wmcp
.PointertoPSConstantBuffer0
= stage_state
->push_const_offset
;
807 wmcp
.PSConstantBuffer0ReadLength
= stage_state
->push_const_size
- 1;
812 brw_batch_emit(brw
, GENX(3DSTATE_WM
), wm
) {
813 wm
.StatisticsEnable
= true;
814 wm
.LineAntialiasingRegionWidth
= _10pixels
;
815 wm
.LineEndCapAntialiasingRegionWidth
= _05pixels
;
818 if (wm_prog_data
->base
.use_alt_mode
)
819 wm
.FloatingPointMode
= Alternate
;
821 wm
.SamplerCount
= DIV_ROUND_UP(stage_state
->sampler_count
, 4);
822 wm
.BindingTableEntryCount
= wm_prog_data
->base
.binding_table
.size_bytes
/ 4;
823 wm
.MaximumNumberofThreads
= devinfo
->max_wm_threads
- 1;
824 wm
._8PixelDispatchEnable
= wm_prog_data
->dispatch_8
;
825 wm
._16PixelDispatchEnable
= wm_prog_data
->dispatch_16
;
826 wm
.DispatchGRFStartRegisterForConstantSetupData0
=
827 wm_prog_data
->base
.dispatch_grf_start_reg
;
828 wm
.DispatchGRFStartRegisterForConstantSetupData2
=
829 wm_prog_data
->dispatch_grf_start_reg_2
;
830 wm
.KernelStartPointer0
= stage_state
->prog_offset
;
831 wm
.KernelStartPointer2
= stage_state
->prog_offset
+
832 wm_prog_data
->prog_offset_2
;
833 wm
.DualSourceBlendEnable
=
834 wm_prog_data
->dual_src_blend
&& (ctx
->Color
.BlendEnabled
& 1) &&
835 ctx
->Color
.Blend
[0]._UsesDualSrc
;
836 wm
.oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
;
837 wm
.NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
;
839 /* From the SNB PRM, volume 2 part 1, page 281:
840 * "If the PS kernel does not need the Position XY Offsets
841 * to compute a Position XY value, then this field should be
842 * programmed to POSOFFSET_NONE."
844 * "SW Recommendation: If the PS kernel needs the Position Offsets
845 * to compute a Position XY value, this field should match Position
846 * ZW Interpolation Mode to ensure a consistent position.xyzw
848 * We only require XY sample offsets. So, this recommendation doesn't
849 * look useful at the moment. We might need this in future.
851 if (wm_prog_data
->uses_pos_offset
)
852 wm
.PositionXYOffsetSelect
= POSOFFSET_SAMPLE
;
854 wm
.PositionXYOffsetSelect
= POSOFFSET_NONE
;
856 if (wm_prog_data
->base
.total_scratch
) {
857 wm
.ScratchSpaceBasePointer
=
858 render_bo(stage_state
->scratch_bo
,
859 ffs(stage_state
->per_thread_scratch
) - 11);
862 wm
.PixelShaderComputedDepth
= writes_depth
;
865 wm
.PointRasterizationRule
= RASTRULE_UPPER_RIGHT
;
868 wm
.LineStippleEnable
= ctx
->Line
.StippleFlag
;
871 wm
.PolygonStippleEnable
= ctx
->Polygon
.StippleFlag
;
872 wm
.BarycentricInterpolationMode
= wm_prog_data
->barycentric_interp_modes
;
876 const bool multisampled_fbo
= _mesa_geometric_samples(ctx
->DrawBuffer
) > 1;
878 wm
.PixelShaderUsesSourceDepth
= wm_prog_data
->uses_src_depth
;
879 wm
.PixelShaderUsesSourceW
= wm_prog_data
->uses_src_w
;
880 if (wm_prog_data
->uses_kill
||
881 _mesa_is_alpha_test_enabled(ctx
) ||
882 _mesa_is_alpha_to_coverage_enabled(ctx
) ||
883 wm_prog_data
->uses_omask
) {
884 wm
.PixelShaderKillsPixel
= true;
887 /* _NEW_BUFFERS | _NEW_COLOR */
888 if (brw_color_buffer_write_enabled(brw
) || writes_depth
||
889 wm_prog_data
->has_side_effects
|| wm
.PixelShaderKillsPixel
) {
890 wm
.ThreadDispatchEnable
= true;
892 if (multisampled_fbo
) {
893 /* _NEW_MULTISAMPLE */
894 if (ctx
->Multisample
.Enabled
)
895 wm
.MultisampleRasterizationMode
= MSRASTMODE_ON_PATTERN
;
897 wm
.MultisampleRasterizationMode
= MSRASTMODE_OFF_PIXEL
;
899 if (wm_prog_data
->persample_dispatch
)
900 wm
.MultisampleDispatchMode
= MSDISPMODE_PERSAMPLE
;
902 wm
.MultisampleDispatchMode
= MSDISPMODE_PERPIXEL
;
904 wm
.MultisampleRasterizationMode
= MSRASTMODE_OFF_PIXEL
;
905 wm
.MultisampleDispatchMode
= MSDISPMODE_PERSAMPLE
;
909 wm
.PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
;
910 wm
.PixelShaderUsesInputCoverageMask
= wm_prog_data
->uses_sample_mask
;
913 /* The "UAV access enable" bits are unnecessary on HSW because they only
914 * seem to have an effect on the HW-assisted coherency mechanism which we
915 * don't need, and the rasterization-related UAV_ONLY flag and the
916 * DISPATCH_ENABLE bit can be set independently from it.
917 * C.f. gen8_upload_ps_extra().
919 * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS |
923 if (!(brw_color_buffer_write_enabled(brw
) || writes_depth
) &&
924 wm_prog_data
->has_side_effects
)
930 /* BRW_NEW_FS_PROG_DATA */
931 if (wm_prog_data
->early_fragment_tests
)
932 wm
.EarlyDepthStencilControl
= EDSC_PREPS
;
933 else if (wm_prog_data
->has_side_effects
)
934 wm
.EarlyDepthStencilControl
= EDSC_PSEXEC
;
/* Tracked-state entry for the WM unit: the .brw flag set (presumably the
 * dirty bits on which .emit is re-run -- confirm) and the emit callback
 * genX(upload_wm).  Several flags are selected by GEN_GEN ternaries.
 * NOTE(review): the .mesa designator and part of each flag list are not
 * visible in this listing -- confirm against the original file.
 */
939 static const struct brw_tracked_state
genX(wm_state
) = {
943 (GEN_GEN
< 8 ? _NEW_BUFFERS
|
947 (GEN_GEN
< 7 ? _NEW_PROGRAM_CONSTANTS
: 0),
948 .brw
= BRW_NEW_BLORP
|
949 BRW_NEW_FS_PROG_DATA
|
950 (GEN_GEN
< 7 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION
|
954 .emit
= genX(upload_wm
),
959 /* ---------------------------------------------------------------------- */
/* Emit 3DSTATE_SBE, and 3DSTATE_SBE_SWIZ where that packet is used.
 *
 * Two storage forms for the overrides are visible: a local
 * attr_overrides[16] array, and a macro aliasing attr_overrides to
 * sbe.Attribute (undefined again at the end).  Inside the SBE packet the
 * function enables attribute swizzling, sets the number of SF output
 * attributes, picks the point-sprite origin (LOWERLEFT when
 * SpriteOrigin == GL_LOWER_LEFT differs from render_to_fbo; an UPPERLEFT
 * assignment is also present), calls genX(calculate_attr_overrides), and
 * programs the URB read length/offset, point-sprite enables and flat-input
 * mask from its outputs.  It also forces the read length and offset, and
 * sets AttributeActiveComponentFormat[input_index] to
 * ACTIVE_COMPONENT_XYZW inside a loop over varying slots that tests
 * inputs_read.  3DSTATE_SBE_SWIZ then copies the 16 overrides.
 *
 * NOTE(review): the GEN_GEN conditionals choosing between these forms, the
 * attr_overrides argument of the calculate call, and the loop's
 * continue/increment statements are not visible in this listing -- confirm
 * against the original file.
 */
963 genX(upload_sbe
)(struct brw_context
*brw
)
965 struct gl_context
*ctx
= &brw
->ctx
;
966 /* BRW_NEW_FS_PROG_DATA */
967 const struct brw_wm_prog_data
*wm_prog_data
=
968 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
970 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) attr_overrides
[16] = { { 0 } };
972 #define attr_overrides sbe.Attribute
974 uint32_t urb_entry_read_length
;
975 uint32_t urb_entry_read_offset
;
976 uint32_t point_sprite_enables
;
978 brw_batch_emit(brw
, GENX(3DSTATE_SBE
), sbe
) {
979 sbe
.AttributeSwizzleEnable
= true;
980 sbe
.NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
;
983 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
987 * Window coordinates in an FBO are inverted, which means point
988 * sprite origin must be inverted.
990 if ((ctx
->Point
.SpriteOrigin
== GL_LOWER_LEFT
) != render_to_fbo
)
991 sbe
.PointSpriteTextureCoordinateOrigin
= LOWERLEFT
;
993 sbe
.PointSpriteTextureCoordinateOrigin
= UPPERLEFT
;
995 /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM,
996 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM |
997 * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA |
998 * BRW_NEW_VUE_MAP_GEOM_OUT
1000 genX(calculate_attr_overrides
)(brw
,
1002 &point_sprite_enables
,
1003 &urb_entry_read_length
,
1004 &urb_entry_read_offset
);
1006 /* Typically, the URB entry read length and offset should be programmed
1007 * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active
1008 * stage which produces geometry. However, we don't know the proper
1009 * value until we call calculate_attr_overrides().
1011 * To fit with our existing code, we override the inherited values and
1012 * specify it here directly, as we did on previous generations.
1014 sbe
.VertexURBEntryReadLength
= urb_entry_read_length
;
1015 sbe
.VertexURBEntryReadOffset
= urb_entry_read_offset
;
1016 sbe
.PointSpriteTextureCoordinateEnable
= point_sprite_enables
;
1017 sbe
.ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
;
1020 sbe
.ForceVertexURBEntryReadLength
= true;
1021 sbe
.ForceVertexURBEntryReadOffset
= true;
1025 /* prepare the active component dwords */
1026 int input_index
= 0;
1027 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
1028 if (!(brw
->fragment_program
->info
.inputs_read
&
1029 BITFIELD64_BIT(attr
))) {
1033 assert(input_index
< 32);
1035 sbe
.AttributeActiveComponentFormat
[input_index
] = ACTIVE_COMPONENT_XYZW
;
1042 brw_batch_emit(brw
, GENX(3DSTATE_SBE_SWIZ
), sbes
) {
1043 for (int i
= 0; i
< 16; i
++)
1044 sbes
.Attribute
[i
] = attr_overrides
[i
];
1048 #undef attr_overrides
/* Tracked-state entry for SBE: the .mesa and .brw flag sets (presumably the
 * dirty bits on which .emit is re-run -- confirm) and the emit callback
 * genX(upload_sbe).  BRW_NEW_PRIMITIVE is selected by a GEN_GEN == 7
 * ternary.
 * NOTE(review): part of each flag list is not visible in this listing.
 */
1051 static const struct brw_tracked_state
genX(sbe_state
) = {
1053 .mesa
= _NEW_BUFFERS
|
1058 .brw
= BRW_NEW_BLORP
|
1060 BRW_NEW_FRAGMENT_PROGRAM
|
1061 BRW_NEW_FS_PROG_DATA
|
1062 BRW_NEW_GS_PROG_DATA
|
1063 BRW_NEW_TES_PROG_DATA
|
1064 BRW_NEW_VUE_MAP_GEOM_OUT
|
1065 (GEN_GEN
== 7 ? BRW_NEW_PRIMITIVE
1068 .emit
= genX(upload_sbe
),
1071 /* ---------------------------------------------------------------------- */
/*
 * Build and emit 3DSTATE_SO_DECL_LIST for the current transform feedback
 * object.
 *
 * brw     - context; the transform feedback object is read from
 *           brw->ctx.TransformFeedback.CurrentObject.
 * vue_map - used to translate each output's varying into a register index
 *           through vue_map->varying_to_slot[].
 *
 * For every linked output the loop:
 *   - builds a component mask of NumComponents bits, moved to the .w/.y/.z
 *     position for PSIZ/LAYER/VIEWPORT, otherwise shifted by ComponentOffset;
 *   - records the destination buffer in buffer_mask[stream_id];
 *   - inserts extra SO_DECLs covering the gap between DstOffset and
 *     next_offset[buffer] (4 components at a time, then the remainder);
 *   - appends the real SO_DECL and tracks the largest per-stream count.
 * The packet is then emitted with 3 + 2 * max_decls dwords and one
 * SO_DECL_ENTRY (Stream0Decl..Stream3Decl) is packed per index at dw + 2.
 *
 * NOTE(review): the older comment below says only stream 0 is emitted, but
 * the code fills per-stream arrays for all MAX_VERTEX_STREAMS streams.
 * NOTE(review): max_decls and dw are used without a visible declaration and
 * the embedded line numbers skip (1093 -> 1095, 1177 -> 1181, 1156 -> 1158),
 * so some statements are not shown in this listing; check the full file.
 */
1074 * Outputs the 3DSTATE_SO_DECL_LIST command.
1076 * The data output is a series of 64-bit entries containing a SO_DECL per
1077 * stream. We only have one stream of rendering coming out of the GS unit, so
1078 * we only emit stream 0 (low 16 bits) SO_DECLs.
1081 genX(upload_3dstate_so_decl_list
)(struct brw_context
*brw
,
1082 const struct brw_vue_map
*vue_map
)
1084 struct gl_context
*ctx
= &brw
->ctx
;
1085 /* BRW_NEW_TRANSFORM_FEEDBACK */
1086 struct gl_transform_feedback_object
*xfb_obj
=
1087 ctx
->TransformFeedback
.CurrentObject
;
1088 const struct gl_transform_feedback_info
*linked_xfb_info
=
1089 xfb_obj
->program
->sh
.LinkedTransformFeedback
;
1090 struct GENX(SO_DECL
) so_decl
[MAX_VERTEX_STREAMS
][128];
1091 int buffer_mask
[MAX_VERTEX_STREAMS
] = {0, 0, 0, 0};
1092 int next_offset
[MAX_VERTEX_STREAMS
] = {0, 0, 0, 0};
1093 int decls
[MAX_VERTEX_STREAMS
] = {0, 0, 0, 0};
1095 STATIC_ASSERT(ARRAY_SIZE(so_decl
[0]) >= MAX_PROGRAM_OUTPUTS
);
1097 memset(so_decl
, 0, sizeof(so_decl
));
1099 /* Construct the list of SO_DECLs to be emitted. The formatting of the
1100 * command feels strange -- each dword pair contains a SO_DECL per stream.
1102 for (unsigned i
= 0; i
< linked_xfb_info
->NumOutputs
; i
++) {
1103 int buffer
= linked_xfb_info
->Outputs
[i
].OutputBuffer
;
1104 struct GENX(SO_DECL
) decl
= {0};
1105 int varying
= linked_xfb_info
->Outputs
[i
].OutputRegister
;
1106 const unsigned components
= linked_xfb_info
->Outputs
[i
].NumComponents
;
1107 unsigned component_mask
= (1 << components
) - 1;
1108 unsigned stream_id
= linked_xfb_info
->Outputs
[i
].StreamId
;
1109 unsigned decl_buffer_slot
= buffer
;
1110 assert(stream_id
< MAX_VERTEX_STREAMS
);
1112 /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
1113 * gl_Layer is stored in VARYING_SLOT_PSIZ.y
1114 * gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
1116 if (varying
== VARYING_SLOT_PSIZ
) {
1117 assert(components
== 1);
1118 component_mask
<<= 3;
1119 } else if (varying
== VARYING_SLOT_LAYER
) {
1120 assert(components
== 1);
1121 component_mask
<<= 1;
1122 } else if (varying
== VARYING_SLOT_VIEWPORT
) {
1123 assert(components
== 1);
1124 component_mask
<<= 2;
1126 component_mask
<<= linked_xfb_info
->Outputs
[i
].ComponentOffset
;
1129 buffer_mask
[stream_id
] |= 1 << buffer
;
1131 decl
.OutputBufferSlot
= decl_buffer_slot
;
1132 if (varying
== VARYING_SLOT_LAYER
|| varying
== VARYING_SLOT_VIEWPORT
) {
1133 decl
.RegisterIndex
= vue_map
->varying_to_slot
[VARYING_SLOT_PSIZ
];
1135 assert(vue_map
->varying_to_slot
[varying
] >= 0);
1136 decl
.RegisterIndex
= vue_map
->varying_to_slot
[varying
];
1138 decl
.ComponentMask
= component_mask
;
1140 /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
1141 * array. Instead, it simply increments DstOffset for the following
1142 * input by the number of components that should be skipped.
1144 * Our hardware is unusual in that it requires us to program SO_DECLs
1145 * for fake "hole" components, rather than simply taking the offset
1146 * for each real varying. Each hole can have size 1, 2, 3, or 4; we
1147 * program as many size = 4 holes as we can, then a final hole to
1148 * accommodate the final 1, 2, or 3 remaining.
1150 int skip_components
=
1151 linked_xfb_info
->Outputs
[i
].DstOffset
- next_offset
[buffer
];
1153 next_offset
[buffer
] += skip_components
;
1155 while (skip_components
>= 4) {
1156 struct GENX(SO_DECL
) *d
= &so_decl
[stream_id
][decls
[stream_id
]++];
1158 d
->OutputBufferSlot
= decl_buffer_slot
;
1159 d
->ComponentMask
= 0xf;
1160 skip_components
-= 4;
1163 if (skip_components
> 0) {
1164 struct GENX(SO_DECL
) *d
= &so_decl
[stream_id
][decls
[stream_id
]++];
1166 d
->OutputBufferSlot
= decl_buffer_slot
;
1167 d
->ComponentMask
= (1 << skip_components
) - 1;
1170 assert(linked_xfb_info
->Outputs
[i
].DstOffset
== next_offset
[buffer
]);
1172 next_offset
[buffer
] += components
;
1174 so_decl
[stream_id
][decls
[stream_id
]++] = decl
;
1176 if (decls
[stream_id
] > max_decls
)
1177 max_decls
= decls
[stream_id
];
1181 dw
= brw_batch_emitn(brw
, GENX(3DSTATE_SO_DECL_LIST
), 3 + 2 * max_decls
,
1182 .StreamtoBufferSelects0
= buffer_mask
[0],
1183 .StreamtoBufferSelects1
= buffer_mask
[1],
1184 .StreamtoBufferSelects2
= buffer_mask
[2],
1185 .StreamtoBufferSelects3
= buffer_mask
[3],
1186 .NumEntries0
= decls
[0],
1187 .NumEntries1
= decls
[1],
1188 .NumEntries2
= decls
[2],
1189 .NumEntries3
= decls
[3]);
1191 for (int i
= 0; i
< max_decls
; i
++) {
1192 GENX(SO_DECL_ENTRY_pack
)(
1193 brw
, dw
+ 2 + i
* 2,
1194 &(struct GENX(SO_DECL_ENTRY
)) {
1195 .Stream0Decl
= so_decl
[0][i
],
1196 .Stream1Decl
= so_decl
[1][i
],
1197 .Stream2Decl
= so_decl
[2][i
],
1198 .Stream3Decl
= so_decl
[3][i
],
/*
 * Emit one 3DSTATE_SO_BUFFER packet for each of the four transform feedback
 * buffer slots of the current transform feedback object.
 *
 * For a slot the visible code computes start = xfb_obj->Offset[i] (asserted
 * to be a multiple of 4) and end = ALIGN(start + Size[i], 4), then programs
 * the base/end addresses, a pitch of Stride * 4, a SurfaceSize derived from
 * Size[i] / 4, and an offset address of i * sizeof(uint32_t) inside
 * brw_obj->offset_bo.  StreamOffset is 0 when brw_obj->zero_offsets is set
 * and 0xFFFFFFFF otherwise; zero_offsets is cleared at the end.
 *
 * NOTE(review): two brw_batch_emit(3DSTATE_SO_BUFFER) sites are visible, the
 * first setting only SOBufferIndex - presumably the path for an unbound
 * slot; the condition selecting it is not shown in this listing.
 * NOTE(review): `bo` is used without a visible declaration and several
 * fields look generation-specific (SurfaceEndAddress vs. SurfaceSize /
 * SOBufferMOCS); the preprocessor guards are not visible here.  Verify
 * against the full file.
 */
1204 genX(upload_3dstate_so_buffers
)(struct brw_context
*brw
)
1206 struct gl_context
*ctx
= &brw
->ctx
;
1207 /* BRW_NEW_TRANSFORM_FEEDBACK */
1208 struct gl_transform_feedback_object
*xfb_obj
=
1209 ctx
->TransformFeedback
.CurrentObject
;
1211 const struct gl_transform_feedback_info
*linked_xfb_info
=
1212 xfb_obj
->program
->sh
.LinkedTransformFeedback
;
1214 struct brw_transform_feedback_object
*brw_obj
=
1215 (struct brw_transform_feedback_object
*) xfb_obj
;
1216 uint32_t mocs_wb
= brw
->gen
>= 9 ? SKL_MOCS_WB
: BDW_MOCS_WB
;
1219 /* Set up the up to 4 output buffers. These are the ranges defined in the
1220 * gl_transform_feedback_object.
1222 for (int i
= 0; i
< 4; i
++) {
1223 struct intel_buffer_object
*bufferobj
=
1224 intel_buffer_object(xfb_obj
->Buffers
[i
]);
1227 brw_batch_emit(brw
, GENX(3DSTATE_SO_BUFFER
), sob
) {
1228 sob
.SOBufferIndex
= i
;
1233 uint32_t start
= xfb_obj
->Offset
[i
];
1234 assert(start
% 4 == 0);
1235 uint32_t end
= ALIGN(start
+ xfb_obj
->Size
[i
], 4);
1237 intel_bufferobj_buffer(brw
, bufferobj
, start
, end
- start
);
1238 assert(end
<= bo
->size
);
1240 brw_batch_emit(brw
, GENX(3DSTATE_SO_BUFFER
), sob
) {
1241 sob
.SOBufferIndex
= i
;
1243 sob
.SurfaceBaseAddress
= render_bo(bo
, start
);
1245 sob
.SurfacePitch
= linked_xfb_info
->Buffers
[i
].Stride
* 4;
1246 sob
.SurfaceEndAddress
= render_bo(bo
, end
);
1248 sob
.SOBufferEnable
= true;
1249 sob
.StreamOffsetWriteEnable
= true;
1250 sob
.StreamOutputBufferOffsetAddressEnable
= true;
1251 sob
.SOBufferMOCS
= mocs_wb
;
1253 sob
.SurfaceSize
= MAX2(xfb_obj
->Size
[i
] / 4, 1) - 1;
1254 sob
.StreamOutputBufferOffsetAddress
=
1255 instruction_bo(brw_obj
->offset_bo
, i
* sizeof(uint32_t));
1257 if (brw_obj
->zero_offsets
) {
1258 /* Zero out the offset and write that to offset_bo */
1259 sob
.StreamOffset
= 0;
1261 /* Use offset_bo as the "Stream Offset." */
1262 sob
.StreamOffset
= 0xFFFFFFFF;
1269 brw_obj
->zero_offsets
= false;
1274 query_active(struct gl_query_object
*q
)
1276 return q
&& q
->Active
;
/*
 * Emit the 3DSTATE_STREAMOUT packet.
 *
 * brw     - context; GL state is read from brw->ctx.
 * active  - whether transform feedback is active (passed in by
 *           genX(upload_sol)).
 * vue_map - its num_slots determines the per-stream vertex read length:
 *           (num_slots + 1) / 2 minus a read offset of 0; the packet fields
 *           are programmed with that length minus one.
 *
 * Visible behaviour: enables the SO function and statistics; with
 * rasterizer discard on, sets RenderingDisable unless a
 * PrimitivesGenerated[0] query is active (in which case only a perf_debug
 * message is issued); selects TRAILING reorder mode when the provoking
 * vertex is not the first vertex; enables each of the four SO buffers that
 * is bound and programs its pitch as Stride * 4; programs identical read
 * offset/length for all four streams.
 *
 * NOTE(review): `active` is not referenced in any visible line, so the
 * statement that consumes it (presumably a guard around the enable block)
 * is not shown in this listing.  The SOBufferEnableN fields and the
 * BufferNSurfacePitch fields also look like per-generation alternatives
 * whose preprocessor guards are not visible.  Verify against the full file.
 */
1280 genX(upload_3dstate_streamout
)(struct brw_context
*brw
, bool active
,
1281 const struct brw_vue_map
*vue_map
)
1283 struct gl_context
*ctx
= &brw
->ctx
;
1284 /* BRW_NEW_TRANSFORM_FEEDBACK */
1285 struct gl_transform_feedback_object
*xfb_obj
=
1286 ctx
->TransformFeedback
.CurrentObject
;
1288 brw_batch_emit(brw
, GENX(3DSTATE_STREAMOUT
), sos
) {
1290 int urb_entry_read_offset
= 0;
1291 int urb_entry_read_length
= (vue_map
->num_slots
+ 1) / 2 -
1292 urb_entry_read_offset
;
1294 sos
.SOFunctionEnable
= true;
1295 sos
.SOStatisticsEnable
= true;
1297 /* BRW_NEW_RASTERIZER_DISCARD */
1298 if (ctx
->RasterDiscard
) {
1299 if (!query_active(ctx
->Query
.PrimitivesGenerated
[0])) {
1300 sos
.RenderingDisable
= true;
1302 perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
1303 "query active relies on the clipper.");
1308 if (ctx
->Light
.ProvokingVertex
!= GL_FIRST_VERTEX_CONVENTION
)
1309 sos
.ReorderMode
= TRAILING
;
1312 sos
.SOBufferEnable0
= xfb_obj
->Buffers
[0] != NULL
;
1313 sos
.SOBufferEnable1
= xfb_obj
->Buffers
[1] != NULL
;
1314 sos
.SOBufferEnable2
= xfb_obj
->Buffers
[2] != NULL
;
1315 sos
.SOBufferEnable3
= xfb_obj
->Buffers
[3] != NULL
;
1317 const struct gl_transform_feedback_info
*linked_xfb_info
=
1318 xfb_obj
->program
->sh
.LinkedTransformFeedback
;
1319 /* Set buffer pitches; 0 means unbound. */
1320 if (xfb_obj
->Buffers
[0])
1321 sos
.Buffer0SurfacePitch
= linked_xfb_info
->Buffers
[0].Stride
* 4;
1322 if (xfb_obj
->Buffers
[1])
1323 sos
.Buffer1SurfacePitch
= linked_xfb_info
->Buffers
[1].Stride
* 4;
1324 if (xfb_obj
->Buffers
[2])
1325 sos
.Buffer2SurfacePitch
= linked_xfb_info
->Buffers
[2].Stride
* 4;
1326 if (xfb_obj
->Buffers
[3])
1327 sos
.Buffer3SurfacePitch
= linked_xfb_info
->Buffers
[3].Stride
* 4;
1330 /* We always read the whole vertex. This could be reduced at some
1331 * point by reading less and offsetting the register index in the
1334 sos
.Stream0VertexReadOffset
= urb_entry_read_offset
;
1335 sos
.Stream0VertexReadLength
= urb_entry_read_length
- 1;
1336 sos
.Stream1VertexReadOffset
= urb_entry_read_offset
;
1337 sos
.Stream1VertexReadLength
= urb_entry_read_length
- 1;
1338 sos
.Stream2VertexReadOffset
= urb_entry_read_offset
;
1339 sos
.Stream2VertexReadLength
= urb_entry_read_length
- 1;
1340 sos
.Stream3VertexReadOffset
= urb_entry_read_offset
;
1341 sos
.Stream3VertexReadLength
= urb_entry_read_length
- 1;
/*
 * Emit callback for the stream-output (SOL) stage.
 *
 * Computes `active` from _mesa_is_xfb_active_and_unpaused(ctx), uploads the
 * SO buffers and the SO declaration list, and finally emits
 * 3DSTATE_STREAMOUT, which per the comment below must follow the other two.
 * Both later calls take brw->vue_map_geom_out as the VUE map.
 *
 * NOTE(review): the embedded line numbers skip between 1351 and 1354, so a
 * condition guarding the buffer/decl-list uploads (presumably on `active`)
 * may exist but is not visible in this listing.  Confirm before reordering.
 */
1347 genX(upload_sol
)(struct brw_context
*brw
)
1349 struct gl_context
*ctx
= &brw
->ctx
;
1350 /* BRW_NEW_TRANSFORM_FEEDBACK */
1351 bool active
= _mesa_is_xfb_active_and_unpaused(ctx
);
1354 genX(upload_3dstate_so_buffers
)(brw
);
1356 /* BRW_NEW_VUE_MAP_GEOM_OUT */
1357 genX(upload_3dstate_so_decl_list
)(brw
, &brw
->vue_map_geom_out
);
1360 /* Finally, set up the SOL stage. This command must always follow updates to
1361 * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
1362 * MMIO register updates (current performed by the kernel at each batch
1365 genX(upload_3dstate_streamout
)(brw
, active
, &brw
->vue_map_geom_out
);
/*
 * Tracked-state atom named sol_state: its .brw mask lists the dirty bits for
 * this atom and .emit names genX(upload_sol) as the emit callback.
 *
 * NOTE(review): the embedded line numbers skip (1368 -> 1371 -> 1373), so
 * the .mesa mask and part of the .brw mask are not visible in this listing.
 */
1368 static const struct brw_tracked_state
genX(sol_state
) = {
1371 .brw
= BRW_NEW_BATCH
|
1373 BRW_NEW_RASTERIZER_DISCARD
|
1374 BRW_NEW_VUE_MAP_GEOM_OUT
|
1375 BRW_NEW_TRANSFORM_FEEDBACK
,
1377 .emit
= genX(upload_sol
),
1380 /* ---------------------------------------------------------------------- */
/*
 * Emit the 3DSTATE_PS packet from the fragment shader's brw_wm_prog_data and
 * the WM stage state (brw->wm.base).
 *
 * Visible fields programmed: vector mask enable (true when GEN_GEN >= 8),
 * binding table entry count (size_bytes / 4), alternate floating point mode,
 * sample mask, maximum thread counts, push constant enable, oMask, dual
 * source blending (only when blending is enabled on draw buffer 0 and its
 * blend factors use the second source), attribute enable, position XY
 * offset select, fast clear enable, 8/16-pixel dispatch enables with their
 * GRF start registers and kernel start pointers, and the scratch space
 * pointer when the program uses scratch.
 *
 * NOTE(review): several statements are visibly alternatives of one another
 * (MaximumNumberofThreadsPerPSD = 64 - 1 vs. 64 - 2 vs.
 * MaximumNumberofThreads; the two PositionXYOffsetSelect assignments) and
 * the left-hand side of the DIV_ROUND_UP(...) expression at source line
 * 1404 is missing; the preprocessor guards and else branches selecting them
 * are not visible in this listing.  Verify against the full file.
 */
1383 genX(upload_ps
)(struct brw_context
*brw
)
1385 UNUSED
const struct gl_context
*ctx
= &brw
->ctx
;
1386 UNUSED
const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1388 /* BRW_NEW_FS_PROG_DATA */
1389 const struct brw_wm_prog_data
*prog_data
=
1390 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1391 const struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
1396 brw_batch_emit(brw
, GENX(3DSTATE_PS
), ps
) {
1397 /* Initialize the execution mask with VMask. Otherwise, derivatives are
1398 * incorrect for subspans where some of the pixels are unlit. We believe
1399 * the bit just didn't take effect in previous generations.
1401 ps
.VectorMaskEnable
= GEN_GEN
>= 8;
1404 DIV_ROUND_UP(CLAMP(stage_state
->sampler_count
, 0, 16), 4);
1406 /* BRW_NEW_FS_PROG_DATA */
1407 ps
.BindingTableEntryCount
= prog_data
->base
.binding_table
.size_bytes
/ 4;
1409 if (prog_data
->base
.use_alt_mode
)
1410 ps
.FloatingPointMode
= Alternate
;
1412 /* Haswell requires the sample mask to be set in this packet as well as
1413 * in 3DSTATE_SAMPLE_MASK; the values should match.
1416 /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
1418 ps
.SampleMask
= gen6_determine_sample_mask(brw
);
1421 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
1422 * it implicitly scales for different GT levels (which have some # of
1425 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
1428 ps
.MaximumNumberofThreadsPerPSD
= 64 - 1;
1430 ps
.MaximumNumberofThreadsPerPSD
= 64 - 2;
1432 ps
.MaximumNumberofThreads
= devinfo
->max_wm_threads
- 1;
1435 if (prog_data
->base
.nr_params
> 0)
1436 ps
.PushConstantEnable
= true;
1439 /* From the IVB PRM, volume 2 part 1, page 287:
1440 * "This bit is inserted in the PS payload header and made available to
1441 * the DataPort (either via the message header or via header bypass) to
1442 * indicate that oMask data (one or two phases) is included in Render
1443 * Target Write messages. If present, the oMask data is used to mask off
1446 ps
.oMaskPresenttoRenderTarget
= prog_data
->uses_omask
;
1448 /* The hardware wedges if you have this bit set but don't turn on any
1449 * dual source blend factors.
1451 * BRW_NEW_FS_PROG_DATA | _NEW_COLOR
1453 ps
.DualSourceBlendEnable
= prog_data
->dual_src_blend
&&
1454 (ctx
->Color
.BlendEnabled
& 1) &&
1455 ctx
->Color
.Blend
[0]._UsesDualSrc
;
1457 /* BRW_NEW_FS_PROG_DATA */
1458 ps
.AttributeEnable
= (prog_data
->num_varying_inputs
!= 0);
1461 /* From the documentation for this packet:
1462 * "If the PS kernel does not need the Position XY Offsets to
1463 * compute a Position Value, then this field should be programmed
1464 * to POSOFFSET_NONE."
1466 * "SW Recommendation: If the PS kernel needs the Position Offsets
1467 * to compute a Position XY value, this field should match Position
1468 * ZW Interpolation Mode to ensure a consistent position.xyzw
1471 * We only require XY sample offsets. So, this recommendation doesn't
1472 * look useful at the moment. We might need this in future.
1474 if (prog_data
->uses_pos_offset
)
1475 ps
.PositionXYOffsetSelect
= POSOFFSET_SAMPLE
;
1477 ps
.PositionXYOffsetSelect
= POSOFFSET_NONE
;
1479 ps
.RenderTargetFastClearEnable
= brw
->wm
.fast_clear_op
;
1480 ps
._8PixelDispatchEnable
= prog_data
->dispatch_8
;
1481 ps
._16PixelDispatchEnable
= prog_data
->dispatch_16
;
1482 ps
.DispatchGRFStartRegisterForConstantSetupData0
=
1483 prog_data
->base
.dispatch_grf_start_reg
;
1484 ps
.DispatchGRFStartRegisterForConstantSetupData2
=
1485 prog_data
->dispatch_grf_start_reg_2
;
1487 ps
.KernelStartPointer0
= stage_state
->prog_offset
;
1488 ps
.KernelStartPointer2
= stage_state
->prog_offset
+
1489 prog_data
->prog_offset_2
;
1491 if (prog_data
->base
.total_scratch
) {
1492 ps
.ScratchSpaceBasePointer
=
1493 render_bo(stage_state
->scratch_bo
,
1494 ffs(stage_state
->per_thread_scratch
) - 11);
/*
 * Tracked-state atom named ps_state: .mesa/.brw list the dirty bits for this
 * atom and .emit names genX(upload_ps) as the emit callback.  The .mesa mask
 * adds further bits only when GEN_GEN < 8.
 *
 * NOTE(review): the embedded line numbers skip (1502 -> 1505 -> 1507), so
 * the tail of the GEN_GEN < 8 expression and part of the .brw mask are not
 * visible in this listing.
 */
1499 static const struct brw_tracked_state
genX(ps_state
) = {
1501 .mesa
= _NEW_MULTISAMPLE
|
1502 (GEN_GEN
< 8 ? _NEW_BUFFERS
|
1505 .brw
= BRW_NEW_BATCH
|
1507 BRW_NEW_FS_PROG_DATA
,
1509 .emit
= genX(upload_ps
),
1514 /* ---------------------------------------------------------------------- */
1518 genX(upload_raster
)(struct brw_context
*brw
)
1520 struct gl_context
*ctx
= &brw
->ctx
;
1523 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
1526 struct gl_polygon_attrib
*polygon
= &ctx
->Polygon
;
1529 struct gl_point_attrib
*point
= &ctx
->Point
;
1531 brw_batch_emit(brw
, GENX(3DSTATE_RASTER
), raster
) {
1532 if (polygon
->_FrontBit
== render_to_fbo
)
1533 raster
.FrontWinding
= CounterClockwise
;
1535 if (polygon
->CullFlag
) {
1536 switch (polygon
->CullFaceMode
) {
1538 raster
.CullMode
= CULLMODE_FRONT
;
1541 raster
.CullMode
= CULLMODE_BACK
;
1543 case GL_FRONT_AND_BACK
:
1544 raster
.CullMode
= CULLMODE_BOTH
;
1547 unreachable("not reached");
1550 raster
.CullMode
= CULLMODE_NONE
;
1553 point
->SmoothFlag
= raster
.SmoothPointEnable
;
1555 raster
.DXMultisampleRasterizationEnable
=
1556 _mesa_is_multisample_enabled(ctx
);
1558 raster
.GlobalDepthOffsetEnableSolid
= polygon
->OffsetFill
;
1559 raster
.GlobalDepthOffsetEnableWireframe
= polygon
->OffsetLine
;
1560 raster
.GlobalDepthOffsetEnablePoint
= polygon
->OffsetPoint
;
1562 switch (polygon
->FrontMode
) {
1564 raster
.FrontFaceFillMode
= FILL_MODE_SOLID
;
1567 raster
.FrontFaceFillMode
= FILL_MODE_WIREFRAME
;
1570 raster
.FrontFaceFillMode
= FILL_MODE_POINT
;
1573 unreachable("not reached");
1576 switch (polygon
->BackMode
) {
1578 raster
.BackFaceFillMode
= FILL_MODE_SOLID
;
1581 raster
.BackFaceFillMode
= FILL_MODE_WIREFRAME
;
1584 raster
.BackFaceFillMode
= FILL_MODE_POINT
;
1587 unreachable("not reached");
1591 raster
.AntialiasingEnable
= ctx
->Line
.SmoothFlag
;
1594 raster
.ScissorRectangleEnable
= ctx
->Scissor
.EnableFlags
;
1596 /* _NEW_TRANSFORM */
1597 if (!ctx
->Transform
.DepthClamp
) {
1599 raster
.ViewportZFarClipTestEnable
= true;
1600 raster
.ViewportZNearClipTestEnable
= true;
1602 raster
.ViewportZClipTestEnable
= true;
1606 /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
1608 raster
.ConservativeRasterizationEnable
=
1609 ctx
->IntelConservativeRasterization
;
1612 raster
.GlobalDepthOffsetClamp
= polygon
->OffsetClamp
;
1613 raster
.GlobalDepthOffsetScale
= polygon
->OffsetFactor
;
1615 raster
.GlobalDepthOffsetConstant
= polygon
->OffsetUnits
* 2;
/*
 * Tracked-state atom named raster_state: .mesa/.brw list the dirty bits for
 * this atom and .emit names genX(upload_raster) as the emit callback.
 *
 * NOTE(review): the embedded line numbers skip (1621 -> 1628 -> 1630), so
 * most of the .mesa mask and part of the .brw mask are not visible in this
 * listing.  genX(upload_raster) reads polygon, point, line, scissor,
 * transform and multisample state, so confirm the full mask covers them.
 */
1619 static const struct brw_tracked_state
genX(raster_state
) = {
1621 .mesa
= _NEW_BUFFERS
|
1628 .brw
= BRW_NEW_BLORP
|
1630 BRW_NEW_CONSERVATIVE_RASTERIZATION
,
1632 .emit
= genX(upload_raster
),
1635 /* ---------------------------------------------------------------------- */
/*
 * Emit the 3DSTATE_PS_EXTRA packet from the fragment shader's
 * brw_wm_prog_data.
 *
 * Visible fields programmed: shader valid, computed depth mode, kill,
 * attribute enable (num_varying_inputs != 0), source depth / W usage,
 * per-sample dispatch, input coverage mask handling when the shader uses
 * the sample mask, oMask, barycentric pulls, computed stencil, and
 * PixelShaderHasUAV when the shader has side effects or uses kill while no
 * color buffer write is enabled.
 *
 * NOTE(review): InputCoverageMaskState (three-way choice) and
 * PixelShaderUsesInputCoverageMask look like per-generation alternatives,
 * and the embedded line numbers skip around them (1655 -> 1657,
 * 1662 -> 1664, 1668 -> 1670); the preprocessor guards are not visible in
 * this listing.  Verify against the full file.
 */
1638 genX(upload_ps_extra
)(struct brw_context
*brw
)
1640 UNUSED
struct gl_context
*ctx
= &brw
->ctx
;
1642 const struct brw_wm_prog_data
*prog_data
=
1643 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1645 brw_batch_emit(brw
, GENX(3DSTATE_PS_EXTRA
), psx
) {
1646 psx
.PixelShaderValid
= true;
1647 psx
.PixelShaderComputedDepthMode
= prog_data
->computed_depth_mode
;
1648 psx
.PixelShaderKillsPixel
= prog_data
->uses_kill
;
1649 psx
.AttributeEnable
= prog_data
->num_varying_inputs
!= 0;
1650 psx
.PixelShaderUsesSourceDepth
= prog_data
->uses_src_depth
;
1651 psx
.PixelShaderUsesSourceW
= prog_data
->uses_src_w
;
1652 psx
.PixelShaderIsPerSample
= prog_data
->persample_dispatch
;
1654 /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
1655 if (prog_data
->uses_sample_mask
) {
1657 if (prog_data
->post_depth_coverage
)
1658 psx
.InputCoverageMaskState
= ICMS_DEPTH_COVERAGE
;
1659 else if (prog_data
->inner_coverage
&& ctx
->IntelConservativeRasterization
)
1660 psx
.InputCoverageMaskState
= ICMS_INNER_CONSERVATIVE
;
1662 psx
.InputCoverageMaskState
= ICMS_NORMAL
;
1664 psx
.PixelShaderUsesInputCoverageMask
= true;
1668 psx
.oMaskPresenttoRenderTarget
= prog_data
->uses_omask
;
1670 psx
.PixelShaderPullsBary
= prog_data
->pulls_bary
;
1671 psx
.PixelShaderComputesStencil
= prog_data
->computed_stencil
;
1674 /* The stricter cross-primitive coherency guarantees that the hardware
1675 * gives us with the "Accesses UAV" bit set for at least one shader stage
1676 * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
1677 * are redundant within the current image, atomic counter and SSBO GL
1678 * APIs, which all have very loose ordering and coherency requirements
1679 * and generally rely on the application to insert explicit barriers when
1680 * a shader invocation is expected to see the memory writes performed by
1681 * the invocations of some previous primitive. Regardless of the value
1682 * of "UAV coherency required", the "Accesses UAV" bits will implicitly
1683 * cause an in most cases useless DC flush when the lowermost stage with
1684 * the bit set finishes execution.
1686 * It would be nice to disable it, but in some cases we can't because on
1687 * Gen8+ it also has an influence on rasterization via the PS UAV-only
1688 * signal (which could be set independently from the coherency mechanism
1689 * in the 3DSTATE_WM command on Gen7), and because in some cases it will
1690 * determine whether the hardware skips execution of the fragment shader
1691 * or not via the ThreadDispatchEnable signal. However if we know that
1692 * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
1693 * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
1694 * difference so we may just disable it here.
1696 * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
1697 * take into account KillPixels when no depth or stencil writes are
1698 * enabled. In order for occlusion queries to work correctly with no
1699 * attachments, we need to force-enable here.
1701 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
1704 if ((prog_data
->has_side_effects
|| prog_data
->uses_kill
) &&
1705 !brw_color_buffer_write_enabled(brw
))
1706 psx
.PixelShaderHasUAV
= true;
/*
 * Tracked-state atom named ps_extra: .mesa/.brw list the dirty bits for this
 * atom and .emit names genX(upload_ps_extra) as the emit callback.  Unlike
 * the neighbouring atoms, this one is declared without `static` in the
 * visible text.
 *
 * NOTE(review): the embedded line numbers skip (1713 -> 1715), so part of
 * the .brw mask is not visible in this listing.
 */
1710 const struct brw_tracked_state
genX(ps_extra
) = {
1712 .mesa
= _NEW_BUFFERS
| _NEW_COLOR
,
1713 .brw
= BRW_NEW_BLORP
|
1715 BRW_NEW_FRAGMENT_PROGRAM
|
1716 BRW_NEW_FS_PROG_DATA
|
1717 BRW_NEW_CONSERVATIVE_RASTERIZATION
,
1719 .emit
= genX(upload_ps_extra
),
1723 /* ---------------------------------------------------------------------- */
/*
 * Install the ordered lists of tracked-state atoms into the context.
 *
 * A static table named render_atoms is copied into the context with
 * brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE, ...) after a static
 * assertion that it fits in brw->render_atoms; a compute_atoms table is
 * installed the same way for BRW_COMPUTE_PIPELINE.  Inline comments record
 * ordering constraints between atoms (for example "must come before
 * brw_vertices", "must do before cc unit").
 *
 * NOTE(review): four declarations of render_atoms are visible, so they are
 * presumably alternatives selected per hardware generation by preprocessor
 * conditionals that are not shown in this listing; the compute table may be
 * guarded likewise.  The embedded line numbers also skip inside every table,
 * so each list is incomplete as shown.  Do not reorder or edit the tables
 * from this view alone.
 */
1726 genX(init_atoms
)(struct brw_context
*brw
)
1729 static const struct brw_tracked_state
*render_atoms
[] =
1731 /* Once all the programs are done, we know how large urb entry
1732 * sizes need to be and can decide if we need to change the urb
1736 &brw_recalculate_urb_fence
,
1741 /* Surface state setup. Must come before the VS/WM unit. The binding
1742 * table upload must be last.
1744 &brw_vs_pull_constants
,
1745 &brw_wm_pull_constants
,
1746 &brw_renderbuffer_surfaces
,
1747 &brw_renderbuffer_read_surfaces
,
1748 &brw_texture_surfaces
,
1749 &brw_vs_binding_table
,
1750 &brw_wm_binding_table
,
1755 /* These set up state for brw_psp_urb_cbs */
1759 &brw_vs_unit
, /* always required, enabled or not */
1765 &brw_invariant_state
,
1767 &brw_binding_table_pointers
,
1768 &brw_blend_constant_color
,
1772 &brw_polygon_stipple
,
1773 &brw_polygon_stipple_offset
,
1780 &brw_indices
, /* must come before brw_vertices */
1784 &brw_constant_buffer
1787 static const struct brw_tracked_state
*render_atoms
[] =
1789 &gen6_sf_and_clip_viewports
,
1791 /* Command packets: */
1794 &gen6_viewport_state
, /* must do after *_vp stages */
1797 &gen6_blend_state
, /* must do before cc unit */
1798 &gen6_color_calc_state
, /* must do before cc unit */
1799 &gen6_depth_stencil_state
, /* must do before cc unit */
1801 &gen6_vs_push_constants
, /* Before vs_state */
1802 &gen6_gs_push_constants
, /* Before gs_state */
1803 &gen6_wm_push_constants
, /* Before wm_state */
1805 /* Surface state setup. Must come before the VS/WM unit. The binding
1806 * table upload must be last.
1808 &brw_vs_pull_constants
,
1809 &brw_vs_ubo_surfaces
,
1810 &brw_gs_pull_constants
,
1811 &brw_gs_ubo_surfaces
,
1812 &brw_wm_pull_constants
,
1813 &brw_wm_ubo_surfaces
,
1814 &gen6_renderbuffer_surfaces
,
1815 &brw_renderbuffer_read_surfaces
,
1816 &brw_texture_surfaces
,
1818 &brw_vs_binding_table
,
1819 &gen6_gs_binding_table
,
1820 &brw_wm_binding_table
,
1825 &gen6_sampler_state
,
1826 &gen6_multisample_state
,
1834 &gen6_scissor_state
,
1836 &gen6_binding_table_pointers
,
1840 &brw_polygon_stipple
,
1841 &brw_polygon_stipple_offset
,
1847 &brw_indices
, /* must come before brw_vertices */
1852 static const struct brw_tracked_state
*render_atoms
[] =
1854 /* Command packets: */
1857 &gen7_sf_clip_viewport
,
1860 &gen7_push_constant_space
,
1862 &gen6_blend_state
, /* must do before cc unit */
1863 &gen6_color_calc_state
, /* must do before cc unit */
1864 &genX(depth_stencil_state
), /* must do before cc unit */
1866 &brw_vs_image_surfaces
, /* Before vs push/pull constants and binding table */
1867 &brw_tcs_image_surfaces
, /* Before tcs push/pull constants and binding table */
1868 &brw_tes_image_surfaces
, /* Before tes push/pull constants and binding table */
1869 &brw_gs_image_surfaces
, /* Before gs push/pull constants and binding table */
1870 &brw_wm_image_surfaces
, /* Before wm push/pull constants and binding table */
1872 &gen6_vs_push_constants
, /* Before vs_state */
1873 &gen7_tcs_push_constants
,
1874 &gen7_tes_push_constants
,
1875 &gen6_gs_push_constants
, /* Before gs_state */
1876 &gen6_wm_push_constants
, /* Before wm_surfaces and constant_buffer */
1878 /* Surface state setup. Must come before the VS/WM unit. The binding
1879 * table upload must be last.
1881 &brw_vs_pull_constants
,
1882 &brw_vs_ubo_surfaces
,
1883 &brw_vs_abo_surfaces
,
1884 &brw_tcs_pull_constants
,
1885 &brw_tcs_ubo_surfaces
,
1886 &brw_tcs_abo_surfaces
,
1887 &brw_tes_pull_constants
,
1888 &brw_tes_ubo_surfaces
,
1889 &brw_tes_abo_surfaces
,
1890 &brw_gs_pull_constants
,
1891 &brw_gs_ubo_surfaces
,
1892 &brw_gs_abo_surfaces
,
1893 &brw_wm_pull_constants
,
1894 &brw_wm_ubo_surfaces
,
1895 &brw_wm_abo_surfaces
,
1896 &gen6_renderbuffer_surfaces
,
1897 &brw_renderbuffer_read_surfaces
,
1898 &brw_texture_surfaces
,
1899 &brw_vs_binding_table
,
1900 &brw_tcs_binding_table
,
1901 &brw_tes_binding_table
,
1902 &brw_gs_binding_table
,
1903 &brw_wm_binding_table
,
1910 &gen6_multisample_state
,
1924 &gen6_scissor_state
,
1928 &brw_polygon_stipple
,
1929 &brw_polygon_stipple_offset
,
1935 &brw_indices
, /* must come before brw_vertices */
1942 static const struct brw_tracked_state
*render_atoms
[] =
1945 &gen8_sf_clip_viewport
,
1948 &gen7_push_constant_space
,
1951 &gen6_color_calc_state
,
1953 &brw_vs_image_surfaces
, /* Before vs push/pull constants and binding table */
1954 &brw_tcs_image_surfaces
, /* Before tcs push/pull constants and binding table */
1955 &brw_tes_image_surfaces
, /* Before tes push/pull constants and binding table */
1956 &brw_gs_image_surfaces
, /* Before gs push/pull constants and binding table */
1957 &brw_wm_image_surfaces
, /* Before wm push/pull constants and binding table */
1959 &gen6_vs_push_constants
, /* Before vs_state */
1960 &gen7_tcs_push_constants
,
1961 &gen7_tes_push_constants
,
1962 &gen6_gs_push_constants
, /* Before gs_state */
1963 &gen6_wm_push_constants
, /* Before wm_surfaces and constant_buffer */
1965 /* Surface state setup. Must come before the VS/WM unit. The binding
1966 * table upload must be last.
1968 &brw_vs_pull_constants
,
1969 &brw_vs_ubo_surfaces
,
1970 &brw_vs_abo_surfaces
,
1971 &brw_tcs_pull_constants
,
1972 &brw_tcs_ubo_surfaces
,
1973 &brw_tcs_abo_surfaces
,
1974 &brw_tes_pull_constants
,
1975 &brw_tes_ubo_surfaces
,
1976 &brw_tes_abo_surfaces
,
1977 &brw_gs_pull_constants
,
1978 &brw_gs_ubo_surfaces
,
1979 &brw_gs_abo_surfaces
,
1980 &brw_wm_pull_constants
,
1981 &brw_wm_ubo_surfaces
,
1982 &brw_wm_abo_surfaces
,
1983 &gen6_renderbuffer_surfaces
,
1984 &brw_renderbuffer_read_surfaces
,
1985 &brw_texture_surfaces
,
1986 &brw_vs_binding_table
,
1987 &brw_tcs_binding_table
,
1988 &brw_tes_binding_table
,
1989 &brw_gs_binding_table
,
1990 &brw_wm_binding_table
,
1997 &gen8_multisample_state
,
2006 &genX(raster_state
),
2012 &genX(depth_stencil_state
),
2015 &gen6_scissor_state
,
2019 &brw_polygon_stipple
,
2020 &brw_polygon_stipple_offset
,
2037 STATIC_ASSERT(ARRAY_SIZE(render_atoms
) <= ARRAY_SIZE(brw
->render_atoms
));
2038 brw_copy_pipeline_atoms(brw
, BRW_RENDER_PIPELINE
,
2039 render_atoms
, ARRAY_SIZE(render_atoms
));
2042 static const struct brw_tracked_state
*compute_atoms
[] =
2045 &brw_cs_image_surfaces
,
2046 &gen7_cs_push_constants
,
2047 &brw_cs_pull_constants
,
2048 &brw_cs_ubo_surfaces
,
2049 &brw_cs_abo_surfaces
,
2050 &brw_cs_texture_surfaces
,
2051 &brw_cs_work_groups_surface
,
2056 STATIC_ASSERT(ARRAY_SIZE(compute_atoms
) <= ARRAY_SIZE(brw
->compute_atoms
));
2057 brw_copy_pipeline_atoms(brw
, BRW_COMPUTE_PIPELINE
,
2058 compute_atoms
, ARRAY_SIZE(compute_atoms
));