2 * Copyright © 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "common/gen_device_info.h"
27 #include "genxml/gen_macros.h"
29 #include "brw_context.h"
30 #include "brw_state.h"
33 #include "intel_batchbuffer.h"
34 #include "intel_fbo.h"
36 #include "main/fbobject.h"
37 #include "main/framebuffer.h"
38 #include "main/stencil.h"
41 emit_dwords(struct brw_context
*brw
, unsigned n
)
43 intel_batchbuffer_begin(brw
, n
, RENDER_RING
);
44 uint32_t *map
= brw
->batch
.map_next
;
45 brw
->batch
.map_next
+= n
;
46 intel_batchbuffer_advance(brw
);
52 uint32_t read_domains
;
53 uint32_t write_domain
;
58 emit_reloc(struct brw_context
*brw
,
59 void *location
, struct brw_address address
, uint32_t delta
)
61 uint32_t offset
= (char *) location
- (char *) brw
->batch
.map
;
63 return brw_emit_reloc(&brw
->batch
, offset
, address
.bo
,
64 address
.offset
+ delta
,
66 address
.write_domain
);
69 #define __gen_address_type struct brw_address
70 #define __gen_user_data struct brw_context
73 __gen_combine_address(struct brw_context
*brw
, void *location
,
74 struct brw_address address
, uint32_t delta
)
76 if (address
.bo
== NULL
) {
77 return address
.offset
+ delta
;
79 return emit_reloc(brw
, location
, address
, delta
);
83 #include "genxml/genX_pack.h"
/* Token-pasting helpers that map a command name onto its generated
 * companions: <cmd>_length, <cmd>_length_bias, <cmd>_header, <cmd>_pack.
 */
#define _brw_cmd_length(cmd) cmd ## _length
#define _brw_cmd_length_bias(cmd) cmd ## _length_bias
#define _brw_cmd_header(cmd) cmd ## _header
#define _brw_cmd_pack(cmd) cmd ## _pack

/**
 * Emit a fixed-length command into the batch.
 *
 * Used as a statement prefix:
 *
 *    brw_batch_emit(brw, GENX(3DSTATE_FOO), foo) {
 *       foo.Field = value;
 *    }
 *
 * A "struct cmd" named \p name is initialized from the command header and
 * batch space for the command is obtained from emit_dwords().  The attached
 * block runs exactly once; afterwards the struct is packed into the batch
 * space and _dst is cleared, which ends the loop.
 */
#define brw_batch_emit(brw, cmd, name)                  \
   for (struct cmd name = { _brw_cmd_header(cmd) },     \
        *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);              \
        _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),   \
        _dst = NULL)

/**
 * Emit a variable-length command of \p n dwords.
 *
 * Only the header (with DWordLength derived from \p n) is packed; the
 * expression evaluates to a pointer to dword 1 so the caller can fill in
 * the payload that follows.
 *
 * \p n is evaluated exactly once and may be an arbitrary expression.
 */
#define brw_batch_emitn(brw, cmd, n) ({                      \
      const unsigned _n = (n);                               \
      uint32_t *_dw = emit_dwords(brw, _n);                  \
      struct cmd template = {                                \
         _brw_cmd_header(cmd),                               \
         .DWordLength = _n - _brw_cmd_length_bias(cmd),      \
      };                                                     \
      _brw_cmd_pack(cmd)(brw, _dw, &template);               \
      _dw + 1; /* Array starts at dw[1] */                   \
   })

/**
 * Emit an indirect state structure.
 *
 * Works like brw_batch_emit(), except that the struct starts out zeroed
 * and the storage comes from brw_state_batch() (command length in dwords
 * times 4 bytes, with the given \p align); \p offset is handed through to
 * brw_state_batch().
 */
#define brw_state_emit(brw, cmd, align, offset, name)              \
   for (struct cmd name = { 0, },                                  \
        *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4,     \
                                align, offset);                    \
        __builtin_expect(_dst != NULL, 1);                         \
        _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),              \
        _dst = NULL)
117 * Determine the appropriate attribute override value to store into the
118 * 3DSTATE_SF structure for a given fragment shader attribute. The attribute
119 * override value contains two pieces of information: the location of the
120 * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
121 * flag indicating whether to "swizzle" the attribute based on the direction
122 * the triangle is facing.
124 * If an attribute is "swizzled", then the given VUE location is used for
125 * front-facing triangles, and the VUE location that immediately follows is
126 * used for back-facing triangles. We use this to implement the mapping from
127 * gl_FrontColor/gl_BackColor to gl_Color.
129 * urb_entry_read_offset is the offset into the VUE at which the SF unit is
130 * being instructed to begin reading attribute data. It can be set to a
131 * nonzero value to prevent the SF unit from wasting time reading elements of
132 * the VUE that are not needed by the fragment shader. It is measured in
133 * 256-bit increments.
136 genX(get_attr_override
)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) *attr
,
137 const struct brw_vue_map
*vue_map
,
138 int urb_entry_read_offset
, int fs_attr
,
139 bool two_side_color
, uint32_t *max_source_attr
)
141 /* Find the VUE slot for this attribute. */
142 int slot
= vue_map
->varying_to_slot
[fs_attr
];
144 /* Viewport and Layer are stored in the VUE header. We need to override
145 * them to zero if earlier stages didn't write them, as GL requires that
146 * they read back as zero when not explicitly set.
148 if (fs_attr
== VARYING_SLOT_VIEWPORT
|| fs_attr
== VARYING_SLOT_LAYER
) {
149 attr
->ComponentOverrideX
= true;
150 attr
->ComponentOverrideW
= true;
151 attr
->ConstantSource
= CONST_0000
;
153 if (!(vue_map
->slots_valid
& VARYING_BIT_LAYER
))
154 attr
->ComponentOverrideY
= true;
155 if (!(vue_map
->slots_valid
& VARYING_BIT_VIEWPORT
))
156 attr
->ComponentOverrideZ
= true;
161 /* If there was only a back color written but not front, use back
162 * as the color instead of undefined
164 if (slot
== -1 && fs_attr
== VARYING_SLOT_COL0
)
165 slot
= vue_map
->varying_to_slot
[VARYING_SLOT_BFC0
];
166 if (slot
== -1 && fs_attr
== VARYING_SLOT_COL1
)
167 slot
= vue_map
->varying_to_slot
[VARYING_SLOT_BFC1
];
170 /* This attribute does not exist in the VUE--that means that the vertex
171 * shader did not write to it. This means that either:
173 * (a) This attribute is a texture coordinate, and it is going to be
174 * replaced with point coordinates (as a consequence of a call to
175 * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
176 * hardware will ignore whatever attribute override we supply.
178 * (b) This attribute is read by the fragment shader but not written by
179 * the vertex shader, so its value is undefined. Therefore the
180 * attribute override we supply doesn't matter.
182 * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
183 * previous shader stage.
185 * Note that we don't have to worry about the cases where the attribute
186 * is gl_PointCoord or is undergoing point sprite coordinate
187 * replacement, because in those cases, this function isn't called.
189 * In case (c), we need to program the attribute overrides so that the
190 * primitive ID will be stored in this slot. In every other case, the
191 * attribute override we supply doesn't matter. So just go ahead and
192 * program primitive ID in every case.
194 attr
->ComponentOverrideW
= true;
195 attr
->ComponentOverrideX
= true;
196 attr
->ComponentOverrideY
= true;
197 attr
->ComponentOverrideZ
= true;
198 attr
->ConstantSource
= PRIM_ID
;
202 /* Compute the location of the attribute relative to urb_entry_read_offset.
203 * Each increment of urb_entry_read_offset represents a 256-bit value, so
204 * it counts for two 128-bit VUE slots.
206 int source_attr
= slot
- 2 * urb_entry_read_offset
;
207 assert(source_attr
>= 0 && source_attr
< 32);
209 /* If we are doing two-sided color, and the VUE slot following this one
210 * represents a back-facing color, then we need to instruct the SF unit to
211 * do back-facing swizzling.
213 bool swizzling
= two_side_color
&&
214 ((vue_map
->slot_to_varying
[slot
] == VARYING_SLOT_COL0
&&
215 vue_map
->slot_to_varying
[slot
+1] == VARYING_SLOT_BFC0
) ||
216 (vue_map
->slot_to_varying
[slot
] == VARYING_SLOT_COL1
&&
217 vue_map
->slot_to_varying
[slot
+1] == VARYING_SLOT_BFC1
));
219 /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
220 if (*max_source_attr
< source_attr
+ swizzling
)
221 *max_source_attr
= source_attr
+ swizzling
;
223 attr
->SourceAttribute
= source_attr
;
225 attr
->SwizzleSelect
= INPUTATTR_FACING
;
230 genX(calculate_attr_overrides
)(const struct brw_context
*brw
,
231 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) *attr_overrides
,
232 uint32_t *point_sprite_enables
,
233 uint32_t *urb_entry_read_length
,
234 uint32_t *urb_entry_read_offset
)
236 const struct gl_context
*ctx
= &brw
->ctx
;
239 const struct gl_point_attrib
*point
= &ctx
->Point
;
241 /* BRW_NEW_FS_PROG_DATA */
242 const struct brw_wm_prog_data
*wm_prog_data
=
243 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
244 uint32_t max_source_attr
= 0;
246 *point_sprite_enables
= 0;
248 /* BRW_NEW_FRAGMENT_PROGRAM
250 * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
251 * the full vertex header. Otherwise, we can program the SF to start
252 * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
253 * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
254 * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
257 bool fs_needs_vue_header
= brw
->fragment_program
->info
.inputs_read
&
258 (VARYING_BIT_LAYER
| VARYING_BIT_VIEWPORT
);
260 *urb_entry_read_offset
= fs_needs_vue_header
? 0 : 1;
262 /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
263 * description of dw10 Point Sprite Texture Coordinate Enable:
265 * "This field must be programmed to zero when non-point primitives
268 * The SandyBridge PRM doesn't explicitly say that point sprite enables
269 * must be programmed to zero when rendering non-point primitives, but
270 * the IvyBridge PRM does, and if we don't, we get garbage.
272 * This is not required on Haswell, as the hardware ignores this state
273 * when drawing non-points -- although we do still need to be careful to
274 * correctly set the attr overrides.
277 * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
279 bool drawing_points
= brw_is_drawing_points(brw
);
281 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
282 int input_index
= wm_prog_data
->urb_setup
[attr
];
288 bool point_sprite
= false;
289 if (drawing_points
) {
290 if (point
->PointSprite
&&
291 (attr
>= VARYING_SLOT_TEX0
&& attr
<= VARYING_SLOT_TEX7
) &&
292 (point
->CoordReplace
& (1u << (attr
- VARYING_SLOT_TEX0
)))) {
296 if (attr
== VARYING_SLOT_PNTC
)
300 *point_sprite_enables
|= (1 << input_index
);
303 /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
304 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) attribute
= { 0 };
307 genX(get_attr_override
)(&attribute
,
308 &brw
->vue_map_geom_out
,
309 *urb_entry_read_offset
, attr
,
310 brw
->ctx
.VertexProgram
._TwoSideEnabled
,
314 /* The hardware can only do the overrides on 16 overrides at a
315 * time, and the other up to 16 have to be lined up so that the
316 * input index = the output index. We'll need to do some
317 * tweaking to make sure that's the case.
319 if (input_index
< 16)
320 attr_overrides
[input_index
] = attribute
;
322 assert(attribute
.SourceAttribute
== input_index
);
325 /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
326 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
328 * "This field should be set to the minimum length required to read the
329 * maximum source attribute. The maximum source attribute is indicated
330 * by the maximum value of the enabled Attribute # Source Attribute if
331 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
333 * read_length = ceiling((max_source_attr + 1) / 2)
335 * [errata] Corruption/Hang possible if length programmed larger than
338 * Similar text exists for Ivy Bridge.
340 *urb_entry_read_length
= DIV_ROUND_UP(max_source_attr
+ 1, 2);
343 /* ---------------------------------------------------------------------- */
346 genX(upload_depth_stencil_state
)(struct brw_context
*brw
)
348 struct gl_context
*ctx
= &brw
->ctx
;
351 struct intel_renderbuffer
*depth_irb
=
352 intel_get_renderbuffer(ctx
->DrawBuffer
, BUFFER_DEPTH
);
355 struct gl_depthbuffer_attrib
*depth
= &ctx
->Depth
;
358 struct gl_stencil_attrib
*stencil
= &ctx
->Stencil
;
359 const int b
= stencil
->_BackFace
;
362 brw_batch_emit(brw
, GENX(3DSTATE_WM_DEPTH_STENCIL
), wmds
) {
365 brw_state_emit(brw
, GENX(DEPTH_STENCIL_STATE
), 64, &ds_offset
, wmds
) {
367 if (depth
->Test
&& depth_irb
) {
368 wmds
.DepthTestEnable
= true;
369 wmds
.DepthBufferWriteEnable
= brw_depth_writes_enabled(brw
);
370 wmds
.DepthTestFunction
= intel_translate_compare_func(depth
->Func
);
373 if (stencil
->_Enabled
) {
374 wmds
.StencilTestEnable
= true;
375 wmds
.StencilWriteMask
= stencil
->WriteMask
[0] & 0xff;
376 wmds
.StencilTestMask
= stencil
->ValueMask
[0] & 0xff;
378 wmds
.StencilTestFunction
=
379 intel_translate_compare_func(stencil
->Function
[0]);
381 intel_translate_stencil_op(stencil
->FailFunc
[0]);
382 wmds
.StencilPassDepthPassOp
=
383 intel_translate_stencil_op(stencil
->ZPassFunc
[0]);
384 wmds
.StencilPassDepthFailOp
=
385 intel_translate_stencil_op(stencil
->ZFailFunc
[0]);
387 wmds
.StencilBufferWriteEnable
= stencil
->_WriteEnabled
;
389 if (stencil
->_TestTwoSide
) {
390 wmds
.DoubleSidedStencilEnable
= true;
391 wmds
.BackfaceStencilWriteMask
= stencil
->WriteMask
[b
] & 0xff;
392 wmds
.BackfaceStencilTestMask
= stencil
->ValueMask
[b
] & 0xff;
394 wmds
.BackfaceStencilTestFunction
=
395 intel_translate_compare_func(stencil
->Function
[b
]);
396 wmds
.BackfaceStencilFailOp
=
397 intel_translate_stencil_op(stencil
->FailFunc
[b
]);
398 wmds
.BackfaceStencilPassDepthPassOp
=
399 intel_translate_stencil_op(stencil
->ZPassFunc
[b
]);
400 wmds
.BackfaceStencilPassDepthFailOp
=
401 intel_translate_stencil_op(stencil
->ZFailFunc
[b
]);
405 wmds
.StencilReferenceValue
= _mesa_get_stencil_ref(ctx
, 0);
406 wmds
.BackfaceStencilReferenceValue
= _mesa_get_stencil_ref(ctx
, b
);
412 brw_batch_emit(brw
, GENX(3DSTATE_CC_STATE_POINTERS
), ptr
) {
413 ptr
.PointertoDEPTH_STENCIL_STATE
= ds_offset
;
414 ptr
.DEPTH_STENCIL_STATEChange
= true;
417 brw_batch_emit(brw
, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS
), ptr
) {
418 ptr
.PointertoDEPTH_STENCIL_STATE
= ds_offset
;
423 static const struct brw_tracked_state
genX(depth_stencil_state
) = {
425 .mesa
= _NEW_BUFFERS
|
428 .brw
= BRW_NEW_BLORP
|
429 (GEN_GEN
>= 8 ? BRW_NEW_CONTEXT
431 BRW_NEW_STATE_BASE_ADDRESS
),
433 .emit
= genX(upload_depth_stencil_state
),
436 /* ---------------------------------------------------------------------- */
439 genX(upload_clip_state
)(struct brw_context
*brw
)
441 struct gl_context
*ctx
= &brw
->ctx
;
444 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
446 /* BRW_NEW_FS_PROG_DATA */
447 struct brw_wm_prog_data
*wm_prog_data
=
448 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
450 brw_batch_emit(brw
, GENX(3DSTATE_CLIP
), clip
) {
451 clip
.StatisticsEnable
= !brw
->meta_in_progress
;
453 if (wm_prog_data
->barycentric_interp_modes
&
454 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS
)
455 clip
.NonPerspectiveBarycentricEnable
= true;
458 clip
.EarlyCullEnable
= true;
462 clip
.FrontWinding
= ctx
->Polygon
._FrontBit
== _mesa_is_user_fbo(fb
);
464 if (ctx
->Polygon
.CullFlag
) {
465 switch (ctx
->Polygon
.CullFaceMode
) {
467 clip
.CullMode
= CULLMODE_FRONT
;
470 clip
.CullMode
= CULLMODE_BACK
;
472 case GL_FRONT_AND_BACK
:
473 clip
.CullMode
= CULLMODE_BOTH
;
476 unreachable("Should not get here: invalid CullFlag");
479 clip
.CullMode
= CULLMODE_NONE
;
484 clip
.UserClipDistanceCullTestEnableBitmask
=
485 brw_vue_prog_data(brw
->vs
.base
.prog_data
)->cull_distance_mask
;
487 clip
.ViewportZClipTestEnable
= !ctx
->Transform
.DepthClamp
;
491 if (ctx
->Light
.ProvokingVertex
== GL_FIRST_VERTEX_CONVENTION
) {
492 clip
.TriangleStripListProvokingVertexSelect
= 0;
493 clip
.TriangleFanProvokingVertexSelect
= 1;
494 clip
.LineStripListProvokingVertexSelect
= 0;
496 clip
.TriangleStripListProvokingVertexSelect
= 2;
497 clip
.TriangleFanProvokingVertexSelect
= 2;
498 clip
.LineStripListProvokingVertexSelect
= 1;
502 clip
.UserClipDistanceClipTestEnableBitmask
=
503 ctx
->Transform
.ClipPlanesEnabled
;
506 clip
.ForceUserClipDistanceClipTestEnableBitmask
= true;
509 if (ctx
->Transform
.ClipDepthMode
== GL_ZERO_TO_ONE
)
510 clip
.APIMode
= APIMODE_D3D
;
512 clip
.APIMode
= APIMODE_OGL
;
514 clip
.GuardbandClipTestEnable
= true;
516 /* BRW_NEW_VIEWPORT_COUNT */
517 const unsigned viewport_count
= brw
->clip
.viewport_count
;
519 if (ctx
->RasterDiscard
) {
520 clip
.ClipMode
= CLIPMODE_REJECT_ALL
;
522 perf_debug("Rasterizer discard is currently implemented via the "
523 "clipper; having the GS not write primitives would "
524 "likely be faster.\n");
527 clip
.ClipMode
= CLIPMODE_NORMAL
;
530 clip
.ClipEnable
= brw
->primitive
!= _3DPRIM_RECTLIST
;
533 * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
535 if (!brw_is_drawing_points(brw
) && !brw_is_drawing_lines(brw
))
536 clip
.ViewportXYClipTestEnable
= true;
538 clip
.MinimumPointWidth
= 0.125;
539 clip
.MaximumPointWidth
= 255.875;
540 clip
.MaximumVPIndex
= viewport_count
- 1;
541 if (_mesa_geometric_layers(fb
) == 0)
542 clip
.ForceZeroRTAIndexEnable
= true;
546 static const struct brw_tracked_state
genX(clip_state
) = {
548 .mesa
= _NEW_BUFFERS
|
552 .brw
= BRW_NEW_BLORP
|
554 BRW_NEW_FS_PROG_DATA
|
555 BRW_NEW_GS_PROG_DATA
|
556 BRW_NEW_VS_PROG_DATA
|
557 BRW_NEW_META_IN_PROGRESS
|
559 BRW_NEW_RASTERIZER_DISCARD
|
560 BRW_NEW_TES_PROG_DATA
|
561 BRW_NEW_VIEWPORT_COUNT
,
563 .emit
= genX(upload_clip_state
),
566 /* ---------------------------------------------------------------------- */
569 genX(upload_sf
)(struct brw_context
*brw
)
571 struct gl_context
*ctx
= &brw
->ctx
;
576 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
577 const bool multisampled_fbo
= _mesa_geometric_samples(ctx
->DrawBuffer
) > 1;
580 brw_batch_emit(brw
, GENX(3DSTATE_SF
), sf
) {
581 sf
.StatisticsEnable
= true;
582 sf
.ViewportTransformEnable
= brw
->sf
.viewport_transform_enable
;
586 sf
.DepthBufferSurfaceFormat
= brw_depthbuffer_format(brw
);
591 sf
.FrontWinding
= ctx
->Polygon
._FrontBit
== render_to_fbo
;
592 sf
.GlobalDepthOffsetEnableSolid
= ctx
->Polygon
.OffsetFill
;
593 sf
.GlobalDepthOffsetEnableWireframe
= ctx
->Polygon
.OffsetLine
;
594 sf
.GlobalDepthOffsetEnablePoint
= ctx
->Polygon
.OffsetPoint
;
596 switch (ctx
->Polygon
.FrontMode
) {
598 sf
.FrontFaceFillMode
= FILL_MODE_SOLID
;
601 sf
.FrontFaceFillMode
= FILL_MODE_WIREFRAME
;
604 sf
.FrontFaceFillMode
= FILL_MODE_POINT
;
607 unreachable("not reached");
610 switch (ctx
->Polygon
.BackMode
) {
612 sf
.BackFaceFillMode
= FILL_MODE_SOLID
;
615 sf
.BackFaceFillMode
= FILL_MODE_WIREFRAME
;
618 sf
.BackFaceFillMode
= FILL_MODE_POINT
;
621 unreachable("not reached");
624 sf
.ScissorRectangleEnable
= true;
626 if (ctx
->Polygon
.CullFlag
) {
627 switch (ctx
->Polygon
.CullFaceMode
) {
629 sf
.CullMode
= CULLMODE_FRONT
;
632 sf
.CullMode
= CULLMODE_BACK
;
634 case GL_FRONT_AND_BACK
:
635 sf
.CullMode
= CULLMODE_BOTH
;
638 unreachable("not reached");
641 sf
.CullMode
= CULLMODE_NONE
;
645 sf
.LineStippleEnable
= ctx
->Line
.StippleFlag
;
648 if (multisampled_fbo
&& ctx
->Multisample
.Enabled
)
649 sf
.MultisampleRasterizationMode
= MSRASTMODE_ON_PATTERN
;
651 sf
.GlobalDepthOffsetConstant
= ctx
->Polygon
.OffsetUnits
* 2;
652 sf
.GlobalDepthOffsetScale
= ctx
->Polygon
.OffsetFactor
;
653 sf
.GlobalDepthOffsetClamp
= ctx
->Polygon
.OffsetClamp
;
657 sf
.LineWidth
= brw_get_line_width_float(brw
);
659 if (ctx
->Line
.SmoothFlag
) {
660 sf
.LineEndCapAntialiasingRegionWidth
= _10pixels
;
662 sf
.AntiAliasingEnable
= true;
666 /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
667 point_size
= CLAMP(ctx
->Point
.Size
, ctx
->Point
.MinSize
, ctx
->Point
.MaxSize
);
668 /* Clamp to the hardware limits */
669 sf
.PointWidth
= CLAMP(point_size
, 0.125f
, 255.875f
);
671 /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
672 if (use_state_point_size(brw
))
673 sf
.PointWidthSource
= State
;
676 /* _NEW_POINT | _NEW_MULTISAMPLE */
677 if ((ctx
->Point
.SmoothFlag
|| _mesa_is_multisample_enabled(ctx
)) &&
678 !ctx
->Point
.PointSprite
)
679 sf
.SmoothPointEnable
= true;
682 sf
.AALineDistanceMode
= AALINEDISTANCE_TRUE
;
685 if (ctx
->Light
.ProvokingVertex
!= GL_FIRST_VERTEX_CONVENTION
) {
686 sf
.TriangleStripListProvokingVertexSelect
= 2;
687 sf
.TriangleFanProvokingVertexSelect
= 2;
688 sf
.LineStripListProvokingVertexSelect
= 1;
690 sf
.TriangleFanProvokingVertexSelect
= 1;
694 /* BRW_NEW_FS_PROG_DATA */
695 const struct brw_wm_prog_data
*wm_prog_data
=
696 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
698 sf
.AttributeSwizzleEnable
= true;
699 sf
.NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
;
702 * Window coordinates in an FBO are inverted, which means point
703 * sprite origin must be inverted, too.
705 if ((ctx
->Point
.SpriteOrigin
== GL_LOWER_LEFT
) != render_to_fbo
) {
706 sf
.PointSpriteTextureCoordinateOrigin
= LOWERLEFT
;
708 sf
.PointSpriteTextureCoordinateOrigin
= UPPERLEFT
;
711 /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
712 * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
714 uint32_t urb_entry_read_length
;
715 uint32_t urb_entry_read_offset
;
716 uint32_t point_sprite_enables
;
717 genX(calculate_attr_overrides
)(brw
, sf
.Attribute
, &point_sprite_enables
,
718 &urb_entry_read_length
,
719 &urb_entry_read_offset
);
720 sf
.VertexURBEntryReadLength
= urb_entry_read_length
;
721 sf
.VertexURBEntryReadOffset
= urb_entry_read_offset
;
722 sf
.PointSpriteTextureCoordinateEnable
= point_sprite_enables
;
723 sf
.ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
;
728 static const struct brw_tracked_state
genX(sf_state
) = {
735 (GEN_GEN
<= 7 ? _NEW_BUFFERS
| _NEW_POLYGON
: 0),
736 .brw
= BRW_NEW_BLORP
|
738 BRW_NEW_VUE_MAP_GEOM_OUT
|
739 (GEN_GEN
<= 7 ? BRW_NEW_GS_PROG_DATA
|
741 BRW_NEW_TES_PROG_DATA
743 (GEN_GEN
== 6 ? BRW_NEW_FS_PROG_DATA
|
744 BRW_NEW_FRAGMENT_PROGRAM
747 .emit
= genX(upload_sf
),
752 /* ---------------------------------------------------------------------- */
756 genX(upload_sbe
)(struct brw_context
*brw
)
758 struct gl_context
*ctx
= &brw
->ctx
;
759 /* BRW_NEW_FS_PROG_DATA */
760 const struct brw_wm_prog_data
*wm_prog_data
=
761 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
763 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL
) attr_overrides
[16] = { { 0 } };
765 #define attr_overrides sbe.Attribute
767 uint32_t urb_entry_read_length
;
768 uint32_t urb_entry_read_offset
;
769 uint32_t point_sprite_enables
;
771 brw_batch_emit(brw
, GENX(3DSTATE_SBE
), sbe
) {
772 sbe
.AttributeSwizzleEnable
= true;
773 sbe
.NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
;
776 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
780 * Window coordinates in an FBO are inverted, which means point
781 * sprite origin must be inverted.
783 if ((ctx
->Point
.SpriteOrigin
== GL_LOWER_LEFT
) != render_to_fbo
)
784 sbe
.PointSpriteTextureCoordinateOrigin
= LOWERLEFT
;
786 sbe
.PointSpriteTextureCoordinateOrigin
= UPPERLEFT
;
788 /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM,
789 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM |
790 * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA |
791 * BRW_NEW_VUE_MAP_GEOM_OUT
793 genX(calculate_attr_overrides
)(brw
,
795 &point_sprite_enables
,
796 &urb_entry_read_length
,
797 &urb_entry_read_offset
);
799 /* Typically, the URB entry read length and offset should be programmed
800 * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active
801 * stage which produces geometry. However, we don't know the proper
802 * value until we call calculate_attr_overrides().
804 * To fit with our existing code, we override the inherited values and
805 * specify it here directly, as we did on previous generations.
807 sbe
.VertexURBEntryReadLength
= urb_entry_read_length
;
808 sbe
.VertexURBEntryReadOffset
= urb_entry_read_offset
;
809 sbe
.PointSpriteTextureCoordinateEnable
= point_sprite_enables
;
810 sbe
.ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
;
813 sbe
.ForceVertexURBEntryReadLength
= true;
814 sbe
.ForceVertexURBEntryReadOffset
= true;
818 /* prepare the active component dwords */
820 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
821 if (!(brw
->fragment_program
->info
.inputs_read
&
822 BITFIELD64_BIT(attr
))) {
826 assert(input_index
< 32);
828 sbe
.AttributeActiveComponentFormat
[input_index
] = ACTIVE_COMPONENT_XYZW
;
835 brw_batch_emit(brw
, GENX(3DSTATE_SBE_SWIZ
), sbes
) {
836 for (int i
= 0; i
< 16; i
++)
837 sbes
.Attribute
[i
] = attr_overrides
[i
];
841 #undef attr_overrides
844 static const struct brw_tracked_state
genX(sbe_state
) = {
846 .mesa
= _NEW_BUFFERS
|
851 .brw
= BRW_NEW_BLORP
|
853 BRW_NEW_FRAGMENT_PROGRAM
|
854 BRW_NEW_FS_PROG_DATA
|
855 BRW_NEW_GS_PROG_DATA
|
856 BRW_NEW_TES_PROG_DATA
|
857 BRW_NEW_VUE_MAP_GEOM_OUT
|
858 (GEN_GEN
== 7 ? BRW_NEW_PRIMITIVE
861 .emit
= genX(upload_sbe
),
865 /* ---------------------------------------------------------------------- */
869 genX(upload_raster
)(struct brw_context
*brw
)
871 struct gl_context
*ctx
= &brw
->ctx
;
874 bool render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
877 struct gl_polygon_attrib
*polygon
= &ctx
->Polygon
;
880 struct gl_point_attrib
*point
= &ctx
->Point
;
882 brw_batch_emit(brw
, GENX(3DSTATE_RASTER
), raster
) {
883 if (polygon
->_FrontBit
== render_to_fbo
)
884 raster
.FrontWinding
= CounterClockwise
;
886 if (polygon
->CullFlag
) {
887 switch (polygon
->CullFaceMode
) {
889 raster
.CullMode
= CULLMODE_FRONT
;
892 raster
.CullMode
= CULLMODE_BACK
;
894 case GL_FRONT_AND_BACK
:
895 raster
.CullMode
= CULLMODE_BOTH
;
898 unreachable("not reached");
901 raster
.CullMode
= CULLMODE_NONE
;
904 point
->SmoothFlag
= raster
.SmoothPointEnable
;
906 raster
.DXMultisampleRasterizationEnable
=
907 _mesa_is_multisample_enabled(ctx
);
909 raster
.GlobalDepthOffsetEnableSolid
= polygon
->OffsetFill
;
910 raster
.GlobalDepthOffsetEnableWireframe
= polygon
->OffsetLine
;
911 raster
.GlobalDepthOffsetEnablePoint
= polygon
->OffsetPoint
;
913 switch (polygon
->FrontMode
) {
915 raster
.FrontFaceFillMode
= FILL_MODE_SOLID
;
918 raster
.FrontFaceFillMode
= FILL_MODE_WIREFRAME
;
921 raster
.FrontFaceFillMode
= FILL_MODE_POINT
;
924 unreachable("not reached");
927 switch (polygon
->BackMode
) {
929 raster
.BackFaceFillMode
= FILL_MODE_SOLID
;
932 raster
.BackFaceFillMode
= FILL_MODE_WIREFRAME
;
935 raster
.BackFaceFillMode
= FILL_MODE_POINT
;
938 unreachable("not reached");
942 raster
.AntialiasingEnable
= ctx
->Line
.SmoothFlag
;
945 raster
.ScissorRectangleEnable
= ctx
->Scissor
.EnableFlags
;
948 if (!ctx
->Transform
.DepthClamp
) {
950 raster
.ViewportZFarClipTestEnable
= true;
951 raster
.ViewportZNearClipTestEnable
= true;
953 raster
.ViewportZClipTestEnable
= true;
957 /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
959 raster
.ConservativeRasterizationEnable
=
960 ctx
->IntelConservativeRasterization
;
963 raster
.GlobalDepthOffsetClamp
= polygon
->OffsetClamp
;
964 raster
.GlobalDepthOffsetScale
= polygon
->OffsetFactor
;
966 raster
.GlobalDepthOffsetConstant
= polygon
->OffsetUnits
* 2;
970 static const struct brw_tracked_state
genX(raster_state
) = {
972 .mesa
= _NEW_BUFFERS
|
979 .brw
= BRW_NEW_BLORP
|
981 BRW_NEW_CONSERVATIVE_RASTERIZATION
,
983 .emit
= genX(upload_raster
),
987 /* ---------------------------------------------------------------------- */
990 genX(init_atoms
)(struct brw_context
*brw
)
993 static const struct brw_tracked_state
*render_atoms
[] =
995 /* Once all the programs are done, we know how large urb entry
996 * sizes need to be and can decide if we need to change the urb
1000 &brw_recalculate_urb_fence
,
1005 /* Surface state setup. Must come before the VS/WM unit. The binding
1006 * table upload must be last.
1008 &brw_vs_pull_constants
,
1009 &brw_wm_pull_constants
,
1010 &brw_renderbuffer_surfaces
,
1011 &brw_renderbuffer_read_surfaces
,
1012 &brw_texture_surfaces
,
1013 &brw_vs_binding_table
,
1014 &brw_wm_binding_table
,
1019 /* These set up state for brw_psp_urb_cbs */
1023 &brw_vs_unit
, /* always required, enabled or not */
1029 &brw_invariant_state
,
1031 &brw_binding_table_pointers
,
1032 &brw_blend_constant_color
,
1036 &brw_polygon_stipple
,
1037 &brw_polygon_stipple_offset
,
1044 &brw_indices
, /* must come before brw_vertices */
1048 &brw_constant_buffer
1051 static const struct brw_tracked_state
*render_atoms
[] =
1053 &gen6_sf_and_clip_viewports
,
1055 /* Command packets: */
1058 &gen6_viewport_state
, /* must do after *_vp stages */
1061 &gen6_blend_state
, /* must do before cc unit */
1062 &gen6_color_calc_state
, /* must do before cc unit */
1063 &gen6_depth_stencil_state
, /* must do before cc unit */
1065 &gen6_vs_push_constants
, /* Before vs_state */
1066 &gen6_gs_push_constants
, /* Before gs_state */
1067 &gen6_wm_push_constants
, /* Before wm_state */
1069 /* Surface state setup. Must come before the VS/WM unit. The binding
1070 * table upload must be last.
1072 &brw_vs_pull_constants
,
1073 &brw_vs_ubo_surfaces
,
1074 &brw_gs_pull_constants
,
1075 &brw_gs_ubo_surfaces
,
1076 &brw_wm_pull_constants
,
1077 &brw_wm_ubo_surfaces
,
1078 &gen6_renderbuffer_surfaces
,
1079 &brw_renderbuffer_read_surfaces
,
1080 &brw_texture_surfaces
,
1082 &brw_vs_binding_table
,
1083 &gen6_gs_binding_table
,
1084 &brw_wm_binding_table
,
1089 &gen6_sampler_state
,
1090 &gen6_multisample_state
,
1098 &gen6_scissor_state
,
1100 &gen6_binding_table_pointers
,
1104 &brw_polygon_stipple
,
1105 &brw_polygon_stipple_offset
,
1111 &brw_indices
, /* must come before brw_vertices */
1116 static const struct brw_tracked_state
*render_atoms
[] =
1118 /* Command packets: */
1121 &gen7_sf_clip_viewport
,
1124 &gen7_push_constant_space
,
1126 &gen6_blend_state
, /* must do before cc unit */
1127 &gen6_color_calc_state
, /* must do before cc unit */
1128 &genX(depth_stencil_state
), /* must do before cc unit */
1130 &brw_vs_image_surfaces
, /* Before vs push/pull constants and binding table */
1131 &brw_tcs_image_surfaces
, /* Before tcs push/pull constants and binding table */
1132 &brw_tes_image_surfaces
, /* Before tes push/pull constants and binding table */
1133 &brw_gs_image_surfaces
, /* Before gs push/pull constants and binding table */
1134 &brw_wm_image_surfaces
, /* Before wm push/pull constants and binding table */
1136 &gen6_vs_push_constants
, /* Before vs_state */
1137 &gen7_tcs_push_constants
,
1138 &gen7_tes_push_constants
,
1139 &gen6_gs_push_constants
, /* Before gs_state */
1140 &gen6_wm_push_constants
, /* Before wm_surfaces and constant_buffer */
1142 /* Surface state setup. Must come before the VS/WM unit. The binding
1143 * table upload must be last.
1145 &brw_vs_pull_constants
,
1146 &brw_vs_ubo_surfaces
,
1147 &brw_vs_abo_surfaces
,
1148 &brw_tcs_pull_constants
,
1149 &brw_tcs_ubo_surfaces
,
1150 &brw_tcs_abo_surfaces
,
1151 &brw_tes_pull_constants
,
1152 &brw_tes_ubo_surfaces
,
1153 &brw_tes_abo_surfaces
,
1154 &brw_gs_pull_constants
,
1155 &brw_gs_ubo_surfaces
,
1156 &brw_gs_abo_surfaces
,
1157 &brw_wm_pull_constants
,
1158 &brw_wm_ubo_surfaces
,
1159 &brw_wm_abo_surfaces
,
1160 &gen6_renderbuffer_surfaces
,
1161 &brw_renderbuffer_read_surfaces
,
1162 &brw_texture_surfaces
,
1163 &brw_vs_binding_table
,
1164 &brw_tcs_binding_table
,
1165 &brw_tes_binding_table
,
1166 &brw_gs_binding_table
,
1167 &brw_wm_binding_table
,
1174 &gen6_multisample_state
,
1188 &gen6_scissor_state
,
1192 &brw_polygon_stipple
,
1193 &brw_polygon_stipple_offset
,
1199 &brw_indices
, /* must come before brw_vertices */
1206 static const struct brw_tracked_state
*render_atoms
[] =
1209 &gen8_sf_clip_viewport
,
1212 &gen7_push_constant_space
,
1215 &gen6_color_calc_state
,
1217 &brw_vs_image_surfaces
, /* Before vs push/pull constants and binding table */
1218 &brw_tcs_image_surfaces
, /* Before tcs push/pull constants and binding table */
1219 &brw_tes_image_surfaces
, /* Before tes push/pull constants and binding table */
1220 &brw_gs_image_surfaces
, /* Before gs push/pull constants and binding table */
1221 &brw_wm_image_surfaces
, /* Before wm push/pull constants and binding table */
1223 &gen6_vs_push_constants
, /* Before vs_state */
1224 &gen7_tcs_push_constants
,
1225 &gen7_tes_push_constants
,
1226 &gen6_gs_push_constants
, /* Before gs_state */
1227 &gen6_wm_push_constants
, /* Before wm_surfaces and constant_buffer */
1229 /* Surface state setup. Must come before the VS/WM unit. The binding
1230 * table upload must be last.
1232 &brw_vs_pull_constants
,
1233 &brw_vs_ubo_surfaces
,
1234 &brw_vs_abo_surfaces
,
1235 &brw_tcs_pull_constants
,
1236 &brw_tcs_ubo_surfaces
,
1237 &brw_tcs_abo_surfaces
,
1238 &brw_tes_pull_constants
,
1239 &brw_tes_ubo_surfaces
,
1240 &brw_tes_abo_surfaces
,
1241 &brw_gs_pull_constants
,
1242 &brw_gs_ubo_surfaces
,
1243 &brw_gs_abo_surfaces
,
1244 &brw_wm_pull_constants
,
1245 &brw_wm_ubo_surfaces
,
1246 &brw_wm_abo_surfaces
,
1247 &gen6_renderbuffer_surfaces
,
1248 &brw_renderbuffer_read_surfaces
,
1249 &brw_texture_surfaces
,
1250 &brw_vs_binding_table
,
1251 &brw_tcs_binding_table
,
1252 &brw_tes_binding_table
,
1253 &brw_gs_binding_table
,
1254 &brw_wm_binding_table
,
1261 &gen8_multisample_state
,
1270 &genX(raster_state
),
1276 &genX(depth_stencil_state
),
1279 &gen6_scissor_state
,
1283 &brw_polygon_stipple
,
1284 &brw_polygon_stipple_offset
,
1301 STATIC_ASSERT(ARRAY_SIZE(render_atoms
) <= ARRAY_SIZE(brw
->render_atoms
));
1302 brw_copy_pipeline_atoms(brw
, BRW_RENDER_PIPELINE
,
1303 render_atoms
, ARRAY_SIZE(render_atoms
));
1306 static const struct brw_tracked_state
*compute_atoms
[] =
1309 &brw_cs_image_surfaces
,
1310 &gen7_cs_push_constants
,
1311 &brw_cs_pull_constants
,
1312 &brw_cs_ubo_surfaces
,
1313 &brw_cs_abo_surfaces
,
1314 &brw_cs_texture_surfaces
,
1315 &brw_cs_work_groups_surface
,
1320 STATIC_ASSERT(ARRAY_SIZE(compute_atoms
) <= ARRAY_SIZE(brw
->compute_atoms
));
1321 brw_copy_pipeline_atoms(brw
, BRW_COMPUTE_PIPELINE
,
1322 compute_atoms
, ARRAY_SIZE(compute_atoms
));