i965: Port gen8+ 3DSTATE_PS_EXTRA to genxml.
[mesa.git] / src / mesa / drivers / dri / i965 / genX_state_upload.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "common/gen_device_info.h"
27 #include "genxml/gen_macros.h"
28
29 #include "brw_context.h"
30 #include "brw_state.h"
31 #include "brw_wm.h"
32 #include "brw_util.h"
33
34 #include "intel_batchbuffer.h"
35 #include "intel_buffer_objects.h"
36 #include "intel_fbo.h"
37
38 #include "main/fbobject.h"
39 #include "main/framebuffer.h"
40 #include "main/stencil.h"
41 #include "main/transformfeedback.h"
42
43 UNUSED static void *
44 emit_dwords(struct brw_context *brw, unsigned n)
45 {
46 intel_batchbuffer_begin(brw, n, RENDER_RING);
47 uint32_t *map = brw->batch.map_next;
48 brw->batch.map_next += n;
49 intel_batchbuffer_advance(brw);
50 return map;
51 }
52
/**
 * Address value handed to the genxml packing code.
 *
 * When \c bo is NULL the address is just \c offset (plus a delta); otherwise
 * a relocation against \c bo is emitted (see __gen_combine_address).
 */
struct brw_address {
   struct brw_bo *bo;         /* Target buffer, or NULL for a plain offset. */
   uint32_t read_domains;     /* Forwarded to brw_emit_reloc(). */
   uint32_t write_domain;     /* Forwarded to brw_emit_reloc(). */
   uint32_t offset;           /* Offset added to the relocation delta. */
};
59
60 static uint64_t
61 emit_reloc(struct brw_context *brw,
62 void *location, struct brw_address address, uint32_t delta)
63 {
64 uint32_t offset = (char *) location - (char *) brw->batch.map;
65
66 return brw_emit_reloc(&brw->batch, offset, address.bo,
67 address.offset + delta,
68 address.read_domains,
69 address.write_domain);
70 }
71
/* Types plugged into the genxml pack header included further down: the
 * address representation and the user-data type.  They match the
 * parameters of __gen_combine_address() in this file.
 */
#define __gen_address_type struct brw_address
#define __gen_user_data struct brw_context
74
75 static uint64_t
76 __gen_combine_address(struct brw_context *brw, void *location,
77 struct brw_address address, uint32_t delta)
78 {
79 if (address.bo == NULL) {
80 return address.offset + delta;
81 } else {
82 return emit_reloc(brw, location, address, delta);
83 }
84 }
85
86 static inline struct brw_address
87 render_bo(struct brw_bo *bo, uint32_t offset)
88 {
89 return (struct brw_address) {
90 .bo = bo,
91 .offset = offset,
92 .read_domains = I915_GEM_DOMAIN_RENDER,
93 .write_domain = I915_GEM_DOMAIN_RENDER,
94 };
95 }
96
97 static inline struct brw_address
98 instruction_bo(struct brw_bo *bo, uint32_t offset)
99 {
100 return (struct brw_address) {
101 .bo = bo,
102 .offset = offset,
103 .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
104 .write_domain = I915_GEM_DOMAIN_INSTRUCTION,
105 };
106 }
107
108 #include "genxml/genX_pack.h"
109
/* Token-pasting helpers: given a command name, form the identifiers
 * <cmd>_length, <cmd>_length_bias, <cmd>_header and <cmd>_pack that the
 * emit macros below rely on.
 */
#define _brw_cmd_length(cmd) cmd ## _length
#define _brw_cmd_length_bias(cmd) cmd ## _length_bias
#define _brw_cmd_header(cmd) cmd ## _header
#define _brw_cmd_pack(cmd) cmd ## _pack
114
/**
 * Emit a fixed-length command into the batch.
 *
 * Used as a statement prefix: brw_batch_emit(brw, CMD, name) { ... }.
 * The for-loop declares a local `struct cmd name` initialised with the
 * command header and reserves _brw_cmd_length(cmd) dwords via emit_dwords().
 * The attached body runs exactly once (it fills in `name`); the loop's
 * increment expression then packs `name` into the reserved dwords and
 * clears _dst so the loop terminates.
 */
#define brw_batch_emit(brw, cmd, name)                  \
   for (struct cmd name = { _brw_cmd_header(cmd) },     \
        *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);              \
        _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),   \
        _dst = NULL)
121
/**
 * Emit a variable-length command of \p n dwords into the batch.
 *
 * Reserves \p n dwords, packs the command header with DWordLength set to
 * n minus the command's length bias (plus any designated initialisers
 * passed as trailing arguments), and evaluates to a pointer to dword 1 of
 * the reserved space, where the caller writes the payload.
 *
 * \p n is parenthesised at each use so that an argument containing a
 * low-precedence operator (e.g. a conditional expression) is not
 * mis-parsed next to the subtraction.  Note that \p n is still evaluated
 * twice, so it must not have side effects.
 */
#define brw_batch_emitn(brw, cmd, n, ...) ({              \
      uint32_t *_dw = emit_dwords(brw, (n));              \
      struct cmd template = {                             \
         _brw_cmd_header(cmd),                            \
         .DWordLength = (n) - _brw_cmd_length_bias(cmd),  \
         __VA_ARGS__                                      \
      };                                                  \
      _brw_cmd_pack(cmd)(brw, _dw, &template);            \
      _dw + 1; /* Array starts at dw[1] */                \
   })
132
/**
 * Pack an indirect state structure into space obtained from
 * brw_state_batch().
 *
 * Used like brw_batch_emit(): the attached body runs once with a
 * zero-initialised `struct cmd name` in scope, after which the structure is
 * packed into the allocation.  The allocation is _brw_cmd_length(cmd) * 4
 * bytes with the requested \p align; \p offset is passed through to
 * brw_state_batch().
 */
#define brw_state_emit(brw, cmd, align, offset, name)              \
   for (struct cmd name = { 0, },                                  \
        *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4,     \
                                align, offset);                    \
        __builtin_expect(_dst != NULL, 1);                         \
        _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),              \
        _dst = NULL)
140
141 #if GEN_GEN >= 6
142 /**
143 * Determine the appropriate attribute override value to store into the
144 * 3DSTATE_SF structure for a given fragment shader attribute. The attribute
145 * override value contains two pieces of information: the location of the
146 * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
147 * flag indicating whether to "swizzle" the attribute based on the direction
148 * the triangle is facing.
149 *
150 * If an attribute is "swizzled", then the given VUE location is used for
151 * front-facing triangles, and the VUE location that immediately follows is
152 * used for back-facing triangles. We use this to implement the mapping from
153 * gl_FrontColor/gl_BackColor to gl_Color.
154 *
155 * urb_entry_read_offset is the offset into the VUE at which the SF unit is
156 * being instructed to begin reading attribute data. It can be set to a
157 * nonzero value to prevent the SF unit from wasting time reading elements of
158 * the VUE that are not needed by the fragment shader. It is measured in
159 * 256-bit increments.
160 */
161 static void
162 genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr,
163 const struct brw_vue_map *vue_map,
164 int urb_entry_read_offset, int fs_attr,
165 bool two_side_color, uint32_t *max_source_attr)
166 {
167 /* Find the VUE slot for this attribute. */
168 int slot = vue_map->varying_to_slot[fs_attr];
169
170 /* Viewport and Layer are stored in the VUE header. We need to override
171 * them to zero if earlier stages didn't write them, as GL requires that
172 * they read back as zero when not explicitly set.
173 */
174 if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
175 attr->ComponentOverrideX = true;
176 attr->ComponentOverrideW = true;
177 attr->ConstantSource = CONST_0000;
178
179 if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
180 attr->ComponentOverrideY = true;
181 if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
182 attr->ComponentOverrideZ = true;
183
184 return;
185 }
186
187 /* If there was only a back color written but not front, use back
188 * as the color instead of undefined
189 */
190 if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
191 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
192 if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
193 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
194
195 if (slot == -1) {
196 /* This attribute does not exist in the VUE--that means that the vertex
197 * shader did not write to it. This means that either:
198 *
199 * (a) This attribute is a texture coordinate, and it is going to be
200 * replaced with point coordinates (as a consequence of a call to
201 * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
202 * hardware will ignore whatever attribute override we supply.
203 *
204 * (b) This attribute is read by the fragment shader but not written by
205 * the vertex shader, so its value is undefined. Therefore the
206 * attribute override we supply doesn't matter.
207 *
208 * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
209 * previous shader stage.
210 *
211 * Note that we don't have to worry about the cases where the attribute
212 * is gl_PointCoord or is undergoing point sprite coordinate
213 * replacement, because in those cases, this function isn't called.
214 *
215 * In case (c), we need to program the attribute overrides so that the
216 * primitive ID will be stored in this slot. In every other case, the
217 * attribute override we supply doesn't matter. So just go ahead and
218 * program primitive ID in every case.
219 */
220 attr->ComponentOverrideW = true;
221 attr->ComponentOverrideX = true;
222 attr->ComponentOverrideY = true;
223 attr->ComponentOverrideZ = true;
224 attr->ConstantSource = PRIM_ID;
225 return;
226 }
227
228 /* Compute the location of the attribute relative to urb_entry_read_offset.
229 * Each increment of urb_entry_read_offset represents a 256-bit value, so
230 * it counts for two 128-bit VUE slots.
231 */
232 int source_attr = slot - 2 * urb_entry_read_offset;
233 assert(source_attr >= 0 && source_attr < 32);
234
235 /* If we are doing two-sided color, and the VUE slot following this one
236 * represents a back-facing color, then we need to instruct the SF unit to
237 * do back-facing swizzling.
238 */
239 bool swizzling = two_side_color &&
240 ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
241 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
242 (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
243 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
244
245 /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
246 if (*max_source_attr < source_attr + swizzling)
247 *max_source_attr = source_attr + swizzling;
248
249 attr->SourceAttribute = source_attr;
250 if (swizzling)
251 attr->SwizzleSelect = INPUTATTR_FACING;
252 }
253
254
255 static void
256 genX(calculate_attr_overrides)(const struct brw_context *brw,
257 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,
258 uint32_t *point_sprite_enables,
259 uint32_t *urb_entry_read_length,
260 uint32_t *urb_entry_read_offset)
261 {
262 const struct gl_context *ctx = &brw->ctx;
263
264 /* _NEW_POINT */
265 const struct gl_point_attrib *point = &ctx->Point;
266
267 /* BRW_NEW_FS_PROG_DATA */
268 const struct brw_wm_prog_data *wm_prog_data =
269 brw_wm_prog_data(brw->wm.base.prog_data);
270 uint32_t max_source_attr = 0;
271
272 *point_sprite_enables = 0;
273
274 /* BRW_NEW_FRAGMENT_PROGRAM
275 *
276 * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
277 * the full vertex header. Otherwise, we can program the SF to start
278 * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
279 * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
280 * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
281 */
282
283 bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
284 (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
285
286 *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
287
288 /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
289 * description of dw10 Point Sprite Texture Coordinate Enable:
290 *
291 * "This field must be programmed to zero when non-point primitives
292 * are rendered."
293 *
294 * The SandyBridge PRM doesn't explicitly say that point sprite enables
295 * must be programmed to zero when rendering non-point primitives, but
296 * the IvyBridge PRM does, and if we don't, we get garbage.
297 *
298 * This is not required on Haswell, as the hardware ignores this state
299 * when drawing non-points -- although we do still need to be careful to
300 * correctly set the attr overrides.
301 *
302 * _NEW_POLYGON
303 * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
304 */
305 bool drawing_points = brw_is_drawing_points(brw);
306
307 for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
308 int input_index = wm_prog_data->urb_setup[attr];
309
310 if (input_index < 0)
311 continue;
312
313 /* _NEW_POINT */
314 bool point_sprite = false;
315 if (drawing_points) {
316 if (point->PointSprite &&
317 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
318 (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) {
319 point_sprite = true;
320 }
321
322 if (attr == VARYING_SLOT_PNTC)
323 point_sprite = true;
324
325 if (point_sprite)
326 *point_sprite_enables |= (1 << input_index);
327 }
328
329 /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
330 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };
331
332 if (!point_sprite) {
333 genX(get_attr_override)(&attribute,
334 &brw->vue_map_geom_out,
335 *urb_entry_read_offset, attr,
336 brw->ctx.VertexProgram._TwoSideEnabled,
337 &max_source_attr);
338 }
339
340 /* The hardware can only do the overrides on 16 overrides at a
341 * time, and the other up to 16 have to be lined up so that the
342 * input index = the output index. We'll need to do some
343 * tweaking to make sure that's the case.
344 */
345 if (input_index < 16)
346 attr_overrides[input_index] = attribute;
347 else
348 assert(attribute.SourceAttribute == input_index);
349 }
350
351 /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
352 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
353 *
354 * "This field should be set to the minimum length required to read the
355 * maximum source attribute. The maximum source attribute is indicated
356 * by the maximum value of the enabled Attribute # Source Attribute if
357 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
358 * enable is not set.
359 * read_length = ceiling((max_source_attr + 1) / 2)
360 *
361 * [errata] Corruption/Hang possible if length programmed larger than
362 * recommended"
363 *
364 * Similar text exists for Ivy Bridge.
365 */
366 *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
367 }
368
369 /* ---------------------------------------------------------------------- */
370
/**
 * Emit the depth and stencil test state from the current GL context.
 *
 * On gen8+ the state is emitted directly as a 3DSTATE_WM_DEPTH_STENCIL
 * command.  On earlier gens it is packed as an indirect DEPTH_STENCIL_STATE
 * structure (64-byte aligned) and then referenced by a state-pointer
 * command: 3DSTATE_CC_STATE_POINTERS on gen6,
 * 3DSTATE_DEPTH_STENCIL_STATE_POINTERS on gen7.
 */
static void
genX(upload_depth_stencil_state)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb =
      intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);

   /* _NEW_DEPTH */
   struct gl_depthbuffer_attrib *depth = &ctx->Depth;

   /* _NEW_STENCIL */
   struct gl_stencil_attrib *stencil = &ctx->Stencil;
   /* Index of the stencil state used for back faces. */
   const int b = stencil->_BackFace;

   /* Both variants open the same block; only the packet and the way it is
    * placed (inline in the batch vs. indirect state) differ per gen.
    */
#if GEN_GEN >= 8
   brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
#else
   uint32_t ds_offset;
   brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, wmds) {
#endif
      /* Depth testing needs both the GL enable and a depth renderbuffer. */
      if (depth->Test && depth_irb) {
         wmds.DepthTestEnable = true;
         wmds.DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
         wmds.DepthTestFunction = intel_translate_compare_func(depth->Func);
      }

      if (stencil->_Enabled) {
         wmds.StencilTestEnable = true;
         /* Only the low 8 bits of the GL masks are programmed. */
         wmds.StencilWriteMask = stencil->WriteMask[0] & 0xff;
         wmds.StencilTestMask = stencil->ValueMask[0] & 0xff;

         wmds.StencilTestFunction =
            intel_translate_compare_func(stencil->Function[0]);
         wmds.StencilFailOp =
            intel_translate_stencil_op(stencil->FailFunc[0]);
         wmds.StencilPassDepthPassOp =
            intel_translate_stencil_op(stencil->ZPassFunc[0]);
         wmds.StencilPassDepthFailOp =
            intel_translate_stencil_op(stencil->ZFailFunc[0]);

         wmds.StencilBufferWriteEnable = stencil->_WriteEnabled;

         /* Separate back-face state, taken from index b. */
         if (stencil->_TestTwoSide) {
            wmds.DoubleSidedStencilEnable = true;
            wmds.BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
            wmds.BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;

            wmds.BackfaceStencilTestFunction =
               intel_translate_compare_func(stencil->Function[b]);
            wmds.BackfaceStencilFailOp =
               intel_translate_stencil_op(stencil->FailFunc[b]);
            wmds.BackfaceStencilPassDepthPassOp =
               intel_translate_stencil_op(stencil->ZPassFunc[b]);
            wmds.BackfaceStencilPassDepthFailOp =
               intel_translate_stencil_op(stencil->ZFailFunc[b]);
         }

         /* On gen9+ the stencil reference values are set in this packet. */
#if GEN_GEN >= 9
         wmds.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
         wmds.BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b);
#endif
      }
   }

   /* Pre-gen8: point the hardware at the indirect state packed above. */
#if GEN_GEN == 6
   brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
      ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
      ptr.DEPTH_STENCIL_STATEChange = true;
   }
#elif GEN_GEN == 7
   brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {
      ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
   }
#endif
}
448
449 static const struct brw_tracked_state genX(depth_stencil_state) = {
450 .dirty = {
451 .mesa = _NEW_BUFFERS |
452 _NEW_DEPTH |
453 _NEW_STENCIL,
454 .brw = BRW_NEW_BLORP |
455 (GEN_GEN >= 8 ? BRW_NEW_CONTEXT
456 : BRW_NEW_BATCH |
457 BRW_NEW_STATE_BASE_ADDRESS),
458 },
459 .emit = genX(upload_depth_stencil_state),
460 };
461
462 /* ---------------------------------------------------------------------- */
463
464 static void
465 genX(upload_clip_state)(struct brw_context *brw)
466 {
467 struct gl_context *ctx = &brw->ctx;
468
469 /* _NEW_BUFFERS */
470 struct gl_framebuffer *fb = ctx->DrawBuffer;
471
472 /* BRW_NEW_FS_PROG_DATA */
473 struct brw_wm_prog_data *wm_prog_data =
474 brw_wm_prog_data(brw->wm.base.prog_data);
475
476 brw_batch_emit(brw, GENX(3DSTATE_CLIP), clip) {
477 clip.StatisticsEnable = !brw->meta_in_progress;
478
479 if (wm_prog_data->barycentric_interp_modes &
480 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
481 clip.NonPerspectiveBarycentricEnable = true;
482
483 #if GEN_GEN >= 7
484 clip.EarlyCullEnable = true;
485 #endif
486
487 #if GEN_GEN == 7
488 clip.FrontWinding = ctx->Polygon._FrontBit == _mesa_is_user_fbo(fb);
489
490 if (ctx->Polygon.CullFlag) {
491 switch (ctx->Polygon.CullFaceMode) {
492 case GL_FRONT:
493 clip.CullMode = CULLMODE_FRONT;
494 break;
495 case GL_BACK:
496 clip.CullMode = CULLMODE_BACK;
497 break;
498 case GL_FRONT_AND_BACK:
499 clip.CullMode = CULLMODE_BOTH;
500 break;
501 default:
502 unreachable("Should not get here: invalid CullFlag");
503 }
504 } else {
505 clip.CullMode = CULLMODE_NONE;
506 }
507 #endif
508
509 #if GEN_GEN < 8
510 clip.UserClipDistanceCullTestEnableBitmask =
511 brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask;
512
513 clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp;
514 #endif
515
516 /* _NEW_LIGHT */
517 if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
518 clip.TriangleStripListProvokingVertexSelect = 0;
519 clip.TriangleFanProvokingVertexSelect = 1;
520 clip.LineStripListProvokingVertexSelect = 0;
521 } else {
522 clip.TriangleStripListProvokingVertexSelect = 2;
523 clip.TriangleFanProvokingVertexSelect = 2;
524 clip.LineStripListProvokingVertexSelect = 1;
525 }
526
527 /* _NEW_TRANSFORM */
528 clip.UserClipDistanceClipTestEnableBitmask =
529 ctx->Transform.ClipPlanesEnabled;
530
531 #if GEN_GEN >= 8
532 clip.ForceUserClipDistanceClipTestEnableBitmask = true;
533 #endif
534
535 if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
536 clip.APIMode = APIMODE_D3D;
537 else
538 clip.APIMode = APIMODE_OGL;
539
540 clip.GuardbandClipTestEnable = true;
541
542 /* BRW_NEW_VIEWPORT_COUNT */
543 const unsigned viewport_count = brw->clip.viewport_count;
544
545 if (ctx->RasterDiscard) {
546 clip.ClipMode = CLIPMODE_REJECT_ALL;
547 #if GEN_GEN == 6
548 perf_debug("Rasterizer discard is currently implemented via the "
549 "clipper; having the GS not write primitives would "
550 "likely be faster.\n");
551 #endif
552 } else {
553 clip.ClipMode = CLIPMODE_NORMAL;
554 }
555
556 clip.ClipEnable = brw->primitive != _3DPRIM_RECTLIST;
557
558 /* _NEW_POLYGON,
559 * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
560 */
561 if (!brw_is_drawing_points(brw) && !brw_is_drawing_lines(brw))
562 clip.ViewportXYClipTestEnable = true;
563
564 clip.MinimumPointWidth = 0.125;
565 clip.MaximumPointWidth = 255.875;
566 clip.MaximumVPIndex = viewport_count - 1;
567 if (_mesa_geometric_layers(fb) == 0)
568 clip.ForceZeroRTAIndexEnable = true;
569 }
570 }
571
572 static const struct brw_tracked_state genX(clip_state) = {
573 .dirty = {
574 .mesa = _NEW_BUFFERS |
575 _NEW_LIGHT |
576 _NEW_POLYGON |
577 _NEW_TRANSFORM,
578 .brw = BRW_NEW_BLORP |
579 BRW_NEW_CONTEXT |
580 BRW_NEW_FS_PROG_DATA |
581 BRW_NEW_GS_PROG_DATA |
582 BRW_NEW_VS_PROG_DATA |
583 BRW_NEW_META_IN_PROGRESS |
584 BRW_NEW_PRIMITIVE |
585 BRW_NEW_RASTERIZER_DISCARD |
586 BRW_NEW_TES_PROG_DATA |
587 BRW_NEW_VIEWPORT_COUNT,
588 },
589 .emit = genX(upload_clip_state),
590 };
591
592 /* ---------------------------------------------------------------------- */
593
594 static void
595 genX(upload_sf)(struct brw_context *brw)
596 {
597 struct gl_context *ctx = &brw->ctx;
598 float point_size;
599
600 #if GEN_GEN <= 7
601 /* _NEW_BUFFERS */
602 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
603 const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
604 #endif
605
606 brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
607 sf.StatisticsEnable = true;
608 sf.ViewportTransformEnable = brw->sf.viewport_transform_enable;
609
610 #if GEN_GEN == 7
611 /* _NEW_BUFFERS */
612 sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw);
613 #endif
614
615 #if GEN_GEN <= 7
616 /* _NEW_POLYGON */
617 sf.FrontWinding = ctx->Polygon._FrontBit == render_to_fbo;
618 sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
619 sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
620 sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;
621
622 switch (ctx->Polygon.FrontMode) {
623 case GL_FILL:
624 sf.FrontFaceFillMode = FILL_MODE_SOLID;
625 break;
626 case GL_LINE:
627 sf.FrontFaceFillMode = FILL_MODE_WIREFRAME;
628 break;
629 case GL_POINT:
630 sf.FrontFaceFillMode = FILL_MODE_POINT;
631 break;
632 default:
633 unreachable("not reached");
634 }
635
636 switch (ctx->Polygon.BackMode) {
637 case GL_FILL:
638 sf.BackFaceFillMode = FILL_MODE_SOLID;
639 break;
640 case GL_LINE:
641 sf.BackFaceFillMode = FILL_MODE_WIREFRAME;
642 break;
643 case GL_POINT:
644 sf.BackFaceFillMode = FILL_MODE_POINT;
645 break;
646 default:
647 unreachable("not reached");
648 }
649
650 sf.ScissorRectangleEnable = true;
651
652 if (ctx->Polygon.CullFlag) {
653 switch (ctx->Polygon.CullFaceMode) {
654 case GL_FRONT:
655 sf.CullMode = CULLMODE_FRONT;
656 break;
657 case GL_BACK:
658 sf.CullMode = CULLMODE_BACK;
659 break;
660 case GL_FRONT_AND_BACK:
661 sf.CullMode = CULLMODE_BOTH;
662 break;
663 default:
664 unreachable("not reached");
665 }
666 } else {
667 sf.CullMode = CULLMODE_NONE;
668 }
669
670 #if GEN_IS_HASWELL
671 sf.LineStippleEnable = ctx->Line.StippleFlag;
672 #endif
673
674 if (multisampled_fbo && ctx->Multisample.Enabled)
675 sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
676
677 sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
678 sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
679 sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
680 #endif
681
682 /* _NEW_LINE */
683 sf.LineWidth = brw_get_line_width_float(brw);
684
685 if (ctx->Line.SmoothFlag) {
686 sf.LineEndCapAntialiasingRegionWidth = _10pixels;
687 #if GEN_GEN <= 7
688 sf.AntiAliasingEnable = true;
689 #endif
690 }
691
692 /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
693 point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
694 /* Clamp to the hardware limits */
695 sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f);
696
697 /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
698 if (use_state_point_size(brw))
699 sf.PointWidthSource = State;
700
701 #if GEN_GEN >= 8
702 /* _NEW_POINT | _NEW_MULTISAMPLE */
703 if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
704 !ctx->Point.PointSprite)
705 sf.SmoothPointEnable = true;
706 #endif
707
708 sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
709
710 /* _NEW_LIGHT */
711 if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
712 sf.TriangleStripListProvokingVertexSelect = 2;
713 sf.TriangleFanProvokingVertexSelect = 2;
714 sf.LineStripListProvokingVertexSelect = 1;
715 } else {
716 sf.TriangleFanProvokingVertexSelect = 1;
717 }
718
719 #if GEN_GEN == 6
720 /* BRW_NEW_FS_PROG_DATA */
721 const struct brw_wm_prog_data *wm_prog_data =
722 brw_wm_prog_data(brw->wm.base.prog_data);
723
724 sf.AttributeSwizzleEnable = true;
725 sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
726
727 /*
728 * Window coordinates in an FBO are inverted, which means point
729 * sprite origin must be inverted, too.
730 */
731 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
732 sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
733 } else {
734 sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
735 }
736
737 /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
738 * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
739 */
740 uint32_t urb_entry_read_length;
741 uint32_t urb_entry_read_offset;
742 uint32_t point_sprite_enables;
743 genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables,
744 &urb_entry_read_length,
745 &urb_entry_read_offset);
746 sf.VertexURBEntryReadLength = urb_entry_read_length;
747 sf.VertexURBEntryReadOffset = urb_entry_read_offset;
748 sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;
749 sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
750 #endif
751 }
752 }
753
754 static const struct brw_tracked_state genX(sf_state) = {
755 .dirty = {
756 .mesa = _NEW_LIGHT |
757 _NEW_LINE |
758 _NEW_MULTISAMPLE |
759 _NEW_POINT |
760 _NEW_PROGRAM |
761 (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
762 .brw = BRW_NEW_BLORP |
763 BRW_NEW_CONTEXT |
764 BRW_NEW_VUE_MAP_GEOM_OUT |
765 (GEN_GEN <= 7 ? BRW_NEW_GS_PROG_DATA |
766 BRW_NEW_PRIMITIVE |
767 BRW_NEW_TES_PROG_DATA
768 : 0) |
769 (GEN_GEN == 6 ? BRW_NEW_FS_PROG_DATA |
770 BRW_NEW_FRAGMENT_PROGRAM
771 : 0),
772 },
773 .emit = genX(upload_sf),
774 };
775
776 /* ---------------------------------------------------------------------- */
777
/**
 * Emit 3DSTATE_WM (preceded by 3DSTATE_CONSTANT_PS on gen6) from the
 * current GL state and fragment shader program data.
 *
 * The set of fields programmed depends on the generation being compiled
 * for: thread dispatch, kernel pointers and scratch setup are programmed
 * here only before gen7; kill/dispatch-enable and multisample modes only
 * before gen8; early depth/stencil control only on gen7+.
 */
static void
genX(upload_wm)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   /* Only read on generations below 8, hence UNUSED. */
   UNUSED bool writes_depth =
      wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;

#if GEN_GEN < 7
   const struct brw_stage_state *stage_state = &brw->wm.base;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* We can't fold this into gen6_upload_wm_push_constants(), because
    * according to the SNB PRM, vol 2 part 1 section 7.2.2
    * (3DSTATE_CONSTANT_PS [DevSNB]):
    *
    *     "[DevSNB]: This packet must be followed by WM_STATE."
    */
   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_PS), wmcp) {
      if (wm_prog_data->base.nr_params != 0) {
         wmcp.Buffer0Valid = true;
         /* Pointer to the WM constant buffer.  Covered by the set of
          * state flags from gen6_upload_wm_push_constants.
          */
         wmcp.PointertoPSConstantBuffer0 = stage_state->push_const_offset;
         wmcp.PSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
      }
   }
#endif

   brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
      wm.StatisticsEnable = true;
      wm.LineAntialiasingRegionWidth = _10pixels;
      wm.LineEndCapAntialiasingRegionWidth = _05pixels;

#if GEN_GEN < 7
      if (wm_prog_data->base.use_alt_mode)
         wm.FloatingPointMode = Alternate;

      /* The sampler count is programmed in units of four samplers. */
      wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4);
      wm.BindingTableEntryCount = wm_prog_data->base.binding_table.size_bytes / 4;
      wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
      wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
      wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
      wm.DispatchGRFStartRegisterForConstantSetupData0 =
         wm_prog_data->base.dispatch_grf_start_reg;
      wm.DispatchGRFStartRegisterForConstantSetupData2 =
         wm_prog_data->dispatch_grf_start_reg_2;
      wm.KernelStartPointer0 = stage_state->prog_offset;
      /* The second kernel sits prog_offset_2 past the first one. */
      wm.KernelStartPointer2 = stage_state->prog_offset +
         wm_prog_data->prog_offset_2;
      wm.DualSourceBlendEnable =
         wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
         ctx->Color.Blend[0]._UsesDualSrc;
      wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;

      /* From the SNB PRM, volume 2 part 1, page 281:
       * "If the PS kernel does not need the Position XY Offsets
       * to compute a Position XY value, then this field should be
       * programmed to POSOFFSET_NONE."
       *
       * "SW Recommendation: If the PS kernel needs the Position Offsets
       * to compute a Position XY value, this field should match Position
       * ZW Interpolation Mode to ensure a consistent position.xyzw
       * computation."
       * We only require XY sample offsets. So, this recommendation doesn't
       * look useful at the moment. We might need this in future.
       */
      if (wm_prog_data->uses_pos_offset)
         wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
      else
         wm.PositionXYOffsetSelect = POSOFFSET_NONE;

      /* NOTE(review): the ffs(...) - 11 delta presumably encodes the
       * per-thread scratch size as a power of two in the low bits of the
       * pointer -- confirm against the PRM.
       */
      if (wm_prog_data->base.total_scratch) {
         wm.ScratchSpaceBasePointer =
            render_bo(stage_state->scratch_bo,
                      ffs(stage_state->per_thread_scratch) - 11);
      }

      wm.PixelShaderComputedDepth = writes_depth;
#endif

      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;

      /* _NEW_LINE */
      wm.LineStippleEnable = ctx->Line.StippleFlag;

      /* _NEW_POLYGON */
      wm.PolygonStippleEnable = ctx->Polygon.StippleFlag;
      wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;

#if GEN_GEN < 8
      /* _NEW_BUFFERS */
      const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;

      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
      if (wm_prog_data->uses_kill ||
          _mesa_is_alpha_test_enabled(ctx) ||
          _mesa_is_alpha_to_coverage_enabled(ctx) ||
          wm_prog_data->uses_omask) {
         wm.PixelShaderKillsPixel = true;
      }

      /* _NEW_BUFFERS | _NEW_COLOR
       *
       * Note this reads wm.PixelShaderKillsPixel, so it must stay after the
       * assignment above.
       */
      if (brw_color_buffer_write_enabled(brw) || writes_depth ||
          wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) {
         wm.ThreadDispatchEnable = true;
      }
      if (multisampled_fbo) {
         /* _NEW_MULTISAMPLE */
         if (ctx->Multisample.Enabled)
            wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
         else
            wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;

         if (wm_prog_data->persample_dispatch)
            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
         else
            wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
      } else {
         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
      }

#if GEN_GEN >= 7
      wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
#endif

      /* The "UAV access enable" bits are unnecessary on HSW because they only
       * seem to have an effect on the HW-assisted coherency mechanism which we
       * don't need, and the rasterization-related UAV_ONLY flag and the
       * DISPATCH_ENABLE bit can be set independently from it.
       * C.f. gen8_upload_ps_extra().
       *
       * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS |
       * _NEW_COLOR
       */
#if GEN_IS_HASWELL
      if (!(brw_color_buffer_write_enabled(brw) || writes_depth) &&
          wm_prog_data->has_side_effects)
         wm.PSUAVonly = ON;
#endif
#endif

#if GEN_GEN >= 7
      /* BRW_NEW_FS_PROG_DATA */
      if (wm_prog_data->early_fragment_tests)
         wm.EarlyDepthStencilControl = EDSC_PREPS;
      else if (wm_prog_data->has_side_effects)
         wm.EarlyDepthStencilControl = EDSC_PSEXEC;
#endif
   }
}
938
939 static const struct brw_tracked_state genX(wm_state) = {
940 .dirty = {
941 .mesa = _NEW_LINE |
942 _NEW_POLYGON |
943 (GEN_GEN < 8 ? _NEW_BUFFERS |
944 _NEW_COLOR |
945 _NEW_MULTISAMPLE :
946 0) |
947 (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
948 .brw = BRW_NEW_BLORP |
949 BRW_NEW_FS_PROG_DATA |
950 (GEN_GEN < 7 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
951 BRW_NEW_BATCH
952 : BRW_NEW_CONTEXT),
953 },
954 .emit = genX(upload_wm),
955 };
956
957 #endif
958
959 /* ---------------------------------------------------------------------- */
960
961 #if GEN_GEN >= 7
962 static void
963 genX(upload_sbe)(struct brw_context *brw)
964 {
965 struct gl_context *ctx = &brw->ctx;
966 /* BRW_NEW_FS_PROG_DATA */
967 const struct brw_wm_prog_data *wm_prog_data =
968 brw_wm_prog_data(brw->wm.base.prog_data);
969 #if GEN_GEN >= 8
970 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } };
971 #else
972 #define attr_overrides sbe.Attribute
973 #endif
974 uint32_t urb_entry_read_length;
975 uint32_t urb_entry_read_offset;
976 uint32_t point_sprite_enables;
977
978 brw_batch_emit(brw, GENX(3DSTATE_SBE), sbe) {
979 sbe.AttributeSwizzleEnable = true;
980 sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
981
982 /* _NEW_BUFFERS */
983 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
984
985 /* _NEW_POINT
986 *
987 * Window coordinates in an FBO are inverted, which means point
988 * sprite origin must be inverted.
989 */
990 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo)
991 sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
992 else
993 sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
994
995 /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM,
996 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM |
997 * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA |
998 * BRW_NEW_VUE_MAP_GEOM_OUT
999 */
1000 genX(calculate_attr_overrides)(brw,
1001 attr_overrides,
1002 &point_sprite_enables,
1003 &urb_entry_read_length,
1004 &urb_entry_read_offset);
1005
1006 /* Typically, the URB entry read length and offset should be programmed
1007 * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active
1008 * stage which produces geometry. However, we don't know the proper
1009 * value until we call calculate_attr_overrides().
1010 *
1011 * To fit with our existing code, we override the inherited values and
1012 * specify it here directly, as we did on previous generations.
1013 */
1014 sbe.VertexURBEntryReadLength = urb_entry_read_length;
1015 sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
1016 sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables;
1017 sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
1018
1019 #if GEN_GEN >= 8
1020 sbe.ForceVertexURBEntryReadLength = true;
1021 sbe.ForceVertexURBEntryReadOffset = true;
1022 #endif
1023
1024 #if GEN_GEN >= 9
1025 /* prepare the active component dwords */
1026 int input_index = 0;
1027 for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1028 if (!(brw->fragment_program->info.inputs_read &
1029 BITFIELD64_BIT(attr))) {
1030 continue;
1031 }
1032
1033 assert(input_index < 32);
1034
1035 sbe.AttributeActiveComponentFormat[input_index] = ACTIVE_COMPONENT_XYZW;
1036 ++input_index;
1037 }
1038 #endif
1039 }
1040
1041 #if GEN_GEN >= 8
1042 brw_batch_emit(brw, GENX(3DSTATE_SBE_SWIZ), sbes) {
1043 for (int i = 0; i < 16; i++)
1044 sbes.Attribute[i] = attr_overrides[i];
1045 }
1046 #endif
1047
1048 #undef attr_overrides
1049 }
1050
1051 static const struct brw_tracked_state genX(sbe_state) = {
1052 .dirty = {
1053 .mesa = _NEW_BUFFERS |
1054 _NEW_LIGHT |
1055 _NEW_POINT |
1056 _NEW_POLYGON |
1057 _NEW_PROGRAM,
1058 .brw = BRW_NEW_BLORP |
1059 BRW_NEW_CONTEXT |
1060 BRW_NEW_FRAGMENT_PROGRAM |
1061 BRW_NEW_FS_PROG_DATA |
1062 BRW_NEW_GS_PROG_DATA |
1063 BRW_NEW_TES_PROG_DATA |
1064 BRW_NEW_VUE_MAP_GEOM_OUT |
1065 (GEN_GEN == 7 ? BRW_NEW_PRIMITIVE
1066 : 0),
1067 },
1068 .emit = genX(upload_sbe),
1069 };
1070
1071 /* ---------------------------------------------------------------------- */
1072
1073 /**
1074 * Outputs the 3DSTATE_SO_DECL_LIST command.
1075 *
1076 * The data output is a series of 64-bit entries containing a SO_DECL per
1077 * stream. We only have one stream of rendering coming out of the GS unit, so
1078 * we only emit stream 0 (low 16 bits) SO_DECLs.
1079 */
1080 static void
1081 genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
1082 const struct brw_vue_map *vue_map)
1083 {
1084 struct gl_context *ctx = &brw->ctx;
1085 /* BRW_NEW_TRANSFORM_FEEDBACK */
1086 struct gl_transform_feedback_object *xfb_obj =
1087 ctx->TransformFeedback.CurrentObject;
1088 const struct gl_transform_feedback_info *linked_xfb_info =
1089 xfb_obj->program->sh.LinkedTransformFeedback;
1090 struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
1091 int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
1092 int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
1093 int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
1094 int max_decls = 0;
1095 STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
1096
1097 memset(so_decl, 0, sizeof(so_decl));
1098
1099 /* Construct the list of SO_DECLs to be emitted. The formatting of the
1100 * command feels strange -- each dword pair contains a SO_DECL per stream.
1101 */
1102 for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
1103 int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
1104 struct GENX(SO_DECL) decl = {0};
1105 int varying = linked_xfb_info->Outputs[i].OutputRegister;
1106 const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
1107 unsigned component_mask = (1 << components) - 1;
1108 unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
1109 unsigned decl_buffer_slot = buffer;
1110 assert(stream_id < MAX_VERTEX_STREAMS);
1111
1112 /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
1113 * gl_Layer is stored in VARYING_SLOT_PSIZ.y
1114 * gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
1115 */
1116 if (varying == VARYING_SLOT_PSIZ) {
1117 assert(components == 1);
1118 component_mask <<= 3;
1119 } else if (varying == VARYING_SLOT_LAYER) {
1120 assert(components == 1);
1121 component_mask <<= 1;
1122 } else if (varying == VARYING_SLOT_VIEWPORT) {
1123 assert(components == 1);
1124 component_mask <<= 2;
1125 } else {
1126 component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
1127 }
1128
1129 buffer_mask[stream_id] |= 1 << buffer;
1130
1131 decl.OutputBufferSlot = decl_buffer_slot;
1132 if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) {
1133 decl.RegisterIndex = vue_map->varying_to_slot[VARYING_SLOT_PSIZ];
1134 } else {
1135 assert(vue_map->varying_to_slot[varying] >= 0);
1136 decl.RegisterIndex = vue_map->varying_to_slot[varying];
1137 }
1138 decl.ComponentMask = component_mask;
1139
1140 /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
1141 * array. Instead, it simply increments DstOffset for the following
1142 * input by the number of components that should be skipped.
1143 *
1144 * Our hardware is unusual in that it requires us to program SO_DECLs
1145 * for fake "hole" components, rather than simply taking the offset
1146 * for each real varying. Each hole can have size 1, 2, 3, or 4; we
1147 * program as many size = 4 holes as we can, then a final hole to
1148 * accommodate the final 1, 2, or 3 remaining.
1149 */
1150 int skip_components =
1151 linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
1152
1153 next_offset[buffer] += skip_components;
1154
1155 while (skip_components >= 4) {
1156 struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
1157 d->HoleFlag = 1;
1158 d->OutputBufferSlot = decl_buffer_slot;
1159 d->ComponentMask = 0xf;
1160 skip_components -= 4;
1161 }
1162
1163 if (skip_components > 0) {
1164 struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
1165 d->HoleFlag = 1;
1166 d->OutputBufferSlot = decl_buffer_slot;
1167 d->ComponentMask = (1 << skip_components) - 1;
1168 }
1169
1170 assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
1171
1172 next_offset[buffer] += components;
1173
1174 so_decl[stream_id][decls[stream_id]++] = decl;
1175
1176 if (decls[stream_id] > max_decls)
1177 max_decls = decls[stream_id];
1178 }
1179
1180 uint32_t *dw;
1181 dw = brw_batch_emitn(brw, GENX(3DSTATE_SO_DECL_LIST), 3 + 2 * max_decls,
1182 .StreamtoBufferSelects0 = buffer_mask[0],
1183 .StreamtoBufferSelects1 = buffer_mask[1],
1184 .StreamtoBufferSelects2 = buffer_mask[2],
1185 .StreamtoBufferSelects3 = buffer_mask[3],
1186 .NumEntries0 = decls[0],
1187 .NumEntries1 = decls[1],
1188 .NumEntries2 = decls[2],
1189 .NumEntries3 = decls[3]);
1190
1191 for (int i = 0; i < max_decls; i++) {
1192 GENX(SO_DECL_ENTRY_pack)(
1193 brw, dw + 2 + i * 2,
1194 &(struct GENX(SO_DECL_ENTRY)) {
1195 .Stream0Decl = so_decl[0][i],
1196 .Stream1Decl = so_decl[1][i],
1197 .Stream2Decl = so_decl[2][i],
1198 .Stream3Decl = so_decl[3][i],
1199 });
1200 }
1201 }
1202
1203 static void
1204 genX(upload_3dstate_so_buffers)(struct brw_context *brw)
1205 {
1206 struct gl_context *ctx = &brw->ctx;
1207 /* BRW_NEW_TRANSFORM_FEEDBACK */
1208 struct gl_transform_feedback_object *xfb_obj =
1209 ctx->TransformFeedback.CurrentObject;
1210 #if GEN_GEN < 8
1211 const struct gl_transform_feedback_info *linked_xfb_info =
1212 xfb_obj->program->sh.LinkedTransformFeedback;
1213 #else
1214 struct brw_transform_feedback_object *brw_obj =
1215 (struct brw_transform_feedback_object *) xfb_obj;
1216 uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
1217 #endif
1218
1219 /* Set up the up to 4 output buffers. These are the ranges defined in the
1220 * gl_transform_feedback_object.
1221 */
1222 for (int i = 0; i < 4; i++) {
1223 struct intel_buffer_object *bufferobj =
1224 intel_buffer_object(xfb_obj->Buffers[i]);
1225
1226 if (!bufferobj) {
1227 brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
1228 sob.SOBufferIndex = i;
1229 }
1230 continue;
1231 }
1232
1233 uint32_t start = xfb_obj->Offset[i];
1234 assert(start % 4 == 0);
1235 uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
1236 struct brw_bo *bo =
1237 intel_bufferobj_buffer(brw, bufferobj, start, end - start);
1238 assert(end <= bo->size);
1239
1240 brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
1241 sob.SOBufferIndex = i;
1242
1243 sob.SurfaceBaseAddress = render_bo(bo, start);
1244 #if GEN_GEN < 8
1245 sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4;
1246 sob.SurfaceEndAddress = render_bo(bo, end);
1247 #else
1248 sob.SOBufferEnable = true;
1249 sob.StreamOffsetWriteEnable = true;
1250 sob.StreamOutputBufferOffsetAddressEnable = true;
1251 sob.SOBufferMOCS = mocs_wb;
1252
1253 sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1;
1254 sob.StreamOutputBufferOffsetAddress =
1255 instruction_bo(brw_obj->offset_bo, i * sizeof(uint32_t));
1256
1257 if (brw_obj->zero_offsets) {
1258 /* Zero out the offset and write that to offset_bo */
1259 sob.StreamOffset = 0;
1260 } else {
1261 /* Use offset_bo as the "Stream Offset." */
1262 sob.StreamOffset = 0xFFFFFFFF;
1263 }
1264 #endif
1265 }
1266 }
1267
1268 #if GEN_GEN >= 8
1269 brw_obj->zero_offsets = false;
1270 #endif
1271 }
1272
1273 static inline bool
1274 query_active(struct gl_query_object *q)
1275 {
1276 return q && q->Active;
1277 }
1278
1279 static void
1280 genX(upload_3dstate_streamout)(struct brw_context *brw, bool active,
1281 const struct brw_vue_map *vue_map)
1282 {
1283 struct gl_context *ctx = &brw->ctx;
1284 /* BRW_NEW_TRANSFORM_FEEDBACK */
1285 struct gl_transform_feedback_object *xfb_obj =
1286 ctx->TransformFeedback.CurrentObject;
1287
1288 brw_batch_emit(brw, GENX(3DSTATE_STREAMOUT), sos) {
1289 if (active) {
1290 int urb_entry_read_offset = 0;
1291 int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
1292 urb_entry_read_offset;
1293
1294 sos.SOFunctionEnable = true;
1295 sos.SOStatisticsEnable = true;
1296
1297 /* BRW_NEW_RASTERIZER_DISCARD */
1298 if (ctx->RasterDiscard) {
1299 if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
1300 sos.RenderingDisable = true;
1301 } else {
1302 perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
1303 "query active relies on the clipper.");
1304 }
1305 }
1306
1307 /* _NEW_LIGHT */
1308 if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
1309 sos.ReorderMode = TRAILING;
1310
1311 #if GEN_GEN < 8
1312 sos.SOBufferEnable0 = xfb_obj->Buffers[0] != NULL;
1313 sos.SOBufferEnable1 = xfb_obj->Buffers[1] != NULL;
1314 sos.SOBufferEnable2 = xfb_obj->Buffers[2] != NULL;
1315 sos.SOBufferEnable3 = xfb_obj->Buffers[3] != NULL;
1316 #else
1317 const struct gl_transform_feedback_info *linked_xfb_info =
1318 xfb_obj->program->sh.LinkedTransformFeedback;
1319 /* Set buffer pitches; 0 means unbound. */
1320 if (xfb_obj->Buffers[0])
1321 sos.Buffer0SurfacePitch = linked_xfb_info->Buffers[0].Stride * 4;
1322 if (xfb_obj->Buffers[1])
1323 sos.Buffer1SurfacePitch = linked_xfb_info->Buffers[1].Stride * 4;
1324 if (xfb_obj->Buffers[2])
1325 sos.Buffer2SurfacePitch = linked_xfb_info->Buffers[2].Stride * 4;
1326 if (xfb_obj->Buffers[3])
1327 sos.Buffer3SurfacePitch = linked_xfb_info->Buffers[3].Stride * 4;
1328 #endif
1329
1330 /* We always read the whole vertex. This could be reduced at some
1331 * point by reading less and offsetting the register index in the
1332 * SO_DECLs.
1333 */
1334 sos.Stream0VertexReadOffset = urb_entry_read_offset;
1335 sos.Stream0VertexReadLength = urb_entry_read_length - 1;
1336 sos.Stream1VertexReadOffset = urb_entry_read_offset;
1337 sos.Stream1VertexReadLength = urb_entry_read_length - 1;
1338 sos.Stream2VertexReadOffset = urb_entry_read_offset;
1339 sos.Stream2VertexReadLength = urb_entry_read_length - 1;
1340 sos.Stream3VertexReadOffset = urb_entry_read_offset;
1341 sos.Stream3VertexReadLength = urb_entry_read_length - 1;
1342 }
1343 }
1344 }
1345
1346 static void
1347 genX(upload_sol)(struct brw_context *brw)
1348 {
1349 struct gl_context *ctx = &brw->ctx;
1350 /* BRW_NEW_TRANSFORM_FEEDBACK */
1351 bool active = _mesa_is_xfb_active_and_unpaused(ctx);
1352
1353 if (active) {
1354 genX(upload_3dstate_so_buffers)(brw);
1355
1356 /* BRW_NEW_VUE_MAP_GEOM_OUT */
1357 genX(upload_3dstate_so_decl_list)(brw, &brw->vue_map_geom_out);
1358 }
1359
1360 /* Finally, set up the SOL stage. This command must always follow updates to
1361 * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
1362 * MMIO register updates (current performed by the kernel at each batch
1363 * emit).
1364 */
1365 genX(upload_3dstate_streamout)(brw, active, &brw->vue_map_geom_out);
1366 }
1367
1368 static const struct brw_tracked_state genX(sol_state) = {
1369 .dirty = {
1370 .mesa = _NEW_LIGHT,
1371 .brw = BRW_NEW_BATCH |
1372 BRW_NEW_BLORP |
1373 BRW_NEW_RASTERIZER_DISCARD |
1374 BRW_NEW_VUE_MAP_GEOM_OUT |
1375 BRW_NEW_TRANSFORM_FEEDBACK,
1376 },
1377 .emit = genX(upload_sol),
1378 };
1379
1380 /* ---------------------------------------------------------------------- */
1381
1382 static void
1383 genX(upload_ps)(struct brw_context *brw)
1384 {
1385 UNUSED const struct gl_context *ctx = &brw->ctx;
1386 UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
1387
1388 /* BRW_NEW_FS_PROG_DATA */
1389 const struct brw_wm_prog_data *prog_data =
1390 brw_wm_prog_data(brw->wm.base.prog_data);
1391 const struct brw_stage_state *stage_state = &brw->wm.base;
1392
1393 #if GEN_GEN < 8
1394 #endif
1395
1396 brw_batch_emit(brw, GENX(3DSTATE_PS), ps) {
1397 /* Initialize the execution mask with VMask. Otherwise, derivatives are
1398 * incorrect for subspans where some of the pixels are unlit. We believe
1399 * the bit just didn't take effect in previous generations.
1400 */
1401 ps.VectorMaskEnable = GEN_GEN >= 8;
1402
1403 ps.SamplerCount =
1404 DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
1405
1406 /* BRW_NEW_FS_PROG_DATA */
1407 ps.BindingTableEntryCount = prog_data->base.binding_table.size_bytes / 4;
1408
1409 if (prog_data->base.use_alt_mode)
1410 ps.FloatingPointMode = Alternate;
1411
1412 /* Haswell requires the sample mask to be set in this packet as well as
1413 * in 3DSTATE_SAMPLE_MASK; the values should match.
1414 */
1415
1416 /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
1417 #if GEN_IS_HASWELL
1418 ps.SampleMask = gen6_determine_sample_mask(brw);
1419 #endif
1420
1421 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
1422 * it implicitly scales for different GT levels (which have some # of
1423 * PSDs).
1424 *
1425 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
1426 */
1427 #if GEN_GEN >= 9
1428 ps.MaximumNumberofThreadsPerPSD = 64 - 1;
1429 #elif GEN_GEN >= 8
1430 ps.MaximumNumberofThreadsPerPSD = 64 - 2;
1431 #else
1432 ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
1433 #endif
1434
1435 if (prog_data->base.nr_params > 0)
1436 ps.PushConstantEnable = true;
1437
1438 #if GEN_GEN < 8
1439 /* From the IVB PRM, volume 2 part 1, page 287:
1440 * "This bit is inserted in the PS payload header and made available to
1441 * the DataPort (either via the message header or via header bypass) to
1442 * indicate that oMask data (one or two phases) is included in Render
1443 * Target Write messages. If present, the oMask data is used to mask off
1444 * samples."
1445 */
1446 ps.oMaskPresenttoRenderTarget = prog_data->uses_omask;
1447
1448 /* The hardware wedges if you have this bit set but don't turn on any
1449 * dual source blend factors.
1450 *
1451 * BRW_NEW_FS_PROG_DATA | _NEW_COLOR
1452 */
1453 ps.DualSourceBlendEnable = prog_data->dual_src_blend &&
1454 (ctx->Color.BlendEnabled & 1) &&
1455 ctx->Color.Blend[0]._UsesDualSrc;
1456
1457 /* BRW_NEW_FS_PROG_DATA */
1458 ps.AttributeEnable = (prog_data->num_varying_inputs != 0);
1459 #endif
1460
1461 /* From the documentation for this packet:
1462 * "If the PS kernel does not need the Position XY Offsets to
1463 * compute a Position Value, then this field should be programmed
1464 * to POSOFFSET_NONE."
1465 *
1466 * "SW Recommendation: If the PS kernel needs the Position Offsets
1467 * to compute a Position XY value, this field should match Position
1468 * ZW Interpolation Mode to ensure a consistent position.xyzw
1469 * computation."
1470 *
1471 * We only require XY sample offsets. So, this recommendation doesn't
1472 * look useful at the moment. We might need this in future.
1473 */
1474 if (prog_data->uses_pos_offset)
1475 ps.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
1476 else
1477 ps.PositionXYOffsetSelect = POSOFFSET_NONE;
1478
1479 ps.RenderTargetFastClearEnable = brw->wm.fast_clear_op;
1480 ps._8PixelDispatchEnable = prog_data->dispatch_8;
1481 ps._16PixelDispatchEnable = prog_data->dispatch_16;
1482 ps.DispatchGRFStartRegisterForConstantSetupData0 =
1483 prog_data->base.dispatch_grf_start_reg;
1484 ps.DispatchGRFStartRegisterForConstantSetupData2 =
1485 prog_data->dispatch_grf_start_reg_2;
1486
1487 ps.KernelStartPointer0 = stage_state->prog_offset;
1488 ps.KernelStartPointer2 = stage_state->prog_offset +
1489 prog_data->prog_offset_2;
1490
1491 if (prog_data->base.total_scratch) {
1492 ps.ScratchSpaceBasePointer =
1493 render_bo(stage_state->scratch_bo,
1494 ffs(stage_state->per_thread_scratch) - 11);
1495 }
1496 }
1497 }
1498
1499 static const struct brw_tracked_state genX(ps_state) = {
1500 .dirty = {
1501 .mesa = _NEW_MULTISAMPLE |
1502 (GEN_GEN < 8 ? _NEW_BUFFERS |
1503 _NEW_COLOR
1504 : 0),
1505 .brw = BRW_NEW_BATCH |
1506 BRW_NEW_BLORP |
1507 BRW_NEW_FS_PROG_DATA,
1508 },
1509 .emit = genX(upload_ps),
1510 };
1511
1512 #endif
1513
1514 /* ---------------------------------------------------------------------- */
1515
1516 #if GEN_GEN >= 8
1517 static void
1518 genX(upload_raster)(struct brw_context *brw)
1519 {
1520 struct gl_context *ctx = &brw->ctx;
1521
1522 /* _NEW_BUFFERS */
1523 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
1524
1525 /* _NEW_POLYGON */
1526 struct gl_polygon_attrib *polygon = &ctx->Polygon;
1527
1528 /* _NEW_POINT */
1529 struct gl_point_attrib *point = &ctx->Point;
1530
1531 brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
1532 if (polygon->_FrontBit == render_to_fbo)
1533 raster.FrontWinding = CounterClockwise;
1534
1535 if (polygon->CullFlag) {
1536 switch (polygon->CullFaceMode) {
1537 case GL_FRONT:
1538 raster.CullMode = CULLMODE_FRONT;
1539 break;
1540 case GL_BACK:
1541 raster.CullMode = CULLMODE_BACK;
1542 break;
1543 case GL_FRONT_AND_BACK:
1544 raster.CullMode = CULLMODE_BOTH;
1545 break;
1546 default:
1547 unreachable("not reached");
1548 }
1549 } else {
1550 raster.CullMode = CULLMODE_NONE;
1551 }
1552
1553 point->SmoothFlag = raster.SmoothPointEnable;
1554
1555 raster.DXMultisampleRasterizationEnable =
1556 _mesa_is_multisample_enabled(ctx);
1557
1558 raster.GlobalDepthOffsetEnableSolid = polygon->OffsetFill;
1559 raster.GlobalDepthOffsetEnableWireframe = polygon->OffsetLine;
1560 raster.GlobalDepthOffsetEnablePoint = polygon->OffsetPoint;
1561
1562 switch (polygon->FrontMode) {
1563 case GL_FILL:
1564 raster.FrontFaceFillMode = FILL_MODE_SOLID;
1565 break;
1566 case GL_LINE:
1567 raster.FrontFaceFillMode = FILL_MODE_WIREFRAME;
1568 break;
1569 case GL_POINT:
1570 raster.FrontFaceFillMode = FILL_MODE_POINT;
1571 break;
1572 default:
1573 unreachable("not reached");
1574 }
1575
1576 switch (polygon->BackMode) {
1577 case GL_FILL:
1578 raster.BackFaceFillMode = FILL_MODE_SOLID;
1579 break;
1580 case GL_LINE:
1581 raster.BackFaceFillMode = FILL_MODE_WIREFRAME;
1582 break;
1583 case GL_POINT:
1584 raster.BackFaceFillMode = FILL_MODE_POINT;
1585 break;
1586 default:
1587 unreachable("not reached");
1588 }
1589
1590 /* _NEW_LINE */
1591 raster.AntialiasingEnable = ctx->Line.SmoothFlag;
1592
1593 /* _NEW_SCISSOR */
1594 raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
1595
1596 /* _NEW_TRANSFORM */
1597 if (!ctx->Transform.DepthClamp) {
1598 #if GEN_GEN >= 9
1599 raster.ViewportZFarClipTestEnable = true;
1600 raster.ViewportZNearClipTestEnable = true;
1601 #else
1602 raster.ViewportZClipTestEnable = true;
1603 #endif
1604 }
1605
1606 /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
1607 #if GEN_GEN >= 9
1608 raster.ConservativeRasterizationEnable =
1609 ctx->IntelConservativeRasterization;
1610 #endif
1611
1612 raster.GlobalDepthOffsetClamp = polygon->OffsetClamp;
1613 raster.GlobalDepthOffsetScale = polygon->OffsetFactor;
1614
1615 raster.GlobalDepthOffsetConstant = polygon->OffsetUnits * 2;
1616 }
1617 }
1618
1619 static const struct brw_tracked_state genX(raster_state) = {
1620 .dirty = {
1621 .mesa = _NEW_BUFFERS |
1622 _NEW_LINE |
1623 _NEW_MULTISAMPLE |
1624 _NEW_POINT |
1625 _NEW_POLYGON |
1626 _NEW_SCISSOR |
1627 _NEW_TRANSFORM,
1628 .brw = BRW_NEW_BLORP |
1629 BRW_NEW_CONTEXT |
1630 BRW_NEW_CONSERVATIVE_RASTERIZATION,
1631 },
1632 .emit = genX(upload_raster),
1633 };
1634
1635 /* ---------------------------------------------------------------------- */
1636
1637 static void
1638 genX(upload_ps_extra)(struct brw_context *brw)
1639 {
1640 UNUSED struct gl_context *ctx = &brw->ctx;
1641
1642 const struct brw_wm_prog_data *prog_data =
1643 brw_wm_prog_data(brw->wm.base.prog_data);
1644
1645 brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
1646 psx.PixelShaderValid = true;
1647 psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
1648 psx.PixelShaderKillsPixel = prog_data->uses_kill;
1649 psx.AttributeEnable = prog_data->num_varying_inputs != 0;
1650 psx.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
1651 psx.PixelShaderUsesSourceW = prog_data->uses_src_w;
1652 psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
1653
1654 /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
1655 if (prog_data->uses_sample_mask) {
1656 #if GEN_GEN >= 9
1657 if (prog_data->post_depth_coverage)
1658 psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
1659 else if (prog_data->inner_coverage && ctx->IntelConservativeRasterization)
1660 psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
1661 else
1662 psx.InputCoverageMaskState = ICMS_NORMAL;
1663 #else
1664 psx.PixelShaderUsesInputCoverageMask = true;
1665 #endif
1666 }
1667
1668 psx.oMaskPresenttoRenderTarget = prog_data->uses_omask;
1669 #if GEN_GEN >= 9
1670 psx.PixelShaderPullsBary = prog_data->pulls_bary;
1671 psx.PixelShaderComputesStencil = prog_data->computed_stencil;
1672 #endif
1673
1674 /* The stricter cross-primitive coherency guarantees that the hardware
1675 * gives us with the "Accesses UAV" bit set for at least one shader stage
1676 * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
1677 * are redundant within the current image, atomic counter and SSBO GL
1678 * APIs, which all have very loose ordering and coherency requirements
1679 * and generally rely on the application to insert explicit barriers when
1680 * a shader invocation is expected to see the memory writes performed by
1681 * the invocations of some previous primitive. Regardless of the value
1682 * of "UAV coherency required", the "Accesses UAV" bits will implicitly
1683 * cause an in most cases useless DC flush when the lowermost stage with
1684 * the bit set finishes execution.
1685 *
1686 * It would be nice to disable it, but in some cases we can't because on
1687 * Gen8+ it also has an influence on rasterization via the PS UAV-only
1688 * signal (which could be set independently from the coherency mechanism
1689 * in the 3DSTATE_WM command on Gen7), and because in some cases it will
1690 * determine whether the hardware skips execution of the fragment shader
1691 * or not via the ThreadDispatchEnable signal. However if we know that
1692 * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
1693 * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
1694 * difference so we may just disable it here.
1695 *
1696 * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
1697 * take into account KillPixels when no depth or stencil writes are
1698 * enabled. In order for occlusion queries to work correctly with no
1699 * attachments, we need to force-enable here.
1700 *
1701 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
1702 * _NEW_COLOR
1703 */
1704 if ((prog_data->has_side_effects || prog_data->uses_kill) &&
1705 !brw_color_buffer_write_enabled(brw))
1706 psx.PixelShaderHasUAV = true;
1707 }
1708 }
1709
1710 const struct brw_tracked_state genX(ps_extra) = {
1711 .dirty = {
1712 .mesa = _NEW_BUFFERS | _NEW_COLOR,
1713 .brw = BRW_NEW_BLORP |
1714 BRW_NEW_CONTEXT |
1715 BRW_NEW_FRAGMENT_PROGRAM |
1716 BRW_NEW_FS_PROG_DATA |
1717 BRW_NEW_CONSERVATIVE_RASTERIZATION,
1718 },
1719 .emit = genX(upload_ps_extra),
1720 };
1721 #endif
1722
1723 /* ---------------------------------------------------------------------- */
1724
1725 void
1726 genX(init_atoms)(struct brw_context *brw)
1727 {
1728 #if GEN_GEN < 6
1729 static const struct brw_tracked_state *render_atoms[] =
1730 {
1731 /* Once all the programs are done, we know how large urb entry
1732 * sizes need to be and can decide if we need to change the urb
1733 * layout.
1734 */
1735 &brw_curbe_offsets,
1736 &brw_recalculate_urb_fence,
1737
1738 &brw_cc_vp,
1739 &brw_cc_unit,
1740
1741 /* Surface state setup. Must come before the VS/WM unit. The binding
1742 * table upload must be last.
1743 */
1744 &brw_vs_pull_constants,
1745 &brw_wm_pull_constants,
1746 &brw_renderbuffer_surfaces,
1747 &brw_renderbuffer_read_surfaces,
1748 &brw_texture_surfaces,
1749 &brw_vs_binding_table,
1750 &brw_wm_binding_table,
1751
1752 &brw_fs_samplers,
1753 &brw_vs_samplers,
1754
1755 /* These set up state for brw_psp_urb_cbs */
1756 &brw_wm_unit,
1757 &brw_sf_vp,
1758 &brw_sf_unit,
1759 &brw_vs_unit, /* always required, enabled or not */
1760 &brw_clip_unit,
1761 &brw_gs_unit,
1762
1763 /* Command packets:
1764 */
1765 &brw_invariant_state,
1766
1767 &brw_binding_table_pointers,
1768 &brw_blend_constant_color,
1769
1770 &brw_depthbuffer,
1771
1772 &brw_polygon_stipple,
1773 &brw_polygon_stipple_offset,
1774
1775 &brw_line_stipple,
1776
1777 &brw_psp_urb_cbs,
1778
1779 &brw_drawing_rect,
1780 &brw_indices, /* must come before brw_vertices */
1781 &brw_index_buffer,
1782 &brw_vertices,
1783
1784 &brw_constant_buffer
1785 };
1786 #elif GEN_GEN == 6
1787 static const struct brw_tracked_state *render_atoms[] =
1788 {
1789 &gen6_sf_and_clip_viewports,
1790
1791 /* Command packets: */
1792
1793 &brw_cc_vp,
1794 &gen6_viewport_state, /* must do after *_vp stages */
1795
1796 &gen6_urb,
1797 &gen6_blend_state, /* must do before cc unit */
1798 &gen6_color_calc_state, /* must do before cc unit */
1799 &gen6_depth_stencil_state, /* must do before cc unit */
1800
1801 &gen6_vs_push_constants, /* Before vs_state */
1802 &gen6_gs_push_constants, /* Before gs_state */
1803 &gen6_wm_push_constants, /* Before wm_state */
1804
1805 /* Surface state setup. Must come before the VS/WM unit. The binding
1806 * table upload must be last.
1807 */
1808 &brw_vs_pull_constants,
1809 &brw_vs_ubo_surfaces,
1810 &brw_gs_pull_constants,
1811 &brw_gs_ubo_surfaces,
1812 &brw_wm_pull_constants,
1813 &brw_wm_ubo_surfaces,
1814 &gen6_renderbuffer_surfaces,
1815 &brw_renderbuffer_read_surfaces,
1816 &brw_texture_surfaces,
1817 &gen6_sol_surface,
1818 &brw_vs_binding_table,
1819 &gen6_gs_binding_table,
1820 &brw_wm_binding_table,
1821
1822 &brw_fs_samplers,
1823 &brw_vs_samplers,
1824 &brw_gs_samplers,
1825 &gen6_sampler_state,
1826 &gen6_multisample_state,
1827
1828 &gen6_vs_state,
1829 &gen6_gs_state,
1830 &genX(clip_state),
1831 &genX(sf_state),
1832 &genX(wm_state),
1833
1834 &gen6_scissor_state,
1835
1836 &gen6_binding_table_pointers,
1837
1838 &brw_depthbuffer,
1839
1840 &brw_polygon_stipple,
1841 &brw_polygon_stipple_offset,
1842
1843 &brw_line_stipple,
1844
1845 &brw_drawing_rect,
1846
1847 &brw_indices, /* must come before brw_vertices */
1848 &brw_index_buffer,
1849 &brw_vertices,
1850 };
1851 #elif GEN_GEN == 7
1852 static const struct brw_tracked_state *render_atoms[] =
1853 {
1854 /* Command packets: */
1855
1856 &brw_cc_vp,
1857 &gen7_sf_clip_viewport,
1858
1859 &gen7_l3_state,
1860 &gen7_push_constant_space,
1861 &gen7_urb,
1862 &gen6_blend_state, /* must do before cc unit */
1863 &gen6_color_calc_state, /* must do before cc unit */
1864 &genX(depth_stencil_state), /* must do before cc unit */
1865
1866 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
1867 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
1868 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
1869 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
1870 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
1871
1872 &gen6_vs_push_constants, /* Before vs_state */
1873 &gen7_tcs_push_constants,
1874 &gen7_tes_push_constants,
1875 &gen6_gs_push_constants, /* Before gs_state */
1876 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
1877
1878 /* Surface state setup. Must come before the VS/WM unit. The binding
1879 * table upload must be last.
1880 */
1881 &brw_vs_pull_constants,
1882 &brw_vs_ubo_surfaces,
1883 &brw_vs_abo_surfaces,
1884 &brw_tcs_pull_constants,
1885 &brw_tcs_ubo_surfaces,
1886 &brw_tcs_abo_surfaces,
1887 &brw_tes_pull_constants,
1888 &brw_tes_ubo_surfaces,
1889 &brw_tes_abo_surfaces,
1890 &brw_gs_pull_constants,
1891 &brw_gs_ubo_surfaces,
1892 &brw_gs_abo_surfaces,
1893 &brw_wm_pull_constants,
1894 &brw_wm_ubo_surfaces,
1895 &brw_wm_abo_surfaces,
1896 &gen6_renderbuffer_surfaces,
1897 &brw_renderbuffer_read_surfaces,
1898 &brw_texture_surfaces,
1899 &brw_vs_binding_table,
1900 &brw_tcs_binding_table,
1901 &brw_tes_binding_table,
1902 &brw_gs_binding_table,
1903 &brw_wm_binding_table,
1904
1905 &brw_fs_samplers,
1906 &brw_vs_samplers,
1907 &brw_tcs_samplers,
1908 &brw_tes_samplers,
1909 &brw_gs_samplers,
1910 &gen6_multisample_state,
1911
1912 &gen7_vs_state,
1913 &gen7_hs_state,
1914 &gen7_te_state,
1915 &gen7_ds_state,
1916 &gen7_gs_state,
1917 &genX(sol_state),
1918 &genX(clip_state),
1919 &genX(sbe_state),
1920 &genX(sf_state),
1921 &genX(wm_state),
1922 &genX(ps_state),
1923
1924 &gen6_scissor_state,
1925
1926 &gen7_depthbuffer,
1927
1928 &brw_polygon_stipple,
1929 &brw_polygon_stipple_offset,
1930
1931 &brw_line_stipple,
1932
1933 &brw_drawing_rect,
1934
1935 &brw_indices, /* must come before brw_vertices */
1936 &brw_index_buffer,
1937 &brw_vertices,
1938
1939 &haswell_cut_index,
1940 };
1941 #elif GEN_GEN >= 8
/* Ordered list of tracked-state atoms for the render pipeline, selected
 * when GEN_GEN >= 8.  The per-entry "Before ..." notes and the surface
 * state comment below record ordering constraints between atoms, so the
 * entries must not be reordered casually.
 */
1942 static const struct brw_tracked_state *render_atoms[] =
1943 {
1944 &brw_cc_vp,
1945 &gen8_sf_clip_viewport,
1946
1947 &gen7_l3_state,
1948 &gen7_push_constant_space,
1949 &gen7_urb,
1950 &gen8_blend_state,
1951 &gen6_color_calc_state,
1952
1953 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
1954 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
1955 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
1956 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
1957 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
1958
1959 &gen6_vs_push_constants, /* Before vs_state */
1960 &gen7_tcs_push_constants,
1961 &gen7_tes_push_constants,
1962 &gen6_gs_push_constants, /* Before gs_state */
1963 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
1964
1965 /* Surface state setup. Must come before the VS/WM unit. The binding
1966 * table upload must be last.
1967 */
1968 &brw_vs_pull_constants,
1969 &brw_vs_ubo_surfaces,
1970 &brw_vs_abo_surfaces,
1971 &brw_tcs_pull_constants,
1972 &brw_tcs_ubo_surfaces,
1973 &brw_tcs_abo_surfaces,
1974 &brw_tes_pull_constants,
1975 &brw_tes_ubo_surfaces,
1976 &brw_tes_abo_surfaces,
1977 &brw_gs_pull_constants,
1978 &brw_gs_ubo_surfaces,
1979 &brw_gs_abo_surfaces,
1980 &brw_wm_pull_constants,
1981 &brw_wm_ubo_surfaces,
1982 &brw_wm_abo_surfaces,
1983 &gen6_renderbuffer_surfaces,
1984 &brw_renderbuffer_read_surfaces,
1985 &brw_texture_surfaces,
1986 &brw_vs_binding_table,
1987 &brw_tcs_binding_table,
1988 &brw_tes_binding_table,
1989 &brw_gs_binding_table,
1990 &brw_wm_binding_table,
1991
1992 &brw_fs_samplers,
1993 &brw_vs_samplers,
1994 &brw_tcs_samplers,
1995 &brw_tes_samplers,
1996 &brw_gs_samplers,
1997 &gen8_multisample_state,
1998
1999 &gen8_vs_state,
2000 &gen8_hs_state,
2001 &gen7_te_state,
2002 &gen8_ds_state,
2003 &gen8_gs_state,
2004 &genX(sol_state),
2005 &genX(clip_state),
2006 &genX(raster_state),
2007 &genX(sbe_state),
2008 &genX(sf_state),
2009 &gen8_ps_blend,
2010 &genX(ps_extra),
2011 &genX(ps_state),
2012 &genX(depth_stencil_state),
2013 &genX(wm_state),
2014
2015 &gen6_scissor_state,
2016
2017 &gen7_depthbuffer,
2018
2019 &brw_polygon_stipple,
2020 &brw_polygon_stipple_offset,
2021
2022 &brw_line_stipple,
2023
2024 &brw_drawing_rect,
2025
2026 &gen8_vf_topology,
2027
2028 &brw_indices, /* NOTE(review): the preceding list requires this before its vertices atom; presumably also true for gen8_vertices -- confirm */
2029 &gen8_index_buffer,
2030 &gen8_vertices,
2031
2032 &haswell_cut_index,
2033 &gen8_pma_fix,
2034 };
2035 #endif
2036
/* Compile-time check that whichever render_atoms list was selected above
 * fits in brw->render_atoms, then hand that list to
 * brw_copy_pipeline_atoms() for BRW_RENDER_PIPELINE.
 */
2037 STATIC_ASSERT(ARRAY_SIZE(render_atoms) <= ARRAY_SIZE(brw->render_atoms));
2038 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
2039 render_atoms, ARRAY_SIZE(render_atoms));
2040
/* The compute pipeline atom list is only built when GEN_GEN >= 7. */
2041 #if GEN_GEN >= 7
/* Ordered list of tracked-state atoms for the compute pipeline. */
2042 static const struct brw_tracked_state *compute_atoms[] =
2043 {
2044 &gen7_l3_state,
2045 &brw_cs_image_surfaces,
2046 &gen7_cs_push_constants,
2047 &brw_cs_pull_constants,
2048 &brw_cs_ubo_surfaces,
2049 &brw_cs_abo_surfaces,
2050 &brw_cs_texture_surfaces,
2051 &brw_cs_work_groups_surface,
2052 &brw_cs_samplers,
2053 &brw_cs_state,
2054 };
2055
/* Compile-time check that the list fits in brw->compute_atoms, then hand
 * it to brw_copy_pipeline_atoms() for BRW_COMPUTE_PIPELINE.
 */
2056 STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms));
2057 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
2058 compute_atoms, ARRAY_SIZE(compute_atoms));
2059 #endif
2060 }