i965: Port gen7+ 3DSTATE_SOL to genxml.
[mesa.git] / src / mesa / drivers / dri / i965 / genX_state_upload.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "common/gen_device_info.h"
27 #include "genxml/gen_macros.h"
28
29 #include "brw_context.h"
30 #include "brw_state.h"
31 #include "brw_util.h"
32
33 #include "intel_batchbuffer.h"
34 #include "intel_buffer_objects.h"
35 #include "intel_fbo.h"
36
37 #include "main/fbobject.h"
38 #include "main/framebuffer.h"
39 #include "main/stencil.h"
40 #include "main/transformfeedback.h"
41
42 UNUSED static void *
43 emit_dwords(struct brw_context *brw, unsigned n)
44 {
45 intel_batchbuffer_begin(brw, n, RENDER_RING);
46 uint32_t *map = brw->batch.map_next;
47 brw->batch.map_next += n;
48 intel_batchbuffer_advance(brw);
49 return map;
50 }
51
/**
 * Address representation used by the genxml packing code in this file.
 *
 * When \c bo is NULL the address is just \c offset; otherwise a relocation
 * against \c bo is emitted using the given GEM read/write domains.
 */
struct brw_address {
   struct brw_bo *bo;
   uint32_t read_domains, write_domain;
   uint32_t offset;
};
58
59 static uint64_t
60 emit_reloc(struct brw_context *brw,
61 void *location, struct brw_address address, uint32_t delta)
62 {
63 uint32_t offset = (char *) location - (char *) brw->batch.map;
64
65 return brw_emit_reloc(&brw->batch, offset, address.bo,
66 address.offset + delta,
67 address.read_domains,
68 address.write_domain);
69 }
70
71 #define __gen_address_type struct brw_address
72 #define __gen_user_data struct brw_context
73
74 static uint64_t
75 __gen_combine_address(struct brw_context *brw, void *location,
76 struct brw_address address, uint32_t delta)
77 {
78 if (address.bo == NULL) {
79 return address.offset + delta;
80 } else {
81 return emit_reloc(brw, location, address, delta);
82 }
83 }
84
85 static inline struct brw_address
86 render_bo(struct brw_bo *bo, uint32_t offset)
87 {
88 return (struct brw_address) {
89 .bo = bo,
90 .offset = offset,
91 .read_domains = I915_GEM_DOMAIN_RENDER,
92 .write_domain = I915_GEM_DOMAIN_RENDER,
93 };
94 }
95
96 static inline struct brw_address
97 instruction_bo(struct brw_bo *bo, uint32_t offset)
98 {
99 return (struct brw_address) {
100 .bo = bo,
101 .offset = offset,
102 .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
103 .write_domain = I915_GEM_DOMAIN_INSTRUCTION,
104 };
105 }
106
107 #include "genxml/genX_pack.h"
108
/* Token-pasting helpers mapping a command name to the identifiers for its
 * length, length bias, header initializer and pack function.
 */
#define _brw_cmd_length(cmd)      cmd ## _length
#define _brw_cmd_length_bias(cmd) cmd ## _length_bias
#define _brw_cmd_header(cmd)      cmd ## _header
#define _brw_cmd_pack(cmd)        cmd ## _pack

/**
 * Emit a fixed-length command into the batch.
 *
 * Usage: brw_batch_emit(brw, CMD, name) { name.Field = ...; }
 *
 * The for-loop runs its body exactly once: `name` is a template struct
 * initialized with the command header, the body fills it in, and the
 * increment clause packs it into the dwords reserved by emit_dwords() and
 * then clears _dst to terminate the loop.
 */
#define brw_batch_emit(brw, cmd, name)                           \
   for (struct cmd name = { _brw_cmd_header(cmd) },              \
        *_dst = emit_dwords(brw, _brw_cmd_length(cmd));          \
        __builtin_expect(!!_dst, 1);                             \
        _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),            \
        _dst = NULL)
120
/**
 * Emit a variable-length command of \p n dwords into the batch.
 *
 * The command template is initialized with the command header, a
 * DWordLength of (n - length bias) and any extra designated initializers
 * passed in the variadic arguments, then packed into the reserved space.
 *
 * Evaluates to a uint32_t pointer to dword 1 of the command, i.e. one dword
 * past the start of the reserved range.
 *
 * \p n is parenthesized wherever it is used in an expression so that
 * arguments containing low-precedence operators (e.g. `a ? b : c`) still
 * produce the intended DWordLength.  Note that \p n is evaluated twice, so
 * it must not have side effects.
 */
#define brw_batch_emitn(brw, cmd, n, ...) ({                 \
      uint32_t *_dw = emit_dwords(brw, (n));                 \
      struct cmd template = {                                \
         _brw_cmd_header(cmd),                               \
         .DWordLength = (n) - _brw_cmd_length_bias(cmd),     \
         __VA_ARGS__                                         \
      };                                                     \
      _brw_cmd_pack(cmd)(brw, _dw, &template);               \
      _dw + 1; /* Array starts at dw[1] */                   \
   })
131
/**
 * Emit an indirect state structure.
 *
 * Works like brw_batch_emit(), except that the template starts out
 * zero-initialized and the destination comes from brw_state_batch(), sized
 * at four bytes per dword of the command length, with the requested
 * alignment; \p offset is forwarded to brw_state_batch().
 */
#define brw_state_emit(brw, cmd, align, offset, name)              \
   for (struct cmd name = { 0 },                                   \
        *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4,     \
                                align, offset);                    \
        __builtin_expect(!!_dst, 1);                               \
        _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),              \
        _dst = NULL)
139
140 #if GEN_GEN >= 6
141 /**
142 * Determine the appropriate attribute override value to store into the
143 * 3DSTATE_SF structure for a given fragment shader attribute. The attribute
144 * override value contains two pieces of information: the location of the
145 * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
146 * flag indicating whether to "swizzle" the attribute based on the direction
147 * the triangle is facing.
148 *
149 * If an attribute is "swizzled", then the given VUE location is used for
150 * front-facing triangles, and the VUE location that immediately follows is
151 * used for back-facing triangles. We use this to implement the mapping from
152 * gl_FrontColor/gl_BackColor to gl_Color.
153 *
154 * urb_entry_read_offset is the offset into the VUE at which the SF unit is
155 * being instructed to begin reading attribute data. It can be set to a
156 * nonzero value to prevent the SF unit from wasting time reading elements of
157 * the VUE that are not needed by the fragment shader. It is measured in
158 * 256-bit increments.
159 */
160 static void
161 genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr,
162 const struct brw_vue_map *vue_map,
163 int urb_entry_read_offset, int fs_attr,
164 bool two_side_color, uint32_t *max_source_attr)
165 {
166 /* Find the VUE slot for this attribute. */
167 int slot = vue_map->varying_to_slot[fs_attr];
168
169 /* Viewport and Layer are stored in the VUE header. We need to override
170 * them to zero if earlier stages didn't write them, as GL requires that
171 * they read back as zero when not explicitly set.
172 */
173 if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
174 attr->ComponentOverrideX = true;
175 attr->ComponentOverrideW = true;
176 attr->ConstantSource = CONST_0000;
177
178 if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
179 attr->ComponentOverrideY = true;
180 if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
181 attr->ComponentOverrideZ = true;
182
183 return;
184 }
185
186 /* If there was only a back color written but not front, use back
187 * as the color instead of undefined
188 */
189 if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
190 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
191 if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
192 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
193
194 if (slot == -1) {
195 /* This attribute does not exist in the VUE--that means that the vertex
196 * shader did not write to it. This means that either:
197 *
198 * (a) This attribute is a texture coordinate, and it is going to be
199 * replaced with point coordinates (as a consequence of a call to
200 * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
201 * hardware will ignore whatever attribute override we supply.
202 *
203 * (b) This attribute is read by the fragment shader but not written by
204 * the vertex shader, so its value is undefined. Therefore the
205 * attribute override we supply doesn't matter.
206 *
207 * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
208 * previous shader stage.
209 *
210 * Note that we don't have to worry about the cases where the attribute
211 * is gl_PointCoord or is undergoing point sprite coordinate
212 * replacement, because in those cases, this function isn't called.
213 *
214 * In case (c), we need to program the attribute overrides so that the
215 * primitive ID will be stored in this slot. In every other case, the
216 * attribute override we supply doesn't matter. So just go ahead and
217 * program primitive ID in every case.
218 */
219 attr->ComponentOverrideW = true;
220 attr->ComponentOverrideX = true;
221 attr->ComponentOverrideY = true;
222 attr->ComponentOverrideZ = true;
223 attr->ConstantSource = PRIM_ID;
224 return;
225 }
226
227 /* Compute the location of the attribute relative to urb_entry_read_offset.
228 * Each increment of urb_entry_read_offset represents a 256-bit value, so
229 * it counts for two 128-bit VUE slots.
230 */
231 int source_attr = slot - 2 * urb_entry_read_offset;
232 assert(source_attr >= 0 && source_attr < 32);
233
234 /* If we are doing two-sided color, and the VUE slot following this one
235 * represents a back-facing color, then we need to instruct the SF unit to
236 * do back-facing swizzling.
237 */
238 bool swizzling = two_side_color &&
239 ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
240 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
241 (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
242 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
243
244 /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
245 if (*max_source_attr < source_attr + swizzling)
246 *max_source_attr = source_attr + swizzling;
247
248 attr->SourceAttribute = source_attr;
249 if (swizzling)
250 attr->SwizzleSelect = INPUTATTR_FACING;
251 }
252
253
254 static void
255 genX(calculate_attr_overrides)(const struct brw_context *brw,
256 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,
257 uint32_t *point_sprite_enables,
258 uint32_t *urb_entry_read_length,
259 uint32_t *urb_entry_read_offset)
260 {
261 const struct gl_context *ctx = &brw->ctx;
262
263 /* _NEW_POINT */
264 const struct gl_point_attrib *point = &ctx->Point;
265
266 /* BRW_NEW_FS_PROG_DATA */
267 const struct brw_wm_prog_data *wm_prog_data =
268 brw_wm_prog_data(brw->wm.base.prog_data);
269 uint32_t max_source_attr = 0;
270
271 *point_sprite_enables = 0;
272
273 /* BRW_NEW_FRAGMENT_PROGRAM
274 *
275 * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
276 * the full vertex header. Otherwise, we can program the SF to start
277 * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
278 * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
279 * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
280 */
281
282 bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
283 (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
284
285 *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
286
287 /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
288 * description of dw10 Point Sprite Texture Coordinate Enable:
289 *
290 * "This field must be programmed to zero when non-point primitives
291 * are rendered."
292 *
293 * The SandyBridge PRM doesn't explicitly say that point sprite enables
294 * must be programmed to zero when rendering non-point primitives, but
295 * the IvyBridge PRM does, and if we don't, we get garbage.
296 *
297 * This is not required on Haswell, as the hardware ignores this state
298 * when drawing non-points -- although we do still need to be careful to
299 * correctly set the attr overrides.
300 *
301 * _NEW_POLYGON
302 * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
303 */
304 bool drawing_points = brw_is_drawing_points(brw);
305
306 for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
307 int input_index = wm_prog_data->urb_setup[attr];
308
309 if (input_index < 0)
310 continue;
311
312 /* _NEW_POINT */
313 bool point_sprite = false;
314 if (drawing_points) {
315 if (point->PointSprite &&
316 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
317 (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) {
318 point_sprite = true;
319 }
320
321 if (attr == VARYING_SLOT_PNTC)
322 point_sprite = true;
323
324 if (point_sprite)
325 *point_sprite_enables |= (1 << input_index);
326 }
327
328 /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
329 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };
330
331 if (!point_sprite) {
332 genX(get_attr_override)(&attribute,
333 &brw->vue_map_geom_out,
334 *urb_entry_read_offset, attr,
335 brw->ctx.VertexProgram._TwoSideEnabled,
336 &max_source_attr);
337 }
338
339 /* The hardware can only do the overrides on 16 overrides at a
340 * time, and the other up to 16 have to be lined up so that the
341 * input index = the output index. We'll need to do some
342 * tweaking to make sure that's the case.
343 */
344 if (input_index < 16)
345 attr_overrides[input_index] = attribute;
346 else
347 assert(attribute.SourceAttribute == input_index);
348 }
349
350 /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
351 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
352 *
353 * "This field should be set to the minimum length required to read the
354 * maximum source attribute. The maximum source attribute is indicated
355 * by the maximum value of the enabled Attribute # Source Attribute if
356 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
357 * enable is not set.
358 * read_length = ceiling((max_source_attr + 1) / 2)
359 *
360 * [errata] Corruption/Hang possible if length programmed larger than
361 * recommended"
362 *
363 * Similar text exists for Ivy Bridge.
364 */
365 *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
366 }
367
368 /* ---------------------------------------------------------------------- */
369
370 static void
371 genX(upload_depth_stencil_state)(struct brw_context *brw)
372 {
373 struct gl_context *ctx = &brw->ctx;
374
375 /* _NEW_BUFFERS */
376 struct intel_renderbuffer *depth_irb =
377 intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
378
379 /* _NEW_DEPTH */
380 struct gl_depthbuffer_attrib *depth = &ctx->Depth;
381
382 /* _NEW_STENCIL */
383 struct gl_stencil_attrib *stencil = &ctx->Stencil;
384 const int b = stencil->_BackFace;
385
386 #if GEN_GEN >= 8
387 brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
388 #else
389 uint32_t ds_offset;
390 brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, wmds) {
391 #endif
392 if (depth->Test && depth_irb) {
393 wmds.DepthTestEnable = true;
394 wmds.DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
395 wmds.DepthTestFunction = intel_translate_compare_func(depth->Func);
396 }
397
398 if (stencil->_Enabled) {
399 wmds.StencilTestEnable = true;
400 wmds.StencilWriteMask = stencil->WriteMask[0] & 0xff;
401 wmds.StencilTestMask = stencil->ValueMask[0] & 0xff;
402
403 wmds.StencilTestFunction =
404 intel_translate_compare_func(stencil->Function[0]);
405 wmds.StencilFailOp =
406 intel_translate_stencil_op(stencil->FailFunc[0]);
407 wmds.StencilPassDepthPassOp =
408 intel_translate_stencil_op(stencil->ZPassFunc[0]);
409 wmds.StencilPassDepthFailOp =
410 intel_translate_stencil_op(stencil->ZFailFunc[0]);
411
412 wmds.StencilBufferWriteEnable = stencil->_WriteEnabled;
413
414 if (stencil->_TestTwoSide) {
415 wmds.DoubleSidedStencilEnable = true;
416 wmds.BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
417 wmds.BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;
418
419 wmds.BackfaceStencilTestFunction =
420 intel_translate_compare_func(stencil->Function[b]);
421 wmds.BackfaceStencilFailOp =
422 intel_translate_stencil_op(stencil->FailFunc[b]);
423 wmds.BackfaceStencilPassDepthPassOp =
424 intel_translate_stencil_op(stencil->ZPassFunc[b]);
425 wmds.BackfaceStencilPassDepthFailOp =
426 intel_translate_stencil_op(stencil->ZFailFunc[b]);
427 }
428
429 #if GEN_GEN >= 9
430 wmds.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
431 wmds.BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b);
432 #endif
433 }
434 }
435
436 #if GEN_GEN == 6
437 brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
438 ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
439 ptr.DEPTH_STENCIL_STATEChange = true;
440 }
441 #elif GEN_GEN == 7
442 brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {
443 ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
444 }
445 #endif
446 }
447
448 static const struct brw_tracked_state genX(depth_stencil_state) = {
449 .dirty = {
450 .mesa = _NEW_BUFFERS |
451 _NEW_DEPTH |
452 _NEW_STENCIL,
453 .brw = BRW_NEW_BLORP |
454 (GEN_GEN >= 8 ? BRW_NEW_CONTEXT
455 : BRW_NEW_BATCH |
456 BRW_NEW_STATE_BASE_ADDRESS),
457 },
458 .emit = genX(upload_depth_stencil_state),
459 };
460
461 /* ---------------------------------------------------------------------- */
462
463 static void
464 genX(upload_clip_state)(struct brw_context *brw)
465 {
466 struct gl_context *ctx = &brw->ctx;
467
468 /* _NEW_BUFFERS */
469 struct gl_framebuffer *fb = ctx->DrawBuffer;
470
471 /* BRW_NEW_FS_PROG_DATA */
472 struct brw_wm_prog_data *wm_prog_data =
473 brw_wm_prog_data(brw->wm.base.prog_data);
474
475 brw_batch_emit(brw, GENX(3DSTATE_CLIP), clip) {
476 clip.StatisticsEnable = !brw->meta_in_progress;
477
478 if (wm_prog_data->barycentric_interp_modes &
479 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
480 clip.NonPerspectiveBarycentricEnable = true;
481
482 #if GEN_GEN >= 7
483 clip.EarlyCullEnable = true;
484 #endif
485
486 #if GEN_GEN == 7
487 clip.FrontWinding = ctx->Polygon._FrontBit == _mesa_is_user_fbo(fb);
488
489 if (ctx->Polygon.CullFlag) {
490 switch (ctx->Polygon.CullFaceMode) {
491 case GL_FRONT:
492 clip.CullMode = CULLMODE_FRONT;
493 break;
494 case GL_BACK:
495 clip.CullMode = CULLMODE_BACK;
496 break;
497 case GL_FRONT_AND_BACK:
498 clip.CullMode = CULLMODE_BOTH;
499 break;
500 default:
501 unreachable("Should not get here: invalid CullFlag");
502 }
503 } else {
504 clip.CullMode = CULLMODE_NONE;
505 }
506 #endif
507
508 #if GEN_GEN < 8
509 clip.UserClipDistanceCullTestEnableBitmask =
510 brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask;
511
512 clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp;
513 #endif
514
515 /* _NEW_LIGHT */
516 if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
517 clip.TriangleStripListProvokingVertexSelect = 0;
518 clip.TriangleFanProvokingVertexSelect = 1;
519 clip.LineStripListProvokingVertexSelect = 0;
520 } else {
521 clip.TriangleStripListProvokingVertexSelect = 2;
522 clip.TriangleFanProvokingVertexSelect = 2;
523 clip.LineStripListProvokingVertexSelect = 1;
524 }
525
526 /* _NEW_TRANSFORM */
527 clip.UserClipDistanceClipTestEnableBitmask =
528 ctx->Transform.ClipPlanesEnabled;
529
530 #if GEN_GEN >= 8
531 clip.ForceUserClipDistanceClipTestEnableBitmask = true;
532 #endif
533
534 if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
535 clip.APIMode = APIMODE_D3D;
536 else
537 clip.APIMode = APIMODE_OGL;
538
539 clip.GuardbandClipTestEnable = true;
540
541 /* BRW_NEW_VIEWPORT_COUNT */
542 const unsigned viewport_count = brw->clip.viewport_count;
543
544 if (ctx->RasterDiscard) {
545 clip.ClipMode = CLIPMODE_REJECT_ALL;
546 #if GEN_GEN == 6
547 perf_debug("Rasterizer discard is currently implemented via the "
548 "clipper; having the GS not write primitives would "
549 "likely be faster.\n");
550 #endif
551 } else {
552 clip.ClipMode = CLIPMODE_NORMAL;
553 }
554
555 clip.ClipEnable = brw->primitive != _3DPRIM_RECTLIST;
556
557 /* _NEW_POLYGON,
558 * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
559 */
560 if (!brw_is_drawing_points(brw) && !brw_is_drawing_lines(brw))
561 clip.ViewportXYClipTestEnable = true;
562
563 clip.MinimumPointWidth = 0.125;
564 clip.MaximumPointWidth = 255.875;
565 clip.MaximumVPIndex = viewport_count - 1;
566 if (_mesa_geometric_layers(fb) == 0)
567 clip.ForceZeroRTAIndexEnable = true;
568 }
569 }
570
571 static const struct brw_tracked_state genX(clip_state) = {
572 .dirty = {
573 .mesa = _NEW_BUFFERS |
574 _NEW_LIGHT |
575 _NEW_POLYGON |
576 _NEW_TRANSFORM,
577 .brw = BRW_NEW_BLORP |
578 BRW_NEW_CONTEXT |
579 BRW_NEW_FS_PROG_DATA |
580 BRW_NEW_GS_PROG_DATA |
581 BRW_NEW_VS_PROG_DATA |
582 BRW_NEW_META_IN_PROGRESS |
583 BRW_NEW_PRIMITIVE |
584 BRW_NEW_RASTERIZER_DISCARD |
585 BRW_NEW_TES_PROG_DATA |
586 BRW_NEW_VIEWPORT_COUNT,
587 },
588 .emit = genX(upload_clip_state),
589 };
590
591 /* ---------------------------------------------------------------------- */
592
593 static void
594 genX(upload_sf)(struct brw_context *brw)
595 {
596 struct gl_context *ctx = &brw->ctx;
597 float point_size;
598
599 #if GEN_GEN <= 7
600 /* _NEW_BUFFERS */
601 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
602 const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
603 #endif
604
605 brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
606 sf.StatisticsEnable = true;
607 sf.ViewportTransformEnable = brw->sf.viewport_transform_enable;
608
609 #if GEN_GEN == 7
610 /* _NEW_BUFFERS */
611 sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw);
612 #endif
613
614 #if GEN_GEN <= 7
615 /* _NEW_POLYGON */
616 sf.FrontWinding = ctx->Polygon._FrontBit == render_to_fbo;
617 sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
618 sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
619 sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;
620
621 switch (ctx->Polygon.FrontMode) {
622 case GL_FILL:
623 sf.FrontFaceFillMode = FILL_MODE_SOLID;
624 break;
625 case GL_LINE:
626 sf.FrontFaceFillMode = FILL_MODE_WIREFRAME;
627 break;
628 case GL_POINT:
629 sf.FrontFaceFillMode = FILL_MODE_POINT;
630 break;
631 default:
632 unreachable("not reached");
633 }
634
635 switch (ctx->Polygon.BackMode) {
636 case GL_FILL:
637 sf.BackFaceFillMode = FILL_MODE_SOLID;
638 break;
639 case GL_LINE:
640 sf.BackFaceFillMode = FILL_MODE_WIREFRAME;
641 break;
642 case GL_POINT:
643 sf.BackFaceFillMode = FILL_MODE_POINT;
644 break;
645 default:
646 unreachable("not reached");
647 }
648
649 sf.ScissorRectangleEnable = true;
650
651 if (ctx->Polygon.CullFlag) {
652 switch (ctx->Polygon.CullFaceMode) {
653 case GL_FRONT:
654 sf.CullMode = CULLMODE_FRONT;
655 break;
656 case GL_BACK:
657 sf.CullMode = CULLMODE_BACK;
658 break;
659 case GL_FRONT_AND_BACK:
660 sf.CullMode = CULLMODE_BOTH;
661 break;
662 default:
663 unreachable("not reached");
664 }
665 } else {
666 sf.CullMode = CULLMODE_NONE;
667 }
668
669 #if GEN_IS_HASWELL
670 sf.LineStippleEnable = ctx->Line.StippleFlag;
671 #endif
672
673 if (multisampled_fbo && ctx->Multisample.Enabled)
674 sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
675
676 sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
677 sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
678 sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
679 #endif
680
681 /* _NEW_LINE */
682 sf.LineWidth = brw_get_line_width_float(brw);
683
684 if (ctx->Line.SmoothFlag) {
685 sf.LineEndCapAntialiasingRegionWidth = _10pixels;
686 #if GEN_GEN <= 7
687 sf.AntiAliasingEnable = true;
688 #endif
689 }
690
691 /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
692 point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
693 /* Clamp to the hardware limits */
694 sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f);
695
696 /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
697 if (use_state_point_size(brw))
698 sf.PointWidthSource = State;
699
700 #if GEN_GEN >= 8
701 /* _NEW_POINT | _NEW_MULTISAMPLE */
702 if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
703 !ctx->Point.PointSprite)
704 sf.SmoothPointEnable = true;
705 #endif
706
707 sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
708
709 /* _NEW_LIGHT */
710 if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
711 sf.TriangleStripListProvokingVertexSelect = 2;
712 sf.TriangleFanProvokingVertexSelect = 2;
713 sf.LineStripListProvokingVertexSelect = 1;
714 } else {
715 sf.TriangleFanProvokingVertexSelect = 1;
716 }
717
718 #if GEN_GEN == 6
719 /* BRW_NEW_FS_PROG_DATA */
720 const struct brw_wm_prog_data *wm_prog_data =
721 brw_wm_prog_data(brw->wm.base.prog_data);
722
723 sf.AttributeSwizzleEnable = true;
724 sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
725
726 /*
727 * Window coordinates in an FBO are inverted, which means point
728 * sprite origin must be inverted, too.
729 */
730 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
731 sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
732 } else {
733 sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
734 }
735
736 /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
737 * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
738 */
739 uint32_t urb_entry_read_length;
740 uint32_t urb_entry_read_offset;
741 uint32_t point_sprite_enables;
742 genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables,
743 &urb_entry_read_length,
744 &urb_entry_read_offset);
745 sf.VertexURBEntryReadLength = urb_entry_read_length;
746 sf.VertexURBEntryReadOffset = urb_entry_read_offset;
747 sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;
748 sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
749 #endif
750 }
751 }
752
753 static const struct brw_tracked_state genX(sf_state) = {
754 .dirty = {
755 .mesa = _NEW_LIGHT |
756 _NEW_LINE |
757 _NEW_MULTISAMPLE |
758 _NEW_POINT |
759 _NEW_PROGRAM |
760 (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
761 .brw = BRW_NEW_BLORP |
762 BRW_NEW_CONTEXT |
763 BRW_NEW_VUE_MAP_GEOM_OUT |
764 (GEN_GEN <= 7 ? BRW_NEW_GS_PROG_DATA |
765 BRW_NEW_PRIMITIVE |
766 BRW_NEW_TES_PROG_DATA
767 : 0) |
768 (GEN_GEN == 6 ? BRW_NEW_FS_PROG_DATA |
769 BRW_NEW_FRAGMENT_PROGRAM
770 : 0),
771 },
772 .emit = genX(upload_sf),
773 };
774
775 #endif
776
777 /* ---------------------------------------------------------------------- */
778
779 #if GEN_GEN >= 7
780 static void
781 genX(upload_sbe)(struct brw_context *brw)
782 {
783 struct gl_context *ctx = &brw->ctx;
784 /* BRW_NEW_FS_PROG_DATA */
785 const struct brw_wm_prog_data *wm_prog_data =
786 brw_wm_prog_data(brw->wm.base.prog_data);
787 #if GEN_GEN >= 8
788 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } };
789 #else
790 #define attr_overrides sbe.Attribute
791 #endif
792 uint32_t urb_entry_read_length;
793 uint32_t urb_entry_read_offset;
794 uint32_t point_sprite_enables;
795
796 brw_batch_emit(brw, GENX(3DSTATE_SBE), sbe) {
797 sbe.AttributeSwizzleEnable = true;
798 sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
799
800 /* _NEW_BUFFERS */
801 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
802
803 /* _NEW_POINT
804 *
805 * Window coordinates in an FBO are inverted, which means point
806 * sprite origin must be inverted.
807 */
808 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo)
809 sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
810 else
811 sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
812
813 /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM,
814 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM |
815 * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA |
816 * BRW_NEW_VUE_MAP_GEOM_OUT
817 */
818 genX(calculate_attr_overrides)(brw,
819 attr_overrides,
820 &point_sprite_enables,
821 &urb_entry_read_length,
822 &urb_entry_read_offset);
823
824 /* Typically, the URB entry read length and offset should be programmed
825 * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active
826 * stage which produces geometry. However, we don't know the proper
827 * value until we call calculate_attr_overrides().
828 *
829 * To fit with our existing code, we override the inherited values and
830 * specify it here directly, as we did on previous generations.
831 */
832 sbe.VertexURBEntryReadLength = urb_entry_read_length;
833 sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
834 sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables;
835 sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
836
837 #if GEN_GEN >= 8
838 sbe.ForceVertexURBEntryReadLength = true;
839 sbe.ForceVertexURBEntryReadOffset = true;
840 #endif
841
842 #if GEN_GEN >= 9
843 /* prepare the active component dwords */
844 int input_index = 0;
845 for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
846 if (!(brw->fragment_program->info.inputs_read &
847 BITFIELD64_BIT(attr))) {
848 continue;
849 }
850
851 assert(input_index < 32);
852
853 sbe.AttributeActiveComponentFormat[input_index] = ACTIVE_COMPONENT_XYZW;
854 ++input_index;
855 }
856 #endif
857 }
858
859 #if GEN_GEN >= 8
860 brw_batch_emit(brw, GENX(3DSTATE_SBE_SWIZ), sbes) {
861 for (int i = 0; i < 16; i++)
862 sbes.Attribute[i] = attr_overrides[i];
863 }
864 #endif
865
866 #undef attr_overrides
867 }
868
869 static const struct brw_tracked_state genX(sbe_state) = {
870 .dirty = {
871 .mesa = _NEW_BUFFERS |
872 _NEW_LIGHT |
873 _NEW_POINT |
874 _NEW_POLYGON |
875 _NEW_PROGRAM,
876 .brw = BRW_NEW_BLORP |
877 BRW_NEW_CONTEXT |
878 BRW_NEW_FRAGMENT_PROGRAM |
879 BRW_NEW_FS_PROG_DATA |
880 BRW_NEW_GS_PROG_DATA |
881 BRW_NEW_TES_PROG_DATA |
882 BRW_NEW_VUE_MAP_GEOM_OUT |
883 (GEN_GEN == 7 ? BRW_NEW_PRIMITIVE
884 : 0),
885 },
886 .emit = genX(upload_sbe),
887 };
888
889 /* ---------------------------------------------------------------------- */
890
891 /**
892 * Outputs the 3DSTATE_SO_DECL_LIST command.
893 *
894 * The data output is a series of 64-bit entries containing a SO_DECL per
895 * stream. We only have one stream of rendering coming out of the GS unit, so
896 * we only emit stream 0 (low 16 bits) SO_DECLs.
897 */
898 static void
899 genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
900 const struct brw_vue_map *vue_map)
901 {
902 struct gl_context *ctx = &brw->ctx;
903 /* BRW_NEW_TRANSFORM_FEEDBACK */
904 struct gl_transform_feedback_object *xfb_obj =
905 ctx->TransformFeedback.CurrentObject;
906 const struct gl_transform_feedback_info *linked_xfb_info =
907 xfb_obj->program->sh.LinkedTransformFeedback;
908 struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
909 int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
910 int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
911 int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
912 int max_decls = 0;
913 STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
914
915 memset(so_decl, 0, sizeof(so_decl));
916
917 /* Construct the list of SO_DECLs to be emitted. The formatting of the
918 * command feels strange -- each dword pair contains a SO_DECL per stream.
919 */
920 for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
921 int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
922 struct GENX(SO_DECL) decl = {0};
923 int varying = linked_xfb_info->Outputs[i].OutputRegister;
924 const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
925 unsigned component_mask = (1 << components) - 1;
926 unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
927 unsigned decl_buffer_slot = buffer;
928 assert(stream_id < MAX_VERTEX_STREAMS);
929
930 /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
931 * gl_Layer is stored in VARYING_SLOT_PSIZ.y
932 * gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
933 */
934 if (varying == VARYING_SLOT_PSIZ) {
935 assert(components == 1);
936 component_mask <<= 3;
937 } else if (varying == VARYING_SLOT_LAYER) {
938 assert(components == 1);
939 component_mask <<= 1;
940 } else if (varying == VARYING_SLOT_VIEWPORT) {
941 assert(components == 1);
942 component_mask <<= 2;
943 } else {
944 component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
945 }
946
947 buffer_mask[stream_id] |= 1 << buffer;
948
949 decl.OutputBufferSlot = decl_buffer_slot;
950 if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) {
951 decl.RegisterIndex = vue_map->varying_to_slot[VARYING_SLOT_PSIZ];
952 } else {
953 assert(vue_map->varying_to_slot[varying] >= 0);
954 decl.RegisterIndex = vue_map->varying_to_slot[varying];
955 }
956 decl.ComponentMask = component_mask;
957
958 /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
959 * array. Instead, it simply increments DstOffset for the following
960 * input by the number of components that should be skipped.
961 *
962 * Our hardware is unusual in that it requires us to program SO_DECLs
963 * for fake "hole" components, rather than simply taking the offset
964 * for each real varying. Each hole can have size 1, 2, 3, or 4; we
965 * program as many size = 4 holes as we can, then a final hole to
966 * accommodate the final 1, 2, or 3 remaining.
967 */
968 int skip_components =
969 linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
970
971 next_offset[buffer] += skip_components;
972
973 while (skip_components >= 4) {
974 struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
975 d->HoleFlag = 1;
976 d->OutputBufferSlot = decl_buffer_slot;
977 d->ComponentMask = 0xf;
978 skip_components -= 4;
979 }
980
981 if (skip_components > 0) {
982 struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
983 d->HoleFlag = 1;
984 d->OutputBufferSlot = decl_buffer_slot;
985 d->ComponentMask = (1 << skip_components) - 1;
986 }
987
988 assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
989
990 next_offset[buffer] += components;
991
992 so_decl[stream_id][decls[stream_id]++] = decl;
993
994 if (decls[stream_id] > max_decls)
995 max_decls = decls[stream_id];
996 }
997
998 uint32_t *dw;
999 dw = brw_batch_emitn(brw, GENX(3DSTATE_SO_DECL_LIST), 3 + 2 * max_decls,
1000 .StreamtoBufferSelects0 = buffer_mask[0],
1001 .StreamtoBufferSelects1 = buffer_mask[1],
1002 .StreamtoBufferSelects2 = buffer_mask[2],
1003 .StreamtoBufferSelects3 = buffer_mask[3],
1004 .NumEntries0 = decls[0],
1005 .NumEntries1 = decls[1],
1006 .NumEntries2 = decls[2],
1007 .NumEntries3 = decls[3]);
1008
1009 for (int i = 0; i < max_decls; i++) {
1010 GENX(SO_DECL_ENTRY_pack)(
1011 brw, dw + 2 + i * 2,
1012 &(struct GENX(SO_DECL_ENTRY)) {
1013 .Stream0Decl = so_decl[0][i],
1014 .Stream1Decl = so_decl[1][i],
1015 .Stream2Decl = so_decl[2][i],
1016 .Stream3Decl = so_decl[3][i],
1017 });
1018 }
1019 }
1020
1021 static void
1022 genX(upload_3dstate_so_buffers)(struct brw_context *brw)
1023 {
1024 struct gl_context *ctx = &brw->ctx;
1025 /* BRW_NEW_TRANSFORM_FEEDBACK */
1026 struct gl_transform_feedback_object *xfb_obj =
1027 ctx->TransformFeedback.CurrentObject;
1028 #if GEN_GEN < 8
1029 const struct gl_transform_feedback_info *linked_xfb_info =
1030 xfb_obj->program->sh.LinkedTransformFeedback;
1031 #else
1032 struct brw_transform_feedback_object *brw_obj =
1033 (struct brw_transform_feedback_object *) xfb_obj;
1034 uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
1035 #endif
1036
1037 /* Set up the up to 4 output buffers. These are the ranges defined in the
1038 * gl_transform_feedback_object.
1039 */
1040 for (int i = 0; i < 4; i++) {
1041 struct intel_buffer_object *bufferobj =
1042 intel_buffer_object(xfb_obj->Buffers[i]);
1043
1044 if (!bufferobj) {
1045 brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
1046 sob.SOBufferIndex = i;
1047 }
1048 continue;
1049 }
1050
1051 uint32_t start = xfb_obj->Offset[i];
1052 assert(start % 4 == 0);
1053 uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
1054 struct brw_bo *bo =
1055 intel_bufferobj_buffer(brw, bufferobj, start, end - start);
1056 assert(end <= bo->size);
1057
1058 brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
1059 sob.SOBufferIndex = i;
1060
1061 sob.SurfaceBaseAddress = render_bo(bo, start);
1062 #if GEN_GEN < 8
1063 sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4;
1064 sob.SurfaceEndAddress = render_bo(bo, end);
1065 #else
1066 sob.SOBufferEnable = true;
1067 sob.StreamOffsetWriteEnable = true;
1068 sob.StreamOutputBufferOffsetAddressEnable = true;
1069 sob.SOBufferMOCS = mocs_wb;
1070
1071 sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1;
1072 sob.StreamOutputBufferOffsetAddress =
1073 instruction_bo(brw_obj->offset_bo, i * sizeof(uint32_t));
1074
1075 if (brw_obj->zero_offsets) {
1076 /* Zero out the offset and write that to offset_bo */
1077 sob.StreamOffset = 0;
1078 } else {
1079 /* Use offset_bo as the "Stream Offset." */
1080 sob.StreamOffset = 0xFFFFFFFF;
1081 }
1082 #endif
1083 }
1084 }
1085
1086 #if GEN_GEN >= 8
1087 brw_obj->zero_offsets = false;
1088 #endif
1089 }
1090
1091 static inline bool
1092 query_active(struct gl_query_object *q)
1093 {
1094 return q && q->Active;
1095 }
1096
1097 static void
1098 genX(upload_3dstate_streamout)(struct brw_context *brw, bool active,
1099 const struct brw_vue_map *vue_map)
1100 {
1101 struct gl_context *ctx = &brw->ctx;
1102 /* BRW_NEW_TRANSFORM_FEEDBACK */
1103 struct gl_transform_feedback_object *xfb_obj =
1104 ctx->TransformFeedback.CurrentObject;
1105
1106 brw_batch_emit(brw, GENX(3DSTATE_STREAMOUT), sos) {
1107 if (active) {
1108 int urb_entry_read_offset = 0;
1109 int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
1110 urb_entry_read_offset;
1111
1112 sos.SOFunctionEnable = true;
1113 sos.SOStatisticsEnable = true;
1114
1115 /* BRW_NEW_RASTERIZER_DISCARD */
1116 if (ctx->RasterDiscard) {
1117 if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
1118 sos.RenderingDisable = true;
1119 } else {
1120 perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
1121 "query active relies on the clipper.");
1122 }
1123 }
1124
1125 /* _NEW_LIGHT */
1126 if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
1127 sos.ReorderMode = TRAILING;
1128
1129 #if GEN_GEN < 8
1130 sos.SOBufferEnable0 = xfb_obj->Buffers[0] != NULL;
1131 sos.SOBufferEnable1 = xfb_obj->Buffers[1] != NULL;
1132 sos.SOBufferEnable2 = xfb_obj->Buffers[2] != NULL;
1133 sos.SOBufferEnable3 = xfb_obj->Buffers[3] != NULL;
1134 #else
1135 const struct gl_transform_feedback_info *linked_xfb_info =
1136 xfb_obj->program->sh.LinkedTransformFeedback;
1137 /* Set buffer pitches; 0 means unbound. */
1138 if (xfb_obj->Buffers[0])
1139 sos.Buffer0SurfacePitch = linked_xfb_info->Buffers[0].Stride * 4;
1140 if (xfb_obj->Buffers[1])
1141 sos.Buffer1SurfacePitch = linked_xfb_info->Buffers[1].Stride * 4;
1142 if (xfb_obj->Buffers[2])
1143 sos.Buffer2SurfacePitch = linked_xfb_info->Buffers[2].Stride * 4;
1144 if (xfb_obj->Buffers[3])
1145 sos.Buffer3SurfacePitch = linked_xfb_info->Buffers[3].Stride * 4;
1146 #endif
1147
1148 /* We always read the whole vertex. This could be reduced at some
1149 * point by reading less and offsetting the register index in the
1150 * SO_DECLs.
1151 */
1152 sos.Stream0VertexReadOffset = urb_entry_read_offset;
1153 sos.Stream0VertexReadLength = urb_entry_read_length - 1;
1154 sos.Stream1VertexReadOffset = urb_entry_read_offset;
1155 sos.Stream1VertexReadLength = urb_entry_read_length - 1;
1156 sos.Stream2VertexReadOffset = urb_entry_read_offset;
1157 sos.Stream2VertexReadLength = urb_entry_read_length - 1;
1158 sos.Stream3VertexReadOffset = urb_entry_read_offset;
1159 sos.Stream3VertexReadLength = urb_entry_read_length - 1;
1160 }
1161 }
1162 }
1163
1164 static void
1165 genX(upload_sol)(struct brw_context *brw)
1166 {
1167 struct gl_context *ctx = &brw->ctx;
1168 /* BRW_NEW_TRANSFORM_FEEDBACK */
1169 bool active = _mesa_is_xfb_active_and_unpaused(ctx);
1170
1171 if (active) {
1172 genX(upload_3dstate_so_buffers)(brw);
1173
1174 /* BRW_NEW_VUE_MAP_GEOM_OUT */
1175 genX(upload_3dstate_so_decl_list)(brw, &brw->vue_map_geom_out);
1176 }
1177
1178 /* Finally, set up the SOL stage. This command must always follow updates to
1179 * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
1180 * MMIO register updates (current performed by the kernel at each batch
1181 * emit).
1182 */
1183 genX(upload_3dstate_streamout)(brw, active, &brw->vue_map_geom_out);
1184 }
1185
1186 static const struct brw_tracked_state genX(sol_state) = {
1187 .dirty = {
1188 .mesa = _NEW_LIGHT,
1189 .brw = BRW_NEW_BATCH |
1190 BRW_NEW_BLORP |
1191 BRW_NEW_RASTERIZER_DISCARD |
1192 BRW_NEW_VUE_MAP_GEOM_OUT |
1193 BRW_NEW_TRANSFORM_FEEDBACK,
1194 },
1195 .emit = genX(upload_sol),
1196 };
1197
1198 #endif
1199
1200 /* ---------------------------------------------------------------------- */
1201
1202 #if GEN_GEN >= 8
1203 static void
1204 genX(upload_raster)(struct brw_context *brw)
1205 {
1206 struct gl_context *ctx = &brw->ctx;
1207
1208 /* _NEW_BUFFERS */
1209 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
1210
1211 /* _NEW_POLYGON */
1212 struct gl_polygon_attrib *polygon = &ctx->Polygon;
1213
1214 /* _NEW_POINT */
1215 struct gl_point_attrib *point = &ctx->Point;
1216
1217 brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
1218 if (polygon->_FrontBit == render_to_fbo)
1219 raster.FrontWinding = CounterClockwise;
1220
1221 if (polygon->CullFlag) {
1222 switch (polygon->CullFaceMode) {
1223 case GL_FRONT:
1224 raster.CullMode = CULLMODE_FRONT;
1225 break;
1226 case GL_BACK:
1227 raster.CullMode = CULLMODE_BACK;
1228 break;
1229 case GL_FRONT_AND_BACK:
1230 raster.CullMode = CULLMODE_BOTH;
1231 break;
1232 default:
1233 unreachable("not reached");
1234 }
1235 } else {
1236 raster.CullMode = CULLMODE_NONE;
1237 }
1238
1239 point->SmoothFlag = raster.SmoothPointEnable;
1240
1241 raster.DXMultisampleRasterizationEnable =
1242 _mesa_is_multisample_enabled(ctx);
1243
1244 raster.GlobalDepthOffsetEnableSolid = polygon->OffsetFill;
1245 raster.GlobalDepthOffsetEnableWireframe = polygon->OffsetLine;
1246 raster.GlobalDepthOffsetEnablePoint = polygon->OffsetPoint;
1247
1248 switch (polygon->FrontMode) {
1249 case GL_FILL:
1250 raster.FrontFaceFillMode = FILL_MODE_SOLID;
1251 break;
1252 case GL_LINE:
1253 raster.FrontFaceFillMode = FILL_MODE_WIREFRAME;
1254 break;
1255 case GL_POINT:
1256 raster.FrontFaceFillMode = FILL_MODE_POINT;
1257 break;
1258 default:
1259 unreachable("not reached");
1260 }
1261
1262 switch (polygon->BackMode) {
1263 case GL_FILL:
1264 raster.BackFaceFillMode = FILL_MODE_SOLID;
1265 break;
1266 case GL_LINE:
1267 raster.BackFaceFillMode = FILL_MODE_WIREFRAME;
1268 break;
1269 case GL_POINT:
1270 raster.BackFaceFillMode = FILL_MODE_POINT;
1271 break;
1272 default:
1273 unreachable("not reached");
1274 }
1275
1276 /* _NEW_LINE */
1277 raster.AntialiasingEnable = ctx->Line.SmoothFlag;
1278
1279 /* _NEW_SCISSOR */
1280 raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
1281
1282 /* _NEW_TRANSFORM */
1283 if (!ctx->Transform.DepthClamp) {
1284 #if GEN_GEN >= 9
1285 raster.ViewportZFarClipTestEnable = true;
1286 raster.ViewportZNearClipTestEnable = true;
1287 #else
1288 raster.ViewportZClipTestEnable = true;
1289 #endif
1290 }
1291
1292 /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
1293 #if GEN_GEN >= 9
1294 raster.ConservativeRasterizationEnable =
1295 ctx->IntelConservativeRasterization;
1296 #endif
1297
1298 raster.GlobalDepthOffsetClamp = polygon->OffsetClamp;
1299 raster.GlobalDepthOffsetScale = polygon->OffsetFactor;
1300
1301 raster.GlobalDepthOffsetConstant = polygon->OffsetUnits * 2;
1302 }
1303 }
1304
1305 static const struct brw_tracked_state genX(raster_state) = {
1306 .dirty = {
1307 .mesa = _NEW_BUFFERS |
1308 _NEW_LINE |
1309 _NEW_MULTISAMPLE |
1310 _NEW_POINT |
1311 _NEW_POLYGON |
1312 _NEW_SCISSOR |
1313 _NEW_TRANSFORM,
1314 .brw = BRW_NEW_BLORP |
1315 BRW_NEW_CONTEXT |
1316 BRW_NEW_CONSERVATIVE_RASTERIZATION,
1317 },
1318 .emit = genX(upload_raster),
1319 };
1320 #endif
1321
1322 /* ---------------------------------------------------------------------- */
1323
1324 void
1325 genX(init_atoms)(struct brw_context *brw)
1326 {
1327 #if GEN_GEN < 6
1328 static const struct brw_tracked_state *render_atoms[] =
1329 {
1330 /* Once all the programs are done, we know how large urb entry
1331 * sizes need to be and can decide if we need to change the urb
1332 * layout.
1333 */
1334 &brw_curbe_offsets,
1335 &brw_recalculate_urb_fence,
1336
1337 &brw_cc_vp,
1338 &brw_cc_unit,
1339
1340 /* Surface state setup. Must come before the VS/WM unit. The binding
1341 * table upload must be last.
1342 */
1343 &brw_vs_pull_constants,
1344 &brw_wm_pull_constants,
1345 &brw_renderbuffer_surfaces,
1346 &brw_renderbuffer_read_surfaces,
1347 &brw_texture_surfaces,
1348 &brw_vs_binding_table,
1349 &brw_wm_binding_table,
1350
1351 &brw_fs_samplers,
1352 &brw_vs_samplers,
1353
1354 /* These set up state for brw_psp_urb_cbs */
1355 &brw_wm_unit,
1356 &brw_sf_vp,
1357 &brw_sf_unit,
1358 &brw_vs_unit, /* always required, enabled or not */
1359 &brw_clip_unit,
1360 &brw_gs_unit,
1361
1362 /* Command packets:
1363 */
1364 &brw_invariant_state,
1365
1366 &brw_binding_table_pointers,
1367 &brw_blend_constant_color,
1368
1369 &brw_depthbuffer,
1370
1371 &brw_polygon_stipple,
1372 &brw_polygon_stipple_offset,
1373
1374 &brw_line_stipple,
1375
1376 &brw_psp_urb_cbs,
1377
1378 &brw_drawing_rect,
1379 &brw_indices, /* must come before brw_vertices */
1380 &brw_index_buffer,
1381 &brw_vertices,
1382
1383 &brw_constant_buffer
1384 };
1385 #elif GEN_GEN == 6
1386 static const struct brw_tracked_state *render_atoms[] =
1387 {
1388 &gen6_sf_and_clip_viewports,
1389
1390 /* Command packets: */
1391
1392 &brw_cc_vp,
1393 &gen6_viewport_state, /* must do after *_vp stages */
1394
1395 &gen6_urb,
1396 &gen6_blend_state, /* must do before cc unit */
1397 &gen6_color_calc_state, /* must do before cc unit */
1398 &gen6_depth_stencil_state, /* must do before cc unit */
1399
1400 &gen6_vs_push_constants, /* Before vs_state */
1401 &gen6_gs_push_constants, /* Before gs_state */
1402 &gen6_wm_push_constants, /* Before wm_state */
1403
1404 /* Surface state setup. Must come before the VS/WM unit. The binding
1405 * table upload must be last.
1406 */
1407 &brw_vs_pull_constants,
1408 &brw_vs_ubo_surfaces,
1409 &brw_gs_pull_constants,
1410 &brw_gs_ubo_surfaces,
1411 &brw_wm_pull_constants,
1412 &brw_wm_ubo_surfaces,
1413 &gen6_renderbuffer_surfaces,
1414 &brw_renderbuffer_read_surfaces,
1415 &brw_texture_surfaces,
1416 &gen6_sol_surface,
1417 &brw_vs_binding_table,
1418 &gen6_gs_binding_table,
1419 &brw_wm_binding_table,
1420
1421 &brw_fs_samplers,
1422 &brw_vs_samplers,
1423 &brw_gs_samplers,
1424 &gen6_sampler_state,
1425 &gen6_multisample_state,
1426
1427 &gen6_vs_state,
1428 &gen6_gs_state,
1429 &genX(clip_state),
1430 &genX(sf_state),
1431 &gen6_wm_state,
1432
1433 &gen6_scissor_state,
1434
1435 &gen6_binding_table_pointers,
1436
1437 &brw_depthbuffer,
1438
1439 &brw_polygon_stipple,
1440 &brw_polygon_stipple_offset,
1441
1442 &brw_line_stipple,
1443
1444 &brw_drawing_rect,
1445
1446 &brw_indices, /* must come before brw_vertices */
1447 &brw_index_buffer,
1448 &brw_vertices,
1449 };
1450 #elif GEN_GEN == 7
1451 static const struct brw_tracked_state *render_atoms[] =
1452 {
1453 /* Command packets: */
1454
1455 &brw_cc_vp,
1456 &gen7_sf_clip_viewport,
1457
1458 &gen7_l3_state,
1459 &gen7_push_constant_space,
1460 &gen7_urb,
1461 &gen6_blend_state, /* must do before cc unit */
1462 &gen6_color_calc_state, /* must do before cc unit */
1463 &genX(depth_stencil_state), /* must do before cc unit */
1464
1465 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
1466 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
1467 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
1468 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
1469 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
1470
1471 &gen6_vs_push_constants, /* Before vs_state */
1472 &gen7_tcs_push_constants,
1473 &gen7_tes_push_constants,
1474 &gen6_gs_push_constants, /* Before gs_state */
1475 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
1476
1477 /* Surface state setup. Must come before the VS/WM unit. The binding
1478 * table upload must be last.
1479 */
1480 &brw_vs_pull_constants,
1481 &brw_vs_ubo_surfaces,
1482 &brw_vs_abo_surfaces,
1483 &brw_tcs_pull_constants,
1484 &brw_tcs_ubo_surfaces,
1485 &brw_tcs_abo_surfaces,
1486 &brw_tes_pull_constants,
1487 &brw_tes_ubo_surfaces,
1488 &brw_tes_abo_surfaces,
1489 &brw_gs_pull_constants,
1490 &brw_gs_ubo_surfaces,
1491 &brw_gs_abo_surfaces,
1492 &brw_wm_pull_constants,
1493 &brw_wm_ubo_surfaces,
1494 &brw_wm_abo_surfaces,
1495 &gen6_renderbuffer_surfaces,
1496 &brw_renderbuffer_read_surfaces,
1497 &brw_texture_surfaces,
1498 &brw_vs_binding_table,
1499 &brw_tcs_binding_table,
1500 &brw_tes_binding_table,
1501 &brw_gs_binding_table,
1502 &brw_wm_binding_table,
1503
1504 &brw_fs_samplers,
1505 &brw_vs_samplers,
1506 &brw_tcs_samplers,
1507 &brw_tes_samplers,
1508 &brw_gs_samplers,
1509 &gen6_multisample_state,
1510
1511 &gen7_vs_state,
1512 &gen7_hs_state,
1513 &gen7_te_state,
1514 &gen7_ds_state,
1515 &gen7_gs_state,
1516 &genX(sol_state),
1517 &genX(clip_state),
1518 &genX(sbe_state),
1519 &genX(sf_state),
1520 &gen7_wm_state,
1521 &gen7_ps_state,
1522
1523 &gen6_scissor_state,
1524
1525 &gen7_depthbuffer,
1526
1527 &brw_polygon_stipple,
1528 &brw_polygon_stipple_offset,
1529
1530 &brw_line_stipple,
1531
1532 &brw_drawing_rect,
1533
1534 &brw_indices, /* must come before brw_vertices */
1535 &brw_index_buffer,
1536 &brw_vertices,
1537
1538 &haswell_cut_index,
1539 };
1540 #elif GEN_GEN >= 8
1541 static const struct brw_tracked_state *render_atoms[] =
1542 {
1543 &brw_cc_vp,
1544 &gen8_sf_clip_viewport,
1545
1546 &gen7_l3_state,
1547 &gen7_push_constant_space,
1548 &gen7_urb,
1549 &gen8_blend_state,
1550 &gen6_color_calc_state,
1551
1552 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
1553 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
1554 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
1555 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
1556 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
1557
1558 &gen6_vs_push_constants, /* Before vs_state */
1559 &gen7_tcs_push_constants,
1560 &gen7_tes_push_constants,
1561 &gen6_gs_push_constants, /* Before gs_state */
1562 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
1563
1564 /* Surface state setup. Must come before the VS/WM unit. The binding
1565 * table upload must be last.
1566 */
1567 &brw_vs_pull_constants,
1568 &brw_vs_ubo_surfaces,
1569 &brw_vs_abo_surfaces,
1570 &brw_tcs_pull_constants,
1571 &brw_tcs_ubo_surfaces,
1572 &brw_tcs_abo_surfaces,
1573 &brw_tes_pull_constants,
1574 &brw_tes_ubo_surfaces,
1575 &brw_tes_abo_surfaces,
1576 &brw_gs_pull_constants,
1577 &brw_gs_ubo_surfaces,
1578 &brw_gs_abo_surfaces,
1579 &brw_wm_pull_constants,
1580 &brw_wm_ubo_surfaces,
1581 &brw_wm_abo_surfaces,
1582 &gen6_renderbuffer_surfaces,
1583 &brw_renderbuffer_read_surfaces,
1584 &brw_texture_surfaces,
1585 &brw_vs_binding_table,
1586 &brw_tcs_binding_table,
1587 &brw_tes_binding_table,
1588 &brw_gs_binding_table,
1589 &brw_wm_binding_table,
1590
1591 &brw_fs_samplers,
1592 &brw_vs_samplers,
1593 &brw_tcs_samplers,
1594 &brw_tes_samplers,
1595 &brw_gs_samplers,
1596 &gen8_multisample_state,
1597
1598 &gen8_vs_state,
1599 &gen8_hs_state,
1600 &gen7_te_state,
1601 &gen8_ds_state,
1602 &gen8_gs_state,
1603 &genX(sol_state),
1604 &genX(clip_state),
1605 &genX(raster_state),
1606 &genX(sbe_state),
1607 &genX(sf_state),
1608 &gen8_ps_blend,
1609 &gen8_ps_extra,
1610 &gen8_ps_state,
1611 &genX(depth_stencil_state),
1612 &gen8_wm_state,
1613
1614 &gen6_scissor_state,
1615
1616 &gen7_depthbuffer,
1617
1618 &brw_polygon_stipple,
1619 &brw_polygon_stipple_offset,
1620
1621 &brw_line_stipple,
1622
1623 &brw_drawing_rect,
1624
1625 &gen8_vf_topology,
1626
1627 &brw_indices,
1628 &gen8_index_buffer,
1629 &gen8_vertices,
1630
1631 &haswell_cut_index,
1632 &gen8_pma_fix,
1633 };
1634 #endif
1635
1636 STATIC_ASSERT(ARRAY_SIZE(render_atoms) <= ARRAY_SIZE(brw->render_atoms));
1637 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
1638 render_atoms, ARRAY_SIZE(render_atoms));
1639
1640 #if GEN_GEN >= 7
1641 static const struct brw_tracked_state *compute_atoms[] =
1642 {
1643 &gen7_l3_state,
1644 &brw_cs_image_surfaces,
1645 &gen7_cs_push_constants,
1646 &brw_cs_pull_constants,
1647 &brw_cs_ubo_surfaces,
1648 &brw_cs_abo_surfaces,
1649 &brw_cs_texture_surfaces,
1650 &brw_cs_work_groups_surface,
1651 &brw_cs_samplers,
1652 &brw_cs_state,
1653 };
1654
1655 STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms));
1656 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
1657 compute_atoms, ARRAY_SIZE(compute_atoms));
1658 #endif
1659 }