mesa: replace ctx->Polygon._FrontBit with a helper function
[mesa.git] / src / mesa / drivers / dri / i965 / genX_state_upload.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "common/gen_device_info.h"
27 #include "common/gen_sample_positions.h"
28 #include "genxml/gen_macros.h"
29
30 #include "main/bufferobj.h"
31 #include "main/context.h"
32 #include "main/enums.h"
33 #include "main/macros.h"
34 #include "main/state.h"
35
36 #include "brw_context.h"
37 #if GEN_GEN == 6
38 #include "brw_defines.h"
39 #endif
40 #include "brw_draw.h"
41 #include "brw_multisample_state.h"
42 #include "brw_state.h"
43 #include "brw_wm.h"
44 #include "brw_util.h"
45
46 #include "intel_batchbuffer.h"
47 #include "intel_buffer_objects.h"
48 #include "intel_fbo.h"
49
50 #include "main/enums.h"
51 #include "main/fbobject.h"
52 #include "main/framebuffer.h"
53 #include "main/glformats.h"
54 #include "main/shaderapi.h"
55 #include "main/stencil.h"
56 #include "main/transformfeedback.h"
57 #include "main/varray.h"
58 #include "main/viewport.h"
59
60 UNUSED static void *
61 emit_dwords(struct brw_context *brw, unsigned n)
62 {
63 intel_batchbuffer_begin(brw, n, RENDER_RING);
64 uint32_t *map = brw->batch.map_next;
65 brw->batch.map_next += n;
66 intel_batchbuffer_advance(brw);
67 return map;
68 }
69
/* A GPU address: a buffer object plus byte offset, together with the
 * GEM read/write domains used when emitting a relocation for it.
 * A NULL bo means the offset is used as an absolute value with no
 * relocation (see __gen_combine_address()).
 */
struct brw_address {
   struct brw_bo *bo;
   uint32_t read_domains;   /* I915_GEM_DOMAIN_* bits readable via this address */
   uint32_t write_domain;   /* I915_GEM_DOMAIN_* bit written, or 0 for read-only */
   uint32_t offset;         /* byte offset into bo (or absolute value if !bo) */
};
76
77 static uint64_t
78 emit_reloc(struct brw_context *brw,
79 void *location, struct brw_address address, uint32_t delta)
80 {
81 uint32_t offset = (char *) location - (char *) brw->batch.map;
82
83 return brw_emit_reloc(&brw->batch, offset, address.bo,
84 address.offset + delta,
85 address.read_domains,
86 address.write_domain);
87 }
88
89 #define __gen_address_type struct brw_address
90 #define __gen_user_data struct brw_context
91
92 static uint64_t
93 __gen_combine_address(struct brw_context *brw, void *location,
94 struct brw_address address, uint32_t delta)
95 {
96 if (address.bo == NULL) {
97 return address.offset + delta;
98 } else {
99 return emit_reloc(brw, location, address, delta);
100 }
101 }
102
103 static inline struct brw_address
104 render_bo(struct brw_bo *bo, uint32_t offset)
105 {
106 return (struct brw_address) {
107 .bo = bo,
108 .offset = offset,
109 .read_domains = I915_GEM_DOMAIN_RENDER,
110 .write_domain = I915_GEM_DOMAIN_RENDER,
111 };
112 }
113
114 static inline struct brw_address
115 render_ro_bo(struct brw_bo *bo, uint32_t offset)
116 {
117 return (struct brw_address) {
118 .bo = bo,
119 .offset = offset,
120 .read_domains = I915_GEM_DOMAIN_RENDER,
121 .write_domain = 0,
122 };
123 }
124
125 static inline struct brw_address
126 instruction_bo(struct brw_bo *bo, uint32_t offset)
127 {
128 return (struct brw_address) {
129 .bo = bo,
130 .offset = offset,
131 .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
132 .write_domain = I915_GEM_DOMAIN_INSTRUCTION,
133 };
134 }
135
136 static inline struct brw_address
137 instruction_ro_bo(struct brw_bo *bo, uint32_t offset)
138 {
139 return (struct brw_address) {
140 .bo = bo,
141 .offset = offset,
142 .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
143 .write_domain = 0,
144 };
145 }
146
147 static inline struct brw_address
148 vertex_bo(struct brw_bo *bo, uint32_t offset)
149 {
150 return (struct brw_address) {
151 .bo = bo,
152 .offset = offset,
153 .read_domains = I915_GEM_DOMAIN_VERTEX,
154 .write_domain = 0,
155 };
156 }
157
#if GEN_GEN == 4
/* KSP ("Kernel Start Pointer"): locate a shader program for a state
 * packet.  On Gen4 the shader offset must be relocated against the
 * program cache BO, so KSP yields a full brw_address.
 */
static inline struct brw_address
KSP(struct brw_context *brw, uint32_t offset)
{
   return instruction_bo(brw->cache.bo, offset);
}

static inline struct brw_address
KSP_ro(struct brw_context *brw, uint32_t offset)
{
   return instruction_ro_bo(brw->cache.bo, offset);
}
#else
/* On Gen5+ the offset is used directly with no relocation (presumably
 * relative to the instruction base address -- NOTE(review): confirm
 * against the 3DSTATE packet definitions).
 */
static inline uint32_t
KSP(struct brw_context *brw, uint32_t offset)
{
   return offset;
}

/* With no relocation there is no read-only/read-write distinction. */
#define KSP_ro KSP

#endif
180
181 #include "genxml/genX_pack.h"
182
/* Token-pasting helpers that map a genxml command struct name to its
 * generated _length, _length_bias, _header and _pack symbols.
 */
#define _brw_cmd_length(cmd) cmd ## _length
#define _brw_cmd_length_bias(cmd) cmd ## _length_bias
#define _brw_cmd_header(cmd) cmd ## _header
#define _brw_cmd_pack(cmd) cmd ## _pack

/* Emit a fixed-length command.  Declares `name` initialized with the
 * command's header fields, reserves space in the batch, then runs the
 * controlled block exactly once; the for-loop's increment expression
 * packs `name` into the reserved space and terminates the loop.
 */
#define brw_batch_emit(brw, cmd, name)                  \
   for (struct cmd name = { _brw_cmd_header(cmd) },     \
        *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);              \
        _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),   \
        _dst = NULL)

/* Emit a variable-length (n-dword) command with designated-initializer
 * overrides; evaluates to a pointer to dw[1] so the caller can append
 * the trailing payload.
 */
#define brw_batch_emitn(brw, cmd, n, ...) ({           \
      uint32_t *_dw = emit_dwords(brw, n);             \
      struct cmd template = {                          \
         _brw_cmd_header(cmd),                         \
         .DWordLength = n - _brw_cmd_length_bias(cmd), \
         __VA_ARGS__                                   \
      };                                               \
      _brw_cmd_pack(cmd)(brw, _dw, &template);         \
      _dw + 1; /* Array starts at dw[1] */             \
   })

/* Like brw_batch_emit(), but packs into the indirect state buffer via
 * brw_state_batch() (with the given alignment) instead of the command
 * stream; the state's batch offset is returned through `offset`.
 */
#define brw_state_emit(brw, cmd, align, offset, name)          \
   for (struct cmd name = { 0, },                              \
        *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4, \
                                align, offset);                \
        __builtin_expect(_dst != NULL, 1);                     \
        _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),          \
        _dst = NULL)
213
214 /**
215 * Polygon stipple packet
216 */
217 static void
218 genX(upload_polygon_stipple)(struct brw_context *brw)
219 {
220 struct gl_context *ctx = &brw->ctx;
221
222 /* _NEW_POLYGON */
223 if (!ctx->Polygon.StippleFlag)
224 return;
225
226 brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
227 /* Polygon stipple is provided in OpenGL order, i.e. bottom
228 * row first. If we're rendering to a window (i.e. the
229 * default frame buffer object, 0), then we need to invert
230 * it to match our pixel layout. But if we're rendering
231 * to a FBO (i.e. any named frame buffer object), we *don't*
232 * need to invert - we already match the layout.
233 */
234 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
235 for (unsigned i = 0; i < 32; i++)
236 poly.PatternRow[i] = ctx->PolygonStipple[31 - i]; /* invert */
237 } else {
238 for (unsigned i = 0; i < 32; i++)
239 poly.PatternRow[i] = ctx->PolygonStipple[i];
240 }
241 }
242 }
243
/* Re-emit the stipple pattern when the pattern or polygon state changes,
 * or on a fresh context.
 *
 * NOTE(review): the emitted pattern's orientation also depends on whether
 * ctx->DrawBuffer is a winsys FBO, yet _NEW_BUFFERS is not listed here --
 * verify a stale pattern cannot survive a winsys<->FBO binding change.
 */
static const struct brw_tracked_state genX(polygon_stipple) = {
   .dirty = {
      .mesa = _NEW_POLYGON |
              _NEW_POLYGONSTIPPLE,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = genX(upload_polygon_stipple),
};
252
253 /**
254 * Polygon stipple offset packet
255 */
256 static void
257 genX(upload_polygon_stipple_offset)(struct brw_context *brw)
258 {
259 struct gl_context *ctx = &brw->ctx;
260
261 /* _NEW_POLYGON */
262 if (!ctx->Polygon.StippleFlag)
263 return;
264
265 brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_OFFSET), poly) {
266 /* _NEW_BUFFERS
267 *
268 * If we're drawing to a system window we have to invert the Y axis
269 * in order to match the OpenGL pixel coordinate system, and our
270 * offset must be matched to the window position. If we're drawing
271 * to a user-created FBO then our native pixel coordinate system
272 * works just fine, and there's no window system to worry about.
273 */
274 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
275 poly.PolygonStippleYOffset =
276 (32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31;
277 }
278 }
279 }
280
/* The Y offset depends on the drawbuffer height (_NEW_BUFFERS) and on
 * whether stippling is enabled (_NEW_POLYGON).
 */
static const struct brw_tracked_state genX(polygon_stipple_offset) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_POLYGON,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = genX(upload_polygon_stipple_offset),
};
289
290 /**
291 * Line stipple packet
292 */
293 static void
294 genX(upload_line_stipple)(struct brw_context *brw)
295 {
296 struct gl_context *ctx = &brw->ctx;
297
298 if (!ctx->Line.StippleFlag)
299 return;
300
301 brw_batch_emit(brw, GENX(3DSTATE_LINE_STIPPLE), line) {
302 line.LineStipplePattern = ctx->Line.StipplePattern;
303
304 line.LineStippleInverseRepeatCount = 1.0f / ctx->Line.StippleFactor;
305 line.LineStippleRepeatCount = ctx->Line.StippleFactor;
306 }
307 }
308
/* Line stipple state tracks only _NEW_LINE plus initial context setup. */
static const struct brw_tracked_state genX(line_stipple) = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = genX(upload_line_stipple),
};
316
317 /* Constant single cliprect for framebuffer object or DRI2 drawing */
318 static void
319 genX(upload_drawing_rect)(struct brw_context *brw)
320 {
321 struct gl_context *ctx = &brw->ctx;
322 const struct gl_framebuffer *fb = ctx->DrawBuffer;
323 const unsigned int fb_width = _mesa_geometric_width(fb);
324 const unsigned int fb_height = _mesa_geometric_height(fb);
325
326 brw_batch_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
327 rect.ClippedDrawingRectangleXMax = fb_width - 1;
328 rect.ClippedDrawingRectangleYMax = fb_height - 1;
329 }
330 }
331
/* The drawing rectangle is sized from the drawbuffer (_NEW_BUFFERS). */
static const struct brw_tracked_state genX(drawing_rect) = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT,
   },
   .emit = genX(upload_drawing_rect),
};
340
/**
 * Pack one VERTEX_BUFFER_STATE entry at @dw and return a pointer just
 * past it, so callers can chain entries inside 3DSTATE_VERTEX_BUFFERS.
 *
 * @buffer_nr     hardware vertex buffer index
 * @start_offset  first byte of the buffer within @bo
 * @end_offset    one past the last byte the VF may read
 * @stride        pitch between vertices in bytes
 * @step_rate     instance data step rate; non-zero selects per-instance
 *                data on gens that encode it here (< Gen8)
 */
static uint32_t *
genX(emit_vertex_buffer_state)(struct brw_context *brw,
                               uint32_t *dw,
                               unsigned buffer_nr,
                               struct brw_bo *bo,
                               unsigned start_offset,
                               unsigned end_offset,
                               unsigned stride,
                               unsigned step_rate)
{
   struct GENX(VERTEX_BUFFER_STATE) buf_state = {
      .VertexBufferIndex = buffer_nr,
      .BufferPitch = stride,
      .BufferStartingAddress = vertex_bo(bo, start_offset),
#if GEN_GEN >= 8
      /* Gen8+ takes a size; earlier gens take an inclusive end address. */
      .BufferSize = end_offset - start_offset,
#endif

#if GEN_GEN >= 7
      .AddressModifyEnable = true,
#endif

#if GEN_GEN < 8
      .BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA,
      .InstanceDataStepRate = step_rate,
#if GEN_GEN >= 5
      .EndAddress = vertex_bo(bo, end_offset - 1),
#endif
#endif

      /* Per-gen cacheability (MOCS) for vertex fetch. */
#if GEN_GEN == 10
      .VertexBufferMOCS = CNL_MOCS_WB,
#elif GEN_GEN == 9
      .VertexBufferMOCS = SKL_MOCS_WB,
#elif GEN_GEN == 8
      .VertexBufferMOCS = BDW_MOCS_WB,
#elif GEN_GEN == 7
      .VertexBufferMOCS = GEN7_MOCS_L3,
#endif
   };

   GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state);
   return dw + GENX(VERTEX_BUFFER_STATE_length);
}
385
386 UNUSED static bool
387 is_passthru_format(uint32_t format)
388 {
389 switch (format) {
390 case ISL_FORMAT_R64_PASSTHRU:
391 case ISL_FORMAT_R64G64_PASSTHRU:
392 case ISL_FORMAT_R64G64B64_PASSTHRU:
393 case ISL_FORMAT_R64G64B64A64_PASSTHRU:
394 return true;
395 default:
396 return false;
397 }
398 }
399
400 UNUSED static int
401 uploads_needed(uint32_t format)
402 {
403 if (!is_passthru_format(format))
404 return 1;
405
406 switch (format) {
407 case ISL_FORMAT_R64_PASSTHRU:
408 case ISL_FORMAT_R64G64_PASSTHRU:
409 return 1;
410 case ISL_FORMAT_R64G64B64_PASSTHRU:
411 case ISL_FORMAT_R64G64B64A64_PASSTHRU:
412 return 2;
413 default:
414 unreachable("not reached");
415 }
416 }
417
/*
 * Returns the format that we are finally going to use when uploading a
 * vertex element. It will only change if we are using *64*PASSTHRU formats,
 * as for gen < 8 they need to be split into two *32*FLOAT formats.
 *
 * @upload indicates which upload we are in. Valid values are [0,1]
 */
425 static uint32_t
426 downsize_format_if_needed(uint32_t format,
427 int upload)
428 {
429 assert(upload == 0 || upload == 1);
430
431 if (!is_passthru_format(format))
432 return format;
433
434 switch (format) {
435 case ISL_FORMAT_R64_PASSTHRU:
436 return ISL_FORMAT_R32G32_FLOAT;
437 case ISL_FORMAT_R64G64_PASSTHRU:
438 return ISL_FORMAT_R32G32B32A32_FLOAT;
439 case ISL_FORMAT_R64G64B64_PASSTHRU:
440 return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT
441 : ISL_FORMAT_R32G32_FLOAT;
442 case ISL_FORMAT_R64G64B64A64_PASSTHRU:
443 return ISL_FORMAT_R32G32B32A32_FLOAT;
444 default:
445 unreachable("not reached");
446 }
447 }
448
/*
 * Returns the number of components associated with a format that is used in
 * a 64-to-32-bit format split. See downsize_format_if_needed().
 */
453 static int
454 upload_format_size(uint32_t upload_format)
455 {
456 switch (upload_format) {
457 case ISL_FORMAT_R32G32_FLOAT:
458 return 2;
459 case ISL_FORMAT_R32G32B32A32_FLOAT:
460 return 4;
461 default:
462 unreachable("not reached");
463 }
464 }
465
/**
 * Emit all vertex-fetch state for a draw: 3DSTATE_VERTEX_BUFFERS,
 * 3DSTATE_VERTEX_ELEMENTS, and (Gen8+) 3DSTATE_VF_SGVS /
 * 3DSTATE_VF_INSTANCING.  Also handles the extra elements needed for
 * gl_VertexID/InstanceID/BaseVertex/BaseInstance/DrawID and the
 * edge-flag element, which must come last on Gen6+.
 */
static void
genX(emit_vertices)(struct brw_context *brw)
{
   uint32_t *dw;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

#if GEN_GEN < 6
   brw_emit_query_begin(brw);
#endif

   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);

#if GEN_GEN >= 8
   struct gl_context *ctx = &brw->ctx;
   /* Edge flags are only consumed when either face is drawn non-filled. */
   const bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
                                ctx->Polygon.BackMode != GL_FILL);

   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
      unsigned vue = brw->vb.nr_enabled;

      /* The element for the edge flags must always be last, so we have to
       * insert the SGVS before it in that case.
       */
      if (uses_edge_flag) {
         assert(vue > 0);
         vue--;
      }

      WARN_ONCE(vue >= 33,
                "Trying to insert VID/IID past 33rd vertex element, "
                "need to reorder the vertex attrbutes.");

      /* Have the hardware synthesize VertexID/InstanceID into components
       * 2/3 of VUE slot `vue`.
       */
      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) {
         if (vs_prog_data->uses_vertexid) {
            vfs.VertexIDEnable = true;
            vfs.VertexIDComponentNumber = 2;
            vfs.VertexIDElementOffset = vue;
         }

         if (vs_prog_data->uses_instanceid) {
            vfs.InstanceIDEnable = true;
            vfs.InstanceIDComponentNumber = 3;
            vfs.InstanceIDElementOffset = vue;
         }
      }

      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.InstancingEnable = true;
         vfi.VertexElementIndex = vue;
      }
   } else {
      /* Emit an all-disabled SGVS packet to clear any stale state. */
      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs);
   }

   /* Normally we don't need an element for the SGVS attribute because the
    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
    * we're using draw parameters then we need an element for those
    * values. Additionally if there is an edge flag element then the SGVS
    * can't be inserted past that so we need a dummy element to ensure that
    * the edge flag is the last one.
    */
   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
                                    vs_prog_data->uses_baseinstance ||
                                    ((vs_prog_data->uses_instanceid ||
                                      vs_prog_data->uses_vertexid)
                                     && uses_edge_flag));
#else
   /* Pre-Gen8 has no SGVS: any system-generated value needs its own
    * vertex element.
    */
   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
                                    vs_prog_data->uses_baseinstance ||
                                    vs_prog_data->uses_instanceid ||
                                    vs_prog_data->uses_vertexid);
#endif
   unsigned nr_elements =
      brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;

#if GEN_GEN < 8
   /* If any of the formats of vb.enabled needs more than one upload, we need
    * to add it to nr_elements
    */
   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);

      if (uploads_needed(format) > 1)
         nr_elements++;
   }
#endif

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but they don't do anything unless
    * a VE loads from them.
    */
   if (nr_elements == 0) {
      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
                           1 + GENX(VERTEX_ELEMENT_STATE_length));
      struct GENX(VERTEX_ELEMENT_STATE) elem = {
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_1_FP,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem);
      return;
   }

   /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
   const bool uses_draw_params =
      vs_prog_data->uses_basevertex ||
      vs_prog_data->uses_baseinstance;
   const unsigned nr_buffers = brw->vb.nr_buffers +
      uses_draw_params + vs_prog_data->uses_drawid;

   if (nr_buffers) {
      assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));

      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS),
                           1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers);

      for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
         const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
         /* Prior to Haswell and Bay Trail we have to use 4-component formats
          * to fake 3-component ones. In particular, we do this for
          * half-float and 8 and 16-bit integer formats. This means that the
          * vertex element may poke over the end of the buffer by 2 bytes.
          */
         const unsigned padding =
            (GEN_GEN <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
         const unsigned end = buffer->offset + buffer->size + padding;
         dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
                                             buffer->offset,
                                             end,
                                             buffer->stride,
                                             buffer->step_rate);
      }

      /* Extra buffer for gl_BaseVertex/gl_BaseInstance, if used. */
      if (uses_draw_params) {
         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers,
                                             brw->draw.draw_params_bo,
                                             brw->draw.draw_params_offset,
                                             brw->draw.draw_params_bo->size,
                                             0 /* stride */,
                                             0 /* step rate */);
      }

      /* Extra buffer for gl_DrawID, if used. */
      if (vs_prog_data->uses_drawid) {
         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
                                             brw->draw.draw_id_bo,
                                             brw->draw.draw_id_offset,
                                             brw->draw.draw_id_bo->size,
                                             0 /* stride */,
                                             0 /* step rate */);
      }
   }

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
    * presumably for VertexID/InstanceID.
    */
#if GEN_GEN >= 6
   assert(nr_elements <= 34);
   const struct brw_vertex_element *gen6_edgeflag_input = NULL;
#else
   assert(nr_elements <= 18);
#endif

   dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
                        1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements);
   unsigned i;
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      const struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
      uint32_t comp0 = VFCOMP_STORE_SRC;
      uint32_t comp1 = VFCOMP_STORE_SRC;
      uint32_t comp2 = VFCOMP_STORE_SRC;
      uint32_t comp3 = VFCOMP_STORE_SRC;
      const unsigned num_uploads = GEN_GEN < 8 ? uploads_needed(format) : 1;

#if GEN_GEN >= 8
      /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
       * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an
       * element which has edge flag enabled."
       */
      assert(!(is_passthru_format(format) && uses_edge_flag));
#endif

      /* The gen4 driver expects edgeflag to come in as a float, and passes
       * that float on to the tests in the clipper. Mesa's current vertex
       * attribute value for EdgeFlag is stored as a float, which works out.
       * glEdgeFlagPointer, on the other hand, gives us an unnormalized
       * integer ubyte. Just rewrite that to convert to a float.
       *
       * Gen6+ passes edgeflag as sideband along with the vertex, instead
       * of in the VUE. We have to upload it sideband as the last vertex
       * element according to the B-Spec.
       */
#if GEN_GEN >= 6
      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
         gen6_edgeflag_input = input;
         continue;
      }
#endif

      for (unsigned c = 0; c < num_uploads; c++) {
         const uint32_t upload_format = GEN_GEN >= 8 ? format :
            downsize_format_if_needed(format, c);
         /* If we need more than one upload, the offset stride would be 128
          * bits (16 bytes), as for previous uploads we are using the full
          * entry. */
         const unsigned offset = input->offset + c * 16;

         const int size = (GEN_GEN < 8 && is_passthru_format(format)) ?
            upload_format_size(upload_format) : input->glarray->Size;

         /* Intentional cascade: a size-N attribute zero-fills every
          * missing component above N, and component 3 gets 0/1 per the
          * attribute type.
          */
         switch (size) {
         case 0: comp0 = VFCOMP_STORE_0; /* fallthrough */
         case 1: comp1 = VFCOMP_STORE_0; /* fallthrough */
         case 2: comp2 = VFCOMP_STORE_0; /* fallthrough */
         case 3:
            if (GEN_GEN >= 8 && input->glarray->Doubles) {
               comp3 = VFCOMP_STORE_0;
            } else if (input->glarray->Integer) {
               comp3 = VFCOMP_STORE_1_INT;
            } else {
               comp3 = VFCOMP_STORE_1_FP;
            }

            break;
         }

#if GEN_GEN >= 8
         /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE):
          *
          * "When SourceElementFormat is set to one of the *64*_PASSTHRU
          * formats, 64-bit components are stored in the URB without any
          * conversion. In this case, vertex elements must be written as 128
          * or 256 bits, with VFCOMP_STORE_0 being used to pad the output as
          * required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red
          * component into the URB, Component 1 must be specified as
          * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in
          * order to output a 128-bit vertex element, or Components 1-3 must
          * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
          * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3
          * to be specified as VFCOMP_STORE_0 in order to output a 256-bit
          * vertex element."
          */
         if (input->glarray->Doubles && !input->is_dual_slot) {
            /* Store vertex elements which correspond to double and dvec2 vertex
             * shader inputs as 128-bit vertex elements, instead of 256-bits.
             */
            comp2 = VFCOMP_NOSTORE;
            comp3 = VFCOMP_NOSTORE;
         }
#endif

         struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
            .VertexBufferIndex = input->buffer,
            .Valid = true,
            .SourceElementFormat = upload_format,
            .SourceElementOffset = offset,
            .Component0Control = comp0,
            .Component1Control = comp1,
            .Component2Control = comp2,
            .Component3Control = comp3,
#if GEN_GEN < 5
            .DestinationElementOffset = i * 4,
#endif
         };

         GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
         dw += GENX(VERTEX_ELEMENT_STATE_length);
      }
   }

   /* Element carrying draw parameters and/or VID/IID (see
    * needs_sgvs_element above).
    */
   if (needs_sgvs_element) {
      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
         .Valid = true,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#if GEN_GEN < 5
         .DestinationElementOffset = i * 4,
#endif
      };

#if GEN_GEN >= 8
      if (vs_prog_data->uses_basevertex ||
          vs_prog_data->uses_baseinstance) {
         elem_state.VertexBufferIndex = brw->vb.nr_buffers;
         elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
         elem_state.Component0Control = VFCOMP_STORE_SRC;
         elem_state.Component1Control = VFCOMP_STORE_SRC;
      }
#else
      elem_state.VertexBufferIndex = brw->vb.nr_buffers;
      elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
      if (vs_prog_data->uses_basevertex)
         elem_state.Component0Control = VFCOMP_STORE_SRC;

      if (vs_prog_data->uses_baseinstance)
         elem_state.Component1Control = VFCOMP_STORE_SRC;

      if (vs_prog_data->uses_vertexid)
         elem_state.Component2Control = VFCOMP_STORE_VID;

      if (vs_prog_data->uses_instanceid)
         elem_state.Component3Control = VFCOMP_STORE_IID;
#endif

      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
      dw += GENX(VERTEX_ELEMENT_STATE_length);
   }

   /* Element sourcing gl_DrawID from the dedicated draw-id buffer. */
   if (vs_prog_data->uses_drawid) {
      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
         .Valid = true,
         .VertexBufferIndex = brw->vb.nr_buffers + 1,
         .SourceElementFormat = ISL_FORMAT_R32_UINT,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#if GEN_GEN < 5
         .DestinationElementOffset = i * 4,
#endif
      };

      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
      dw += GENX(VERTEX_ELEMENT_STATE_length);
   }

   /* Edge flag element, deferred above so it is emitted last (Gen6+). */
#if GEN_GEN >= 6
   if (gen6_edgeflag_input) {
      const uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);

      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
         .Valid = true,
         .VertexBufferIndex = gen6_edgeflag_input->buffer,
         .EdgeFlagEnable = true,
         .SourceElementFormat = format,
         .SourceElementOffset = gen6_edgeflag_input->offset,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };

      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
      dw += GENX(VERTEX_ELEMENT_STATE_length);
   }
#endif

#if GEN_GEN >= 8
   /* Gen8+ moves instancing out of VERTEX_BUFFER_STATE into per-element
    * 3DSTATE_VF_INSTANCING packets.
    */
   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
      const struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
      unsigned element_index;

      /* The edge flag element is reordered to be the last one in the code
       * above so we need to compensate for that in the element indices used
       * below.
       */
      if (input == gen6_edgeflag_input)
         element_index = nr_elements - 1;
      else
         element_index = j++;

      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = element_index;
         vfi.InstancingEnable = buffer->step_rate != 0;
         vfi.InstanceDataStepRate = buffer->step_rate;
      }
   }

   if (vs_prog_data->uses_drawid) {
      const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;

      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = element;
      }
   }
#endif
}
858
/* Vertex fetch state depends on polygon mode (edge-flag handling),
 * the bound vertex buffers, and the VS's use of system values.
 */
static const struct brw_tracked_state genX(vertices) = {
   .dirty = {
      .mesa = _NEW_POLYGON,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_VERTICES |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = genX(emit_vertices),
};
869
/**
 * Emit 3DSTATE_INDEX_BUFFER for the current index buffer, if any.
 */
static void
genX(emit_index_buffer)(struct brw_context *brw)
{
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;

   /* Non-indexed draw: nothing to program. */
   if (index_buffer == NULL)
      return;

   brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
      /* On Haswell and Gen8+ the cut index lives in 3DSTATE_VF instead
       * (see genX(upload_cut_index)).
       */
#if GEN_GEN < 8 && !GEN_IS_HASWELL
      ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
#endif
      ib.IndexFormat = brw_get_index_type(index_buffer->index_size);
      ib.BufferStartingAddress = vertex_bo(brw->ib.bo, 0);
#if GEN_GEN >= 8
      ib.IndexBufferMOCS = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
      ib.BufferSize = brw->ib.size;
#else
      /* Pre-Gen8 takes an inclusive end address rather than a size. */
      ib.BufferEndingAddress = vertex_bo(brw->ib.bo, brw->ib.size - 1);
#endif
   }
}
892
/* Re-emit whenever a new index buffer is bound or the batch restarts. */
static const struct brw_tracked_state genX(index_buffer) = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_INDEX_BUFFER,
   },
   .emit = genX(emit_index_buffer),
};
902
#if GEN_IS_HASWELL || GEN_GEN >= 8
/* On Haswell and Gen8+ primitive restart is programmed via 3DSTATE_VF
 * rather than 3DSTATE_INDEX_BUFFER.
 */
static void
genX(upload_cut_index)(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   brw_batch_emit(brw, GENX(3DSTATE_VF), vf) {
      /* Leave restart disabled for non-indexed draws even if the GL
       * enable is set.
       */
      if (ctx->Array._PrimitiveRestart && brw->ib.ib) {
         vf.IndexedDrawCutIndexEnable = true;
         vf.CutIndex = _mesa_primitive_restart_index(ctx, brw->ib.index_size);
      }
   }
}

/* The restart index depends on the index size, hence BRW_NEW_INDEX_BUFFER. */
const struct brw_tracked_state genX(cut_index) = {
   .dirty = {
      .mesa = _NEW_TRANSFORM,
      .brw = BRW_NEW_INDEX_BUFFER,
   },
   .emit = genX(upload_cut_index),
};
#endif
925
926 #if GEN_GEN >= 6
927 /**
928 * Determine the appropriate attribute override value to store into the
929 * 3DSTATE_SF structure for a given fragment shader attribute. The attribute
930 * override value contains two pieces of information: the location of the
931 * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
932 * flag indicating whether to "swizzle" the attribute based on the direction
933 * the triangle is facing.
934 *
935 * If an attribute is "swizzled", then the given VUE location is used for
936 * front-facing triangles, and the VUE location that immediately follows is
937 * used for back-facing triangles. We use this to implement the mapping from
938 * gl_FrontColor/gl_BackColor to gl_Color.
939 *
940 * urb_entry_read_offset is the offset into the VUE at which the SF unit is
941 * being instructed to begin reading attribute data. It can be set to a
942 * nonzero value to prevent the SF unit from wasting time reading elements of
943 * the VUE that are not needed by the fragment shader. It is measured in
944 * 256-bit increments.
945 */
/* See the block comment above for the overall contract.
 * @attr is the override record to fill in; @fs_attr is a VARYING_SLOT_*;
 * @max_source_attr is updated to the highest VUE slot the SF will read.
 */
static void
genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr,
                        const struct brw_vue_map *vue_map,
                        int urb_entry_read_offset, int fs_attr,
                        bool two_side_color, uint32_t *max_source_attr)
{
   /* Find the VUE slot for this attribute (-1 if the VS didn't write it). */
   int slot = vue_map->varying_to_slot[fs_attr];

   /* Viewport and Layer are stored in the VUE header. We need to override
    * them to zero if earlier stages didn't write them, as GL requires that
    * they read back as zero when not explicitly set.
    */
   if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
      attr->ComponentOverrideX = true;
      attr->ComponentOverrideW = true;
      attr->ConstantSource = CONST_0000;

      /* Y holds Layer, Z holds Viewport; only zero the ones not written. */
      if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
         attr->ComponentOverrideY = true;
      if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
         attr->ComponentOverrideZ = true;

      return;
   }

   /* If there was only a back color written but not front, use back
    * as the color instead of undefined
    */
   if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
   if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];

   if (slot == -1) {
      /* This attribute does not exist in the VUE--that means that the vertex
       * shader did not write to it. This means that either:
       *
       * (a) This attribute is a texture coordinate, and it is going to be
       * replaced with point coordinates (as a consequence of a call to
       * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
       * hardware will ignore whatever attribute override we supply.
       *
       * (b) This attribute is read by the fragment shader but not written by
       * the vertex shader, so its value is undefined. Therefore the
       * attribute override we supply doesn't matter.
       *
       * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
       * previous shader stage.
       *
       * Note that we don't have to worry about the cases where the attribute
       * is gl_PointCoord or is undergoing point sprite coordinate
       * replacement, because in those cases, this function isn't called.
       *
       * In case (c), we need to program the attribute overrides so that the
       * primitive ID will be stored in this slot. In every other case, the
       * attribute override we supply doesn't matter. So just go ahead and
       * program primitive ID in every case.
       */
      attr->ComponentOverrideW = true;
      attr->ComponentOverrideX = true;
      attr->ComponentOverrideY = true;
      attr->ComponentOverrideZ = true;
      attr->ConstantSource = PRIM_ID;
      return;
   }

   /* Compute the location of the attribute relative to urb_entry_read_offset.
    * Each increment of urb_entry_read_offset represents a 256-bit value, so
    * it counts for two 128-bit VUE slots.
    */
   int source_attr = slot - 2 * urb_entry_read_offset;
   assert(source_attr >= 0 && source_attr < 32);

   /* If we are doing two-sided color, and the VUE slot following this one
    * represents a back-facing color, then we need to instruct the SF unit to
    * do back-facing swizzling.
    */
   bool swizzling = two_side_color &&
      ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
        vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
       (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
        vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));

   /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
   if (*max_source_attr < source_attr + swizzling)
      *max_source_attr = source_attr + swizzling;

   attr->SourceAttribute = source_attr;
   if (swizzling)
      attr->SwizzleSelect = INPUTATTR_FACING;
}
1038
1039
/**
 * Compute the SF/SBE attribute override table, the point sprite texture
 * coordinate enable bitmask, and the vertex URB entry read length/offset
 * needed to feed the current fragment shader's inputs.
 *
 * Only the first 16 entries of attr_overrides are written explicitly; FS
 * input slots >= 16 must already line up with their source attributes
 * (asserted below).
 */
static void
genX(calculate_attr_overrides)(const struct brw_context *brw,
                               struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,
                               uint32_t *point_sprite_enables,
                               uint32_t *urb_entry_read_length,
                               uint32_t *urb_entry_read_offset)
{
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_POINT */
   const struct gl_point_attrib *point = &ctx->Point;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   /* Highest source attribute index referenced by any override; it drives
    * the URB entry read length computation at the bottom.
    */
   uint32_t max_source_attr = 0;

   *point_sprite_enables = 0;

   /* BRW_NEW_FRAGMENT_PROGRAM
    *
    * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
    * the full vertex header. Otherwise, we can program the SF to start
    * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
    * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
    * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
    */

   bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
      (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);

   *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;

   /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
    * description of dw10 Point Sprite Texture Coordinate Enable:
    *
    * "This field must be programmed to zero when non-point primitives
    * are rendered."
    *
    * The SandyBridge PRM doesn't explicitly say that point sprite enables
    * must be programmed to zero when rendering non-point primitives, but
    * the IvyBridge PRM does, and if we don't, we get garbage.
    *
    * This is not required on Haswell, as the hardware ignores this state
    * when drawing non-points -- although we do still need to be careful to
    * correctly set the attr overrides.
    *
    * _NEW_POLYGON
    * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
    */
   bool drawing_points = brw_is_drawing_points(brw);

   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      int input_index = wm_prog_data->urb_setup[attr];

      /* Negative urb_setup entry means the FS doesn't read this varying. */
      if (input_index < 0)
         continue;

      /* _NEW_POINT */
      bool point_sprite = false;
      if (drawing_points) {
         /* Texture coordinates with coordinate replacement enabled, and
          * gl_PointCoord itself, are sourced from point sprite coordinates
          * rather than from the VUE.
          */
         if (point->PointSprite &&
             (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
             (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) {
            point_sprite = true;
         }

         if (attr == VARYING_SLOT_PNTC)
            point_sprite = true;

         if (point_sprite)
            *point_sprite_enables |= (1 << input_index);
      }

      /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
      struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };

      if (!point_sprite) {
         genX(get_attr_override)(&attribute,
                                 &brw->vue_map_geom_out,
                                 *urb_entry_read_offset, attr,
                                 _mesa_vertex_program_two_side_enabled(ctx),
                                 &max_source_attr);
      }

      /* The hardware can only do the overrides on 16 overrides at a
       * time, and the other up to 16 have to be lined up so that the
       * input index = the output index. We'll need to do some
       * tweaking to make sure that's the case.
       */
      if (input_index < 16)
         attr_overrides[input_index] = attribute;
      else
         assert(attribute.SourceAttribute == input_index);
   }

   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
    *
    * "This field should be set to the minimum length required to read the
    * maximum source attribute. The maximum source attribute is indicated
    * by the maximum value of the enabled Attribute # Source Attribute if
    * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    * enable is not set.
    * read_length = ceiling((max_source_attr + 1) / 2)
    *
    * [errata] Corruption/Hang possible if length programmed larger than
    * recommended"
    *
    * Similar text exists for Ivy Bridge.
    */
   *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
}
1153 #endif
1154
1155 /* ---------------------------------------------------------------------- */
1156
1157 #if GEN_GEN >= 6
/**
 * Upload depth/stencil state: an inline 3DSTATE_WM_DEPTH_STENCIL packet on
 * gen8+, or a DEPTH_STENCIL_STATE heap object plus a pointer packet on
 * gen6-7.
 */
static void
genX(upload_depth_stencil_state)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb =
      intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);

   /* _NEW_DEPTH */
   struct gl_depthbuffer_attrib *depth = &ctx->Depth;

   /* _NEW_STENCIL */
   struct gl_stencil_attrib *stencil = &ctx->Stencil;
   /* Index of the back-facing stencil state within the gl_stencil_attrib
    * arrays.
    */
   const int b = stencil->_BackFace;

#if GEN_GEN >= 8
   brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
#else
   uint32_t ds_offset;
   brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, wmds) {
#endif
      /* Depth testing only matters when a depth renderbuffer is bound. */
      if (depth->Test && depth_irb) {
         wmds.DepthTestEnable = true;
         wmds.DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
         wmds.DepthTestFunction = intel_translate_compare_func(depth->Func);
      }

      if (brw->stencil_enabled) {
         wmds.StencilTestEnable = true;
         /* Hardware stencil masks are 8 bits wide. */
         wmds.StencilWriteMask = stencil->WriteMask[0] & 0xff;
         wmds.StencilTestMask = stencil->ValueMask[0] & 0xff;

         wmds.StencilTestFunction =
            intel_translate_compare_func(stencil->Function[0]);
         wmds.StencilFailOp =
            intel_translate_stencil_op(stencil->FailFunc[0]);
         wmds.StencilPassDepthPassOp =
            intel_translate_stencil_op(stencil->ZPassFunc[0]);
         wmds.StencilPassDepthFailOp =
            intel_translate_stencil_op(stencil->ZFailFunc[0]);

         wmds.StencilBufferWriteEnable = brw->stencil_write_enabled;

         if (brw->stencil_two_sided) {
            wmds.DoubleSidedStencilEnable = true;
            wmds.BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
            wmds.BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;

            wmds.BackfaceStencilTestFunction =
               intel_translate_compare_func(stencil->Function[b]);
            wmds.BackfaceStencilFailOp =
               intel_translate_stencil_op(stencil->FailFunc[b]);
            wmds.BackfaceStencilPassDepthPassOp =
               intel_translate_stencil_op(stencil->ZPassFunc[b]);
            wmds.BackfaceStencilPassDepthFailOp =
               intel_translate_stencil_op(stencil->ZFailFunc[b]);
         }

#if GEN_GEN >= 9
         /* Gen9+ carries the stencil reference values in this packet rather
          * than in COLOR_CALC_STATE.
          */
         wmds.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
         wmds.BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b);
#endif
      }
   }

   /* On gen6-7 the state lives in the state heap; point the hardware at it. */
#if GEN_GEN == 6
   brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
      ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
      ptr.DEPTH_STENCIL_STATEChange = true;
   }
#elif GEN_GEN == 7
   brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {
      ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
   }
#endif
}
1235
/* Dirty bits that trigger re-emission of the depth/stencil state. */
static const struct brw_tracked_state genX(depth_stencil_state) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_DEPTH |
              _NEW_STENCIL,
      .brw = BRW_NEW_BLORP |
             (GEN_GEN >= 8 ? BRW_NEW_CONTEXT
                           : BRW_NEW_BATCH |
                             BRW_NEW_STATE_BASE_ADDRESS),
   },
   .emit = genX(upload_depth_stencil_state),
};
1248 #endif
1249
1250 /* ---------------------------------------------------------------------- */
1251
1252 #if GEN_GEN >= 6
/**
 * Upload 3DSTATE_CLIP: clip mode, culling (gen7), user clip distances,
 * provoking vertex convention, point width limits, and viewport/RTA index
 * configuration.
 */
static void
genX(upload_clip_state)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS */
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* BRW_NEW_FS_PROG_DATA */
   struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   brw_batch_emit(brw, GENX(3DSTATE_CLIP), clip) {
      /* Keep meta operations out of the pipeline statistics counters. */
      clip.StatisticsEnable = !brw->meta_in_progress;

      if (wm_prog_data->barycentric_interp_modes &
          BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
         clip.NonPerspectiveBarycentricEnable = true;

#if GEN_GEN >= 7
      clip.EarlyCullEnable = true;
#endif

#if GEN_GEN == 7
      /* Window coordinates in a user FBO are inverted, which flips the
       * effective winding.
       */
      clip.FrontWinding = brw->polygon_front_bit == _mesa_is_user_fbo(fb);

      if (ctx->Polygon.CullFlag) {
         switch (ctx->Polygon.CullFaceMode) {
         case GL_FRONT:
            clip.CullMode = CULLMODE_FRONT;
            break;
         case GL_BACK:
            clip.CullMode = CULLMODE_BACK;
            break;
         case GL_FRONT_AND_BACK:
            clip.CullMode = CULLMODE_BOTH;
            break;
         default:
            unreachable("Should not get here: invalid CullFlag");
         }
      } else {
         clip.CullMode = CULLMODE_NONE;
      }
#endif

#if GEN_GEN < 8
      clip.UserClipDistanceCullTestEnableBitmask =
         brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask;

      /* Depth clamping disables near/far plane clipping. */
      clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp;
#endif

      /* _NEW_LIGHT */
      if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
         clip.TriangleStripListProvokingVertexSelect = 0;
         clip.TriangleFanProvokingVertexSelect = 1;
         clip.LineStripListProvokingVertexSelect = 0;
      } else {
         clip.TriangleStripListProvokingVertexSelect = 2;
         clip.TriangleFanProvokingVertexSelect = 2;
         clip.LineStripListProvokingVertexSelect = 1;
      }

      /* _NEW_TRANSFORM */
      clip.UserClipDistanceClipTestEnableBitmask =
         ctx->Transform.ClipPlanesEnabled;

#if GEN_GEN >= 8
      clip.ForceUserClipDistanceClipTestEnableBitmask = true;
#endif

      if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
         clip.APIMode = APIMODE_D3D;
      else
         clip.APIMode = APIMODE_OGL;

      clip.GuardbandClipTestEnable = true;

      /* BRW_NEW_VIEWPORT_COUNT */
      const unsigned viewport_count = brw->clip.viewport_count;

      if (ctx->RasterDiscard) {
         clip.ClipMode = CLIPMODE_REJECT_ALL;
#if GEN_GEN == 6
         perf_debug("Rasterizer discard is currently implemented via the "
                    "clipper; having the GS not write primitives would "
                    "likely be faster.\n");
#endif
      } else {
         clip.ClipMode = CLIPMODE_NORMAL;
      }

      clip.ClipEnable = true;

      /* _NEW_POLYGON,
       * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
       */
      if (!brw_is_drawing_points(brw) && !brw_is_drawing_lines(brw))
         clip.ViewportXYClipTestEnable = true;

      /* Hardware point width limits. */
      clip.MinimumPointWidth = 0.125;
      clip.MaximumPointWidth = 255.875;
      clip.MaximumVPIndex = viewport_count - 1;
      if (_mesa_geometric_layers(fb) == 0)
         clip.ForceZeroRTAIndexEnable = true;
   }
}
1360
/* Dirty bits that trigger re-emission of 3DSTATE_CLIP. */
static const struct brw_tracked_state genX(clip_state) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_LIGHT |
              _NEW_POLYGON |
              _NEW_TRANSFORM,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_VS_PROG_DATA |
             BRW_NEW_META_IN_PROGRESS |
             BRW_NEW_PRIMITIVE |
             BRW_NEW_RASTERIZER_DISCARD |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_VIEWPORT_COUNT,
   },
   .emit = genX(upload_clip_state),
};
1380 #endif
1381
1382 /* ---------------------------------------------------------------------- */
1383
/**
 * Upload strips-and-fans (SF) state.  Emits the SF_STATE heap object on
 * gen4-5 and the 3DSTATE_SF packet on gen6+; the packet-builder brace is
 * opened inside the #if/#else below so the shared field assignments apply
 * to both paths.
 */
static void
genX(upload_sf)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   float point_size;

#if GEN_GEN <= 7
   /* _NEW_BUFFERS */
   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
   UNUSED const bool multisampled_fbo =
      _mesa_geometric_samples(ctx->DrawBuffer) > 1;
#endif

#if GEN_GEN < 6
   const struct brw_sf_prog_data *sf_prog_data = brw->sf.prog_data;

   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;

   brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) {
      sf.KernelStartPointer = KSP_ro(brw, brw->sf.prog_offset);
      sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
      sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;
      sf.DispatchGRFStartRegisterForURBData = 3;
      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
      sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length;
      sf.NumberofURBEntries = brw->urb.nr_sf_entries;
      sf.URBEntryAllocationSize = brw->urb.sfsize - 1;

      /* STATE_PREFETCH command description describes this state as being
       * something loaded through the GPE (L2 ISC), so it's INSTRUCTION
       * domain.
       */
      sf.SetupViewportStateOffset =
         instruction_ro_bo(brw->batch.bo, brw->sf.vp_offset);

      sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;

      /* sf.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; */
      /* sf.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; */

      sf.MaximumNumberofThreads =
         MIN2(GEN_GEN == 5 ? 48 : 24, brw->urb.nr_sf_entries) - 1;

      sf.SpritePointEnable = ctx->Point.PointSprite;

      sf.DestinationOriginHorizontalBias = 0.5;
      sf.DestinationOriginVerticalBias = 0.5;
#else
   brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
      sf.StatisticsEnable = true;
#endif
      sf.ViewportTransformEnable = true;

#if GEN_GEN == 7
      /* _NEW_BUFFERS */
      sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw);
#endif

#if GEN_GEN <= 7
      /* _NEW_POLYGON
       * Winding is inverted when rendering to a user FBO (Y flip).
       */
      sf.FrontWinding = brw->polygon_front_bit == render_to_fbo;
#if GEN_GEN >= 6
      sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
      sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
      sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;

      switch (ctx->Polygon.FrontMode) {
      case GL_FILL:
         sf.FrontFaceFillMode = FILL_MODE_SOLID;
         break;
      case GL_LINE:
         sf.FrontFaceFillMode = FILL_MODE_WIREFRAME;
         break;
      case GL_POINT:
         sf.FrontFaceFillMode = FILL_MODE_POINT;
         break;
      default:
         unreachable("not reached");
      }

      switch (ctx->Polygon.BackMode) {
      case GL_FILL:
         sf.BackFaceFillMode = FILL_MODE_SOLID;
         break;
      case GL_LINE:
         sf.BackFaceFillMode = FILL_MODE_WIREFRAME;
         break;
      case GL_POINT:
         sf.BackFaceFillMode = FILL_MODE_POINT;
         break;
      default:
         unreachable("not reached");
      }

      if (multisampled_fbo && ctx->Multisample.Enabled)
         sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;

      sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
      sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
      sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
#endif

      sf.ScissorRectangleEnable = true;

      if (ctx->Polygon.CullFlag) {
         switch (ctx->Polygon.CullFaceMode) {
         case GL_FRONT:
            sf.CullMode = CULLMODE_FRONT;
            break;
         case GL_BACK:
            sf.CullMode = CULLMODE_BACK;
            break;
         case GL_FRONT_AND_BACK:
            sf.CullMode = CULLMODE_BOTH;
            break;
         default:
            unreachable("not reached");
         }
      } else {
         sf.CullMode = CULLMODE_NONE;
      }

#if GEN_IS_HASWELL
      sf.LineStippleEnable = ctx->Line.StippleFlag;
#endif

#endif

      /* _NEW_LINE */
#if GEN_GEN == 8
      /* Cherryview uses a dedicated wider line width field. */
      if (brw->is_cherryview)
         sf.CHVLineWidth = brw_get_line_width(brw);
      else
         sf.LineWidth = brw_get_line_width(brw);
#else
      sf.LineWidth = brw_get_line_width(brw);
#endif

      if (ctx->Line.SmoothFlag) {
         sf.LineEndCapAntialiasingRegionWidth = _10pixels;
#if GEN_GEN <= 7
         sf.AntiAliasingEnable = true;
#endif
      }

      /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
      point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
      /* Clamp to the hardware limits */
      sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f);

      /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
      if (use_state_point_size(brw))
         sf.PointWidthSource = State;

#if GEN_GEN >= 8
      /* _NEW_POINT | _NEW_MULTISAMPLE */
      if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
          !ctx->Point.PointSprite)
         sf.SmoothPointEnable = true;
#endif

#if GEN_IS_G4X || GEN_GEN >= 5
      sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
#endif

      /* _NEW_LIGHT */
      if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
         sf.TriangleStripListProvokingVertexSelect = 2;
         sf.TriangleFanProvokingVertexSelect = 2;
         sf.LineStripListProvokingVertexSelect = 1;
      } else {
         sf.TriangleFanProvokingVertexSelect = 1;
      }

#if GEN_GEN == 6
      /* On gen6 the SF (rather than the SBE) handles attribute setup for
       * the fragment shader.
       */
      /* BRW_NEW_FS_PROG_DATA */
      const struct brw_wm_prog_data *wm_prog_data =
         brw_wm_prog_data(brw->wm.base.prog_data);

      sf.AttributeSwizzleEnable = true;
      sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;

      /*
       * Window coordinates in an FBO are inverted, which means point
       * sprite origin must be inverted, too.
       */
      if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
         sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
      } else {
         sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
      }

      /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
       * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
       */
      uint32_t urb_entry_read_length;
      uint32_t urb_entry_read_offset;
      uint32_t point_sprite_enables;
      genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables,
                                     &urb_entry_read_length,
                                     &urb_entry_read_offset);
      sf.VertexURBEntryReadLength = urb_entry_read_length;
      sf.VertexURBEntryReadOffset = urb_entry_read_offset;
      sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;
      sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
#endif
   }
}
1592
/* Dirty bits that trigger re-emission of the SF state, varying per gen. */
static const struct brw_tracked_state genX(sf_state) = {
   .dirty = {
      .mesa = _NEW_LIGHT |
              _NEW_LINE |
              _NEW_POINT |
              _NEW_PROGRAM |
              (GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) |
              (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
      .brw = BRW_NEW_BLORP |
             BRW_NEW_VUE_MAP_GEOM_OUT |
             (GEN_GEN <= 5 ? BRW_NEW_BATCH |
                             BRW_NEW_PROGRAM_CACHE |
                             BRW_NEW_SF_PROG_DATA |
                             BRW_NEW_SF_VP |
                             BRW_NEW_URB_FENCE
                           : 0) |
             (GEN_GEN >= 6 ? BRW_NEW_CONTEXT : 0) |
             (GEN_GEN >= 6 && GEN_GEN <= 7 ?
                BRW_NEW_GS_PROG_DATA |
                BRW_NEW_PRIMITIVE |
                BRW_NEW_TES_PROG_DATA
                : 0) |
             (GEN_GEN == 6 ? BRW_NEW_FS_PROG_DATA |
                             BRW_NEW_FRAGMENT_PROGRAM
                           : 0),
   },
   .emit = genX(upload_sf),
};
1621
1622 /* ---------------------------------------------------------------------- */
1623
1624 #if GEN_GEN >= 6
/**
 * Upload windower/masker (WM) state via 3DSTATE_WM, plus the gen6-only
 * 3DSTATE_CONSTANT_PS packet that must precede it.
 */
static void
genX(upload_wm)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   UNUSED bool writes_depth =
      wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;

#if GEN_GEN < 7
   const struct brw_stage_state *stage_state = &brw->wm.base;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* We can't fold this into gen6_upload_wm_push_constants(), because
    * according to the SNB PRM, vol 2 part 1 section 7.2.2
    * (3DSTATE_CONSTANT_PS [DevSNB]):
    *
    * "[DevSNB]: This packet must be followed by WM_STATE."
    */
   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_PS), wmcp) {
      if (wm_prog_data->base.nr_params != 0) {
         wmcp.Buffer0Valid = true;
         /* Pointer to the WM constant buffer. Covered by the set of
          * state flags from gen6_upload_wm_push_constants.
          */
         wmcp.PointertoPSConstantBuffer0 = stage_state->push_const_offset;
         wmcp.PSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
      }
   }
#endif

   brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
      wm.StatisticsEnable = true;
      wm.LineAntialiasingRegionWidth = _10pixels;
      wm.LineEndCapAntialiasingRegionWidth = _05pixels;

#if GEN_GEN < 7
      if (wm_prog_data->base.use_alt_mode)
         wm.FloatingPointMode = Alternate;

      wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4);
      wm.BindingTableEntryCount = wm_prog_data->base.binding_table.size_bytes / 4;
      wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
      wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
      wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
      wm.DispatchGRFStartRegisterForConstantSetupData0 =
         wm_prog_data->base.dispatch_grf_start_reg;
      wm.DispatchGRFStartRegisterForConstantSetupData2 =
         wm_prog_data->dispatch_grf_start_reg_2;
      wm.KernelStartPointer0 = stage_state->prog_offset;
      wm.KernelStartPointer2 = stage_state->prog_offset +
         wm_prog_data->prog_offset_2;
      wm.DualSourceBlendEnable =
         wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
         ctx->Color.Blend[0]._UsesDualSrc;
      wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;

      /* From the SNB PRM, volume 2 part 1, page 281:
       * "If the PS kernel does not need the Position XY Offsets
       * to compute a Position XY value, then this field should be
       * programmed to POSOFFSET_NONE."
       *
       * "SW Recommendation: If the PS kernel needs the Position Offsets
       * to compute a Position XY value, this field should match Position
       * ZW Interpolation Mode to ensure a consistent position.xyzw
       * computation."
       * We only require XY sample offsets. So, this recommendation doesn't
       * look useful at the moment. We might need this in future.
       */
      if (wm_prog_data->uses_pos_offset)
         wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
      else
         wm.PositionXYOffsetSelect = POSOFFSET_NONE;

      if (wm_prog_data->base.total_scratch) {
         wm.ScratchSpaceBasePointer =
            render_bo(stage_state->scratch_bo,
                      ffs(stage_state->per_thread_scratch) - 11);
      }

      wm.PixelShaderComputedDepth = writes_depth;
#endif

      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;

      /* _NEW_LINE */
      wm.LineStippleEnable = ctx->Line.StippleFlag;

      /* _NEW_POLYGON */
      wm.PolygonStippleEnable = ctx->Polygon.StippleFlag;
      wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;

#if GEN_GEN < 8
      /* _NEW_BUFFERS */
      const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;

      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
      if (wm_prog_data->uses_kill ||
          _mesa_is_alpha_test_enabled(ctx) ||
          _mesa_is_alpha_to_coverage_enabled(ctx) ||
          wm_prog_data->uses_omask) {
         wm.PixelShaderKillsPixel = true;
      }

      /* _NEW_BUFFERS | _NEW_COLOR
       * Note: reads back wm.PixelShaderKillsPixel set just above, so the
       * ordering of these two assignments matters.
       */
      if (brw_color_buffer_write_enabled(brw) || writes_depth ||
          wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) {
         wm.ThreadDispatchEnable = true;
      }
      if (multisampled_fbo) {
         /* _NEW_MULTISAMPLE */
         if (ctx->Multisample.Enabled)
            wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
         else
            wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;

         if (wm_prog_data->persample_dispatch)
            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
         else
            wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
      } else {
         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
      }

#if GEN_GEN >= 7
      wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
#endif

      /* The "UAV access enable" bits are unnecessary on HSW because they only
       * seem to have an effect on the HW-assisted coherency mechanism which we
       * don't need, and the rasterization-related UAV_ONLY flag and the
       * DISPATCH_ENABLE bit can be set independently from it.
       * C.f. gen8_upload_ps_extra().
       *
       * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS |
       * _NEW_COLOR
       */
#if GEN_IS_HASWELL
      if (!(brw_color_buffer_write_enabled(brw) || writes_depth) &&
          wm_prog_data->has_side_effects)
         wm.PSUAVonly = ON;
#endif
#endif

#if GEN_GEN >= 7
      /* BRW_NEW_FS_PROG_DATA */
      if (wm_prog_data->early_fragment_tests)
         wm.EarlyDepthStencilControl = EDSC_PREPS;
      else if (wm_prog_data->has_side_effects)
         wm.EarlyDepthStencilControl = EDSC_PSEXEC;
#endif
   }
}
1785
/* Dirty bits that trigger re-emission of the WM state above. */
static const struct brw_tracked_state genX(wm_state) = {
   .dirty = {
      .mesa = _NEW_LINE |
              _NEW_POLYGON |
              (GEN_GEN < 8 ? _NEW_BUFFERS |
                             _NEW_COLOR |
                             _NEW_MULTISAMPLE :
                             0) |
              (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
      .brw = BRW_NEW_BLORP |
             BRW_NEW_FS_PROG_DATA |
             (GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT),
   },
   .emit = genX(upload_wm),
};
1801 #endif
1802
1803 /* ---------------------------------------------------------------------- */
1804
/* Shared setup for the fixed-function shader stage packets (VS/HS/DS/GS):
 * kernel pointer, sampler and binding table counts, scratch space, dispatch
 * GRF start register, and URB entry read length/offset.  Expects
 * `stage_state`, `stage_prog_data` and `vue_prog_data` to be in scope at
 * the expansion site; `prefix` is pasted onto the URB field names
 * (e.g. VertexURBEntryReadLength).
 */
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
   pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \
   pkt.SamplerCount = \
      DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
   pkt.BindingTableEntryCount = \
      stage_prog_data->binding_table.size_bytes / 4; \
   pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \
 \
   if (stage_prog_data->total_scratch) { \
      pkt.ScratchSpaceBasePointer = \
         render_bo(stage_state->scratch_bo, 0); \
      pkt.PerThreadScratchSpace = \
         ffs(stage_state->per_thread_scratch) - 11; \
   } \
 \
   pkt.DispatchGRFStartRegisterForURBData = \
      stage_prog_data->dispatch_grf_start_reg; \
   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \
   pkt.prefix##URBEntryReadOffset = 0; \
 \
   pkt.StatisticsEnable = true; \
   pkt.Enable = true;
1827
/**
 * Upload vertex shader state: 3DSTATE_VS on gen6+ or the VS_STATE heap
 * object on gen4-5, plus the gen6 constant packet and post-emit workaround
 * flush.
 */
static void
genX(upload_vs_state)(struct brw_context *brw)
{
   UNUSED struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct brw_stage_state *stage_state = &brw->vs.base;

   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vue_prog_data *vue_prog_data =
      brw_vue_prog_data(brw->vs.base.prog_data);
   const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base;

   assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ||
          vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT);

#if GEN_GEN == 6
   /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
    *
    * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
    * command that causes the VS Function Enable to toggle. Pipeline
    * flush can be executed by sending a PIPE_CONTROL command with CS
    * stall bit set and a post sync operation.
    *
    * We've already done such a flush at the start of state upload, so we
    * don't need to do another one here.
    */
   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) {
      if (stage_state->push_const_size != 0) {
         cvs.Buffer0Valid = true;
         cvs.PointertoVSConstantBuffer0 = stage_state->push_const_offset;
         cvs.VSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
      }
   }
#endif

   if (GEN_GEN == 7 && devinfo->is_ivybridge)
      gen7_emit_vs_workaround_flush(brw);

#if GEN_GEN >= 6
   brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
#else
   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
   brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) {
#endif
      INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);

      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;

#if GEN_GEN < 6
      vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
      vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length;
      vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2;

      vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 : 0);
      vs.URBEntryAllocationSize = brw->urb.vsize - 1;

      /* Overrides the value set by INIT_THREAD_DISPATCH_FIELDS above. */
      vs.MaximumNumberofThreads =
         CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1;

      vs.StatisticsEnable = false;
      vs.SamplerStatePointer =
         instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
#endif

#if GEN_GEN == 5
      /* Force single program flow on Ironlake. We cannot reliably get
       * all applications working without it. See:
       * https://bugs.freedesktop.org/show_bug.cgi?id=29172
       *
       * The most notable and reliably failing application is the Humus
       * demo "CelShading"
       */
      vs.SingleProgramFlow = true;
      vs.SamplerCount = 0; /* hardware requirement */
#endif

#if GEN_GEN >= 8
      vs.SIMD8DispatchEnable =
         vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;

      vs.UserClipDistanceCullTestEnableBitmask =
         vue_prog_data->cull_distance_mask;
#endif
   }

#if GEN_GEN == 6
   /* Based on my reading of the simulator, the VS constants don't get
    * pulled into the VS FF unit until an appropriate pipeline flush
    * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
    * references to them into a little FIFO. The flushes are common,
    * but don't reliably happen between this and a 3DPRIMITIVE, causing
    * the primitive to use the wrong constants. Then the FIFO
    * containing the constant setup gets added to again on the next
    * constants change, and eventually when a flush does happen the
    * unit is overwhelmed by constant changes and dies.
    *
    * To avoid this, send a PIPE_CONTROL down the line that will
    * update the unit immediately loading the constants. The flush
    * type bits here were those set by the STATE_BASE_ADDRESS whose
    * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
    * bug reports that led to this workaround, and may be more than
    * what is strictly required to avoid the issue.
    */
   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                               PIPE_CONTROL_STATE_CACHE_INVALIDATE);
#endif
}
1938
/* Dirty bits that trigger re-emission of the VS state above. */
static const struct brw_tracked_state genX(vs_state) = {
   .dirty = {
      .mesa = (GEN_GEN == 6 ? (_NEW_PROGRAM_CONSTANTS | _NEW_TRANSFORM) : 0),
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_VS_PROG_DATA |
             (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) |
             (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
                             BRW_NEW_PROGRAM_CACHE |
                             BRW_NEW_SAMPLER_STATE_TABLE |
                             BRW_NEW_URB_FENCE
                           : 0),
   },
   .emit = genX(upload_vs_state),
};
1955
1956 /* ---------------------------------------------------------------------- */
1957
1958 static void
1959 genX(upload_cc_viewport)(struct brw_context *brw)
1960 {
1961 struct gl_context *ctx = &brw->ctx;
1962
1963 /* BRW_NEW_VIEWPORT_COUNT */
1964 const unsigned viewport_count = brw->clip.viewport_count;
1965
1966 struct GENX(CC_VIEWPORT) ccv;
1967 uint32_t cc_vp_offset;
1968 uint32_t *cc_map =
1969 brw_state_batch(brw, 4 * GENX(CC_VIEWPORT_length) * viewport_count,
1970 32, &cc_vp_offset);
1971
1972 for (unsigned i = 0; i < viewport_count; i++) {
1973 /* _NEW_VIEWPORT | _NEW_TRANSFORM */
1974 const struct gl_viewport_attrib *vp = &ctx->ViewportArray[i];
1975 if (ctx->Transform.DepthClamp) {
1976 ccv.MinimumDepth = MIN2(vp->Near, vp->Far);
1977 ccv.MaximumDepth = MAX2(vp->Near, vp->Far);
1978 } else {
1979 ccv.MinimumDepth = 0.0;
1980 ccv.MaximumDepth = 1.0;
1981 }
1982 GENX(CC_VIEWPORT_pack)(NULL, cc_map, &ccv);
1983 cc_map += GENX(CC_VIEWPORT_length);
1984 }
1985
1986 #if GEN_GEN >= 7
1987 brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
1988 ptr.CCViewportPointer = cc_vp_offset;
1989 }
1990 #elif GEN_GEN == 6
1991 brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
1992 vp.CCViewportStateChange = 1;
1993 vp.PointertoCC_VIEWPORT = cc_vp_offset;
1994 }
1995 #else
1996 brw->cc.vp_offset = cc_vp_offset;
1997 ctx->NewDriverState |= BRW_NEW_CC_VP;
1998 #endif
1999 }
2000
/* State atom for genX(upload_cc_viewport); non-static because pre-gen6
 * code elsewhere references it.
 */
const struct brw_tracked_state genX(cc_vp) = {
   .dirty = {
      .mesa = _NEW_TRANSFORM |
              _NEW_VIEWPORT,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_VIEWPORT_COUNT,
   },
   .emit = genX(upload_cc_viewport)
};
2011
2012 /* ---------------------------------------------------------------------- */
2013
2014 static inline void
2015 set_scissor_bits(const struct gl_context *ctx, int i,
2016 bool render_to_fbo, unsigned fb_width, unsigned fb_height,
2017 struct GENX(SCISSOR_RECT) *sc)
2018 {
2019 int bbox[4];
2020
2021 bbox[0] = MAX2(ctx->ViewportArray[i].X, 0);
2022 bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width);
2023 bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0);
2024 bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height);
2025 _mesa_intersect_scissor_bounding_box(ctx, i, bbox);
2026
2027 if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) {
2028 /* If the scissor was out of bounds and got clamped to 0 width/height
2029 * at the bounds, the subtraction of 1 from maximums could produce a
2030 * negative number and thus not clip anything. Instead, just provide
2031 * a min > max scissor inside the bounds, which produces the expected
2032 * no rendering.
2033 */
2034 sc->ScissorRectangleXMin = 1;
2035 sc->ScissorRectangleXMax = 0;
2036 sc->ScissorRectangleYMin = 1;
2037 sc->ScissorRectangleYMax = 0;
2038 } else if (render_to_fbo) {
2039 /* texmemory: Y=0=bottom */
2040 sc->ScissorRectangleXMin = bbox[0];
2041 sc->ScissorRectangleXMax = bbox[1] - 1;
2042 sc->ScissorRectangleYMin = bbox[2];
2043 sc->ScissorRectangleYMax = bbox[3] - 1;
2044 } else {
2045 /* memory: Y=0=top */
2046 sc->ScissorRectangleXMin = bbox[0];
2047 sc->ScissorRectangleXMax = bbox[1] - 1;
2048 sc->ScissorRectangleYMin = fb_height - bbox[3];
2049 sc->ScissorRectangleYMax = fb_height - bbox[2] - 1;
2050 }
2051 }
2052
2053 #if GEN_GEN >= 6
2054 static void
2055 genX(upload_scissor_state)(struct brw_context *brw)
2056 {
2057 struct gl_context *ctx = &brw->ctx;
2058 const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
2059 struct GENX(SCISSOR_RECT) scissor;
2060 uint32_t scissor_state_offset;
2061 const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
2062 const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer);
2063 uint32_t *scissor_map;
2064
2065 /* BRW_NEW_VIEWPORT_COUNT */
2066 const unsigned viewport_count = brw->clip.viewport_count;
2067
2068 scissor_map = brw_state_batch(
2069 brw, GENX(SCISSOR_RECT_length) * sizeof(uint32_t) * viewport_count,
2070 32, &scissor_state_offset);
2071
2072 /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
2073
2074 /* The scissor only needs to handle the intersection of drawable and
2075 * scissor rect. Clipping to the boundaries of static shared buffers
2076 * for front/back/depth is covered by looping over cliprects in brw_draw.c.
2077 *
2078 * Note that the hardware's coordinates are inclusive, while Mesa's min is
2079 * inclusive but max is exclusive.
2080 */
2081 for (unsigned i = 0; i < viewport_count; i++) {
2082 set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height, &scissor);
2083 GENX(SCISSOR_RECT_pack)(
2084 NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor);
2085 }
2086
2087 brw_batch_emit(brw, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
2088 ptr.ScissorRectPointer = scissor_state_offset;
2089 }
2090 }
2091
/* State atom for genX(upload_scissor_state) (gen6+ only). */
static const struct brw_tracked_state genX(scissor_state) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_SCISSOR |
              _NEW_VIEWPORT,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_VIEWPORT_COUNT,
   },
   .emit = genX(upload_scissor_state),
};
2103 #endif
2104
2105 /* ---------------------------------------------------------------------- */
2106
2107 static void
2108 brw_calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
2109 float m00, float m11, float m30, float m31,
2110 float *xmin, float *xmax,
2111 float *ymin, float *ymax)
2112 {
2113 /* According to the "Vertex X,Y Clamping and Quantization" section of the
2114 * Strips and Fans documentation:
2115 *
2116 * "The vertex X and Y screen-space coordinates are also /clamped/ to the
2117 * fixed-point "guardband" range supported by the rasterization hardware"
2118 *
2119 * and
2120 *
2121 * "In almost all circumstances, if an object’s vertices are actually
2122 * modified by this clamping (i.e., had X or Y coordinates outside of
2123 * the guardband extent the rendered object will not match the intended
2124 * result. Therefore software should take steps to ensure that this does
2125 * not happen - e.g., by clipping objects such that they do not exceed
2126 * these limits after the Drawing Rectangle is applied."
2127 *
2128 * I believe the fundamental restriction is that the rasterizer (in
2129 * the SF/WM stages) have a limit on the number of pixels that can be
2130 * rasterized. We need to ensure any coordinates beyond the rasterizer
2131 * limit are handled by the clipper. So effectively that limit becomes
2132 * the clipper's guardband size.
2133 *
2134 * It goes on to say:
2135 *
2136 * "In addition, in order to be correctly rendered, objects must have a
2137 * screenspace bounding box not exceeding 8K in the X or Y direction.
2138 * This additional restriction must also be comprehended by software,
2139 * i.e., enforced by use of clipping."
2140 *
2141 * This makes no sense. Gen7+ hardware supports 16K render targets,
2142 * and you definitely need to be able to draw polygons that fill the
2143 * surface. Our assumption is that the rasterizer was limited to 8K
2144 * on Sandybridge, which only supports 8K surfaces, and it was actually
2145 * increased to 16K on Ivybridge and later.
2146 *
2147 * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge.
2148 */
2149 const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f;
2150
2151 if (m00 != 0 && m11 != 0) {
2152 /* First, we compute the screen-space render area */
2153 const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00);
2154 const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00);
2155 const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11);
2156 const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11);
2157
2158 /* We want the guardband to be centered on that */
2159 const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size;
2160 const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size;
2161 const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size;
2162 const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size;
2163
2164 /* Now we need it in native device coordinates */
2165 const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00;
2166 const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00;
2167 const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11;
2168 const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11;
2169
2170 /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be
2171 * flipped upside-down. X should be fine though.
2172 */
2173 assert(ndc_gb_xmin <= ndc_gb_xmax);
2174 *xmin = ndc_gb_xmin;
2175 *xmax = ndc_gb_xmax;
2176 *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax);
2177 *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax);
2178 } else {
2179 /* The viewport scales to 0, so nothing will be rendered. */
2180 *xmin = 0.0f;
2181 *xmax = 0.0f;
2182 *ymin = 0.0f;
2183 *ymax = 0.0f;
2184 }
2185 }
2186
2187 static void
2188 genX(upload_sf_clip_viewport)(struct brw_context *brw)
2189 {
2190 struct gl_context *ctx = &brw->ctx;
2191 float y_scale, y_bias;
2192
2193 /* BRW_NEW_VIEWPORT_COUNT */
2194 const unsigned viewport_count = brw->clip.viewport_count;
2195
2196 /* _NEW_BUFFERS */
2197 const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
2198 const uint32_t fb_width = (float)_mesa_geometric_width(ctx->DrawBuffer);
2199 const uint32_t fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer);
2200
2201 #if GEN_GEN >= 7
2202 #define clv sfv
2203 struct GENX(SF_CLIP_VIEWPORT) sfv;
2204 uint32_t sf_clip_vp_offset;
2205 uint32_t *sf_clip_map =
2206 brw_state_batch(brw, GENX(SF_CLIP_VIEWPORT_length) * 4 * viewport_count,
2207 64, &sf_clip_vp_offset);
2208 #else
2209 struct GENX(SF_VIEWPORT) sfv;
2210 struct GENX(CLIP_VIEWPORT) clv;
2211 uint32_t sf_vp_offset, clip_vp_offset;
2212 uint32_t *sf_map =
2213 brw_state_batch(brw, GENX(SF_VIEWPORT_length) * 4 * viewport_count,
2214 32, &sf_vp_offset);
2215 uint32_t *clip_map =
2216 brw_state_batch(brw, GENX(CLIP_VIEWPORT_length) * 4 * viewport_count,
2217 32, &clip_vp_offset);
2218 #endif
2219
2220 /* _NEW_BUFFERS */
2221 if (render_to_fbo) {
2222 y_scale = 1.0;
2223 y_bias = 0;
2224 } else {
2225 y_scale = -1.0;
2226 y_bias = (float)fb_height;
2227 }
2228
2229 for (unsigned i = 0; i < brw->clip.viewport_count; i++) {
2230 /* _NEW_VIEWPORT: Guardband Clipping */
2231 float scale[3], translate[3], gb_xmin, gb_xmax, gb_ymin, gb_ymax;
2232 _mesa_get_viewport_xform(ctx, i, scale, translate);
2233
2234 sfv.ViewportMatrixElementm00 = scale[0];
2235 sfv.ViewportMatrixElementm11 = scale[1] * y_scale,
2236 sfv.ViewportMatrixElementm22 = scale[2],
2237 sfv.ViewportMatrixElementm30 = translate[0],
2238 sfv.ViewportMatrixElementm31 = translate[1] * y_scale + y_bias,
2239 sfv.ViewportMatrixElementm32 = translate[2],
2240 brw_calculate_guardband_size(fb_width, fb_height,
2241 sfv.ViewportMatrixElementm00,
2242 sfv.ViewportMatrixElementm11,
2243 sfv.ViewportMatrixElementm30,
2244 sfv.ViewportMatrixElementm31,
2245 &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax);
2246
2247
2248 clv.XMinClipGuardband = gb_xmin;
2249 clv.XMaxClipGuardband = gb_xmax;
2250 clv.YMinClipGuardband = gb_ymin;
2251 clv.YMaxClipGuardband = gb_ymax;
2252
2253 #if GEN_GEN < 6
2254 set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height,
2255 &sfv.ScissorRectangle);
2256 #elif GEN_GEN >= 8
2257 /* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport
2258 * The hardware will take the intersection of the drawing rectangle,
2259 * scissor rectangle, and the viewport extents. We don't need to be
2260 * smart, and can therefore just program the viewport extents.
2261 */
2262 const float viewport_Xmax =
2263 ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width;
2264 const float viewport_Ymax =
2265 ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height;
2266
2267 if (render_to_fbo) {
2268 sfv.XMinViewPort = ctx->ViewportArray[i].X;
2269 sfv.XMaxViewPort = viewport_Xmax - 1;
2270 sfv.YMinViewPort = ctx->ViewportArray[i].Y;
2271 sfv.YMaxViewPort = viewport_Ymax - 1;
2272 } else {
2273 sfv.XMinViewPort = ctx->ViewportArray[i].X;
2274 sfv.XMaxViewPort = viewport_Xmax - 1;
2275 sfv.YMinViewPort = fb_height - viewport_Ymax;
2276 sfv.YMaxViewPort = fb_height - ctx->ViewportArray[i].Y - 1;
2277 }
2278 #endif
2279
2280 #if GEN_GEN >= 7
2281 GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_map, &sfv);
2282 sf_clip_map += GENX(SF_CLIP_VIEWPORT_length);
2283 #else
2284 GENX(SF_VIEWPORT_pack)(NULL, sf_map, &sfv);
2285 GENX(CLIP_VIEWPORT_pack)(NULL, clip_map, &clv);
2286 sf_map += GENX(SF_VIEWPORT_length);
2287 clip_map += GENX(CLIP_VIEWPORT_length);
2288 #endif
2289 }
2290
2291 #if GEN_GEN >= 7
2292 brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
2293 ptr.SFClipViewportPointer = sf_clip_vp_offset;
2294 }
2295 #elif GEN_GEN == 6
2296 brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
2297 vp.SFViewportStateChange = 1;
2298 vp.CLIPViewportStateChange = 1;
2299 vp.PointertoCLIP_VIEWPORT = clip_vp_offset;
2300 vp.PointertoSF_VIEWPORT = sf_vp_offset;
2301 }
2302 #else
2303 brw->sf.vp_offset = sf_vp_offset;
2304 brw->clip.vp_offset = clip_vp_offset;
2305 brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP;
2306 #endif
2307 }
2308
/* State atom for genX(upload_sf_clip_viewport).  Pre-gen6 also packs the
 * scissor into SF_VIEWPORT, hence the extra _NEW_SCISSOR dependency.
 */
static const struct brw_tracked_state genX(sf_clip_viewport) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_VIEWPORT |
              (GEN_GEN <= 5 ? _NEW_SCISSOR : 0),
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_VIEWPORT_COUNT,
   },
   .emit = genX(upload_sf_clip_viewport),
};
2320
2321 /* ---------------------------------------------------------------------- */
2322
2323 #if GEN_GEN >= 6
/* Emit 3DSTATE_GS (and, pre-gen7, 3DSTATE_CONSTANT_GS), covering three
 * cases: an active GS program, the gen6 fixed-function transform-feedback
 * GS, or GS disabled.  Note the if/else chain below deliberately spans
 * preprocessor branches: the `} else if` arm only exists when GEN_GEN < 7.
 */
static void
genX(upload_gs_state)(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_stage_state *stage_state = &brw->gs.base;
   /* BRW_NEW_GEOMETRY_PROGRAM */
   bool active = brw->geometry_program;   /* pointer -> bool: non-NULL means a GS is bound */

   /* BRW_NEW_GS_PROG_DATA */
   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
   const struct brw_vue_prog_data *vue_prog_data =
      brw_vue_prog_data(stage_prog_data);
#if GEN_GEN >= 7
   const struct brw_gs_prog_data *gs_prog_data =
      brw_gs_prog_data(stage_prog_data);
#endif

#if GEN_GEN < 7
   /* Pre-gen7 push constants are pointed at by a dedicated packet. */
   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) {
      if (active && stage_state->push_const_size != 0) {
         cgs.Buffer0Valid = true;
         cgs.PointertoGSConstantBuffer0 = stage_state->push_const_offset;
         cgs.GSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
      }
   }
#endif

#if GEN_GEN == 7 && !GEN_IS_HASWELL
   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    * "Note: Because of corruption in IVB:GT2, software needs to flush the
    * whole fixed function pipeline when the GS enable changes value in
    * the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
   if (brw->gt == 2 && brw->gs.enabled != active)
      gen7_emit_cs_stall_flush(brw);
#endif

   if (active) {
      brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
         INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);

#if GEN_GEN >= 7
         gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
         gs.OutputTopology = gs_prog_data->output_topology;
         gs.ControlDataHeaderSize =
            gs_prog_data->control_data_header_size_hwords;

         gs.InstanceControl = gs_prog_data->invocations - 1;
         gs.DispatchMode = vue_prog_data->dispatch_mode;

         gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;

         gs.ControlDataFormat = gs_prog_data->control_data_format;
#endif

         /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between
          * Ivy Bridge and Haswell.
          *
          * On Ivy Bridge, setting this bit causes the vertices of a triangle
          * strip to be delivered to the geometry shader in an order that does
          * not strictly follow the OpenGL spec, but preserves triangle
          * orientation.  For example, if the vertices are (1, 2, 3, 4, 5), then
          * the geometry shader sees triangles:
          *
          * (1, 2, 3), (2, 4, 3), (3, 4, 5)
          *
          * (Clearing the bit is even worse, because it fails to preserve
          * orientation).
          *
          * Triangle strips with adjacency always ordered in a way that preserves
          * triangle orientation but does not strictly follow the OpenGL spec,
          * regardless of the setting of this bit.
          *
          * On Haswell, both triangle strips and triangle strips with adjacency
          * are always ordered in a way that preserves triangle orientation.
          * Setting this bit causes the ordering to strictly follow the OpenGL
          * spec.
          *
          * So in either case we want to set the bit.  Unfortunately on Ivy
          * Bridge this will get the order close to correct but not perfect.
          */
         gs.ReorderMode = TRAILING;
         gs.MaximumNumberofThreads =
            GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1)
                         : (devinfo->max_gs_threads - 1);

#if GEN_GEN < 7
         gs.SOStatisticsEnable = true;
         gs.RenderingEnabled = 1;
         if (brw->geometry_program->info.has_transform_feedback_varyings)
            gs.SVBIPayloadEnable = true;

         /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
          * was previously done for gen6.
          *
          * TODO: test with both disabled to see if the HW is behaving
          * as expected, like in gen7.
          */
         gs.SingleProgramFlow = true;
         gs.VectorMaskEnable = true;
#endif

#if GEN_GEN >= 8
         gs.ExpectedVertexCount = gs_prog_data->vertices_in;

         if (gs_prog_data->static_vertex_count != -1) {
            gs.StaticOutput = true;
            gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
         }
         gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;

         gs.UserClipDistanceCullTestEnableBitmask =
            vue_prog_data->cull_distance_mask;

         /* URB output starts after the 8-DWord VUE header (1 pair). */
         const int urb_entry_write_offset = 1;
         const uint32_t urb_entry_output_length =
            DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
            urb_entry_write_offset;

         gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
         gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
#endif
      }
#if GEN_GEN < 7
   } else if (brw->ff_gs.prog_active) {
      /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS
       * program. This function provides the needed 3DSTATE_GS for this.
       */
      upload_gs_state_for_tf(brw);
#endif
   } else {
      /* GS disabled: still emit 3DSTATE_GS with statistics enabled. */
      brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
         gs.StatisticsEnable = true;
#if GEN_GEN < 7
         gs.RenderingEnabled = true;
#endif

#if GEN_GEN < 8
         gs.DispatchGRFStartRegisterForURBData = 1;
#if GEN_GEN >= 7
         gs.IncludeVertexHandles = true;
#endif
#endif
      }
   }
#if GEN_GEN < 7
   /* Remembered so the IVB GT2 workaround above can detect enable flips. */
   brw->gs.enabled = active;
#endif
}
2480
/* State atom for genX(upload_gs_state) (gen6+ only). */
static const struct brw_tracked_state genX(gs_state) = {
   .dirty = {
      .mesa = (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             (GEN_GEN < 7 ? BRW_NEW_FF_GS_PROG_DATA : 0),
   },
   .emit = genX(upload_gs_state),
};
2493 #endif
2494
2495 /* ---------------------------------------------------------------------- */
2496
2497 UNUSED static GLenum
2498 fix_dual_blend_alpha_to_one(GLenum function)
2499 {
2500 switch (function) {
2501 case GL_SRC1_ALPHA:
2502 return GL_ONE;
2503
2504 case GL_ONE_MINUS_SRC1_ALPHA:
2505 return GL_ZERO;
2506 }
2507
2508 return function;
2509 }
2510
/* Shorthand for translating GL blend factors/equations into the hardware
 * encodings (helpers declared in brw_util.h).
 */
#define blend_factor(x) brw_translate_blend_factor(x)
#define blend_eqn(x) brw_translate_blend_equation(x)
2513
2514 #if GEN_GEN >= 6
/* Build and upload the BLEND_STATE table for all draw buffers, then emit
 * the pointer packet.  The brace structure below intentionally differs per
 * generation: on gen8+ a single shared GENX(BLEND_STATE) header precedes a
 * per-RT entry loop; on gen6-7 everything lives in per-RT entries and
 * `#define blend entry` redirects the shared-field stores accordingly.
 */
static void
genX(upload_blend_state)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   int size;

   /* We need at least one BLEND_STATE written, because we might do
    * thread dispatch even if _NumColorDrawBuffers is 0 (for example
    * for computed depth or alpha test), which will do an FB write
    * with render target 0, which will reference BLEND_STATE[0] for
    * alpha test enable.
    */
   int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
   if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled)
      nr_draw_buffers = 1;

   size = GENX(BLEND_STATE_ENTRY_length) * 4 * nr_draw_buffers;
#if GEN_GEN >= 8
   size += GENX(BLEND_STATE_length) * 4;
#endif

   uint32_t *blend_map;
   blend_map = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset);

#if GEN_GEN >= 8
   struct GENX(BLEND_STATE) blend = { 0 };
   {
#else
   for (int i = 0; i < nr_draw_buffers; i++) {
      struct GENX(BLEND_STATE_ENTRY) entry = { 0 };
#define blend entry
#endif
      /* OpenGL specification 3.3 (page 196), section 4.1.3 says:
       * "If drawbuffer zero is not NONE and the buffer it references has an
       * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE
       * operations are skipped."
       */
      if (!(ctx->DrawBuffer->_IntegerBuffers & 0x1)) {
         /* _NEW_MULTISAMPLE */
         if (_mesa_is_multisample_enabled(ctx)) {
            if (ctx->Multisample.SampleAlphaToCoverage) {
               blend.AlphaToCoverageEnable = true;
               blend.AlphaToCoverageDitherEnable = GEN_GEN >= 7;
            }
            if (ctx->Multisample.SampleAlphaToOne)
               blend.AlphaToOneEnable = true;
         }

         /* _NEW_COLOR */
         if (ctx->Color.AlphaEnabled) {
            blend.AlphaTestEnable = true;
            blend.AlphaTestFunction =
               intel_translate_compare_func(ctx->Color.AlphaFunc);
         }

         if (ctx->Color.DitherFlag) {
            blend.ColorDitherEnable = true;
         }
      }

#if GEN_GEN >= 8
      for (int i = 0; i < nr_draw_buffers; i++) {
         struct GENX(BLEND_STATE_ENTRY) entry = { 0 };
#else
      {
#endif

         /* _NEW_BUFFERS */
         struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];

         /* Used for implementing the following bit of GL_EXT_texture_integer:
          * "Per-fragment operations that require floating-point color
          * components, including multisample alpha operations, alpha test,
          * blending, and dithering, have no effect when the corresponding
          * colors are written to an integer color buffer."
          */
         bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i);

         /* _NEW_COLOR */
         if (ctx->Color.ColorLogicOpEnabled) {
            GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format)
                                : GL_UNSIGNED_NORMALIZED;
            WARN_ONCE(ctx->Color.LogicOp != GL_COPY &&
                      rb_type != GL_UNSIGNED_NORMALIZED &&
                      rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
                      "renderbuffer\n",
                      _mesa_enum_to_string(ctx->Color.LogicOp),
                      _mesa_enum_to_string(rb_type));
            if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
               entry.LogicOpEnable = true;
               entry.LogicOpFunction =
                  intel_translate_logic_op(ctx->Color.LogicOp);
            }
         } else if (ctx->Color.BlendEnabled & (1 << i) && !integer &&
                    !ctx->Color._AdvancedBlendMode) {
            GLenum eqRGB = ctx->Color.Blend[i].EquationRGB;
            GLenum eqA = ctx->Color.Blend[i].EquationA;
            GLenum srcRGB = ctx->Color.Blend[i].SrcRGB;
            GLenum dstRGB = ctx->Color.Blend[i].DstRGB;
            GLenum srcA = ctx->Color.Blend[i].SrcA;
            GLenum dstA = ctx->Color.Blend[i].DstA;

            /* MIN/MAX equations ignore the blend factors entirely. */
            if (eqRGB == GL_MIN || eqRGB == GL_MAX)
               srcRGB = dstRGB = GL_ONE;

            if (eqA == GL_MIN || eqA == GL_MAX)
               srcA = dstA = GL_ONE;

            /* Due to hardware limitations, the destination may have information
             * in an alpha channel even when the format specifies no alpha
             * channel. In order to avoid getting any incorrect blending due to
             * that alpha channel, coerce the blend factors to values that will
             * not read the alpha channel, but will instead use the correct
             * implicit value for alpha.
             */
            if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
                                                     GL_TEXTURE_ALPHA_TYPE)) {
               srcRGB = brw_fix_xRGB_alpha(srcRGB);
               srcA = brw_fix_xRGB_alpha(srcA);
               dstRGB = brw_fix_xRGB_alpha(dstRGB);
               dstA = brw_fix_xRGB_alpha(dstA);
            }

            /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
             * "If Dual Source Blending is enabled, this bit must be disabled."
             *
             * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO,
             * and leave it enabled anyway.
             */
            if (ctx->Color.Blend[i]._UsesDualSrc && blend.AlphaToOneEnable) {
               srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
               srcA = fix_dual_blend_alpha_to_one(srcA);
               dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
               dstA = fix_dual_blend_alpha_to_one(dstA);
            }

            entry.ColorBufferBlendEnable = true;
            entry.DestinationBlendFactor = blend_factor(dstRGB);
            entry.SourceBlendFactor = blend_factor(srcRGB);
            entry.DestinationAlphaBlendFactor = blend_factor(dstA);
            entry.SourceAlphaBlendFactor = blend_factor(srcA);
            entry.ColorBlendFunction = blend_eqn(eqRGB);
            entry.AlphaBlendFunction = blend_eqn(eqA);

            if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
               blend.IndependentAlphaBlendEnable = true;
         }

         /* See section 8.1.6 "Pre-Blend Color Clamping" of the
          * SandyBridge PRM Volume 2 Part 1 for HW requirements.
          *
          * We do our ARB_color_buffer_float CLAMP_FRAGMENT_COLOR
          * clamping in the fragment shader. For its clamping of
          * blending, the spec says:
          *
          * "RESOLVED: For fixed-point color buffers, the inputs and
          * the result of the blending equation are clamped. For
          * floating-point color buffers, no clamping occurs."
          *
          * So, generally, we want clamping to the render target's range.
          * And, good news, the hardware tables for both pre- and
          * post-blend color clamping are either ignored, or any are
          * allowed, or clamping is required but RT range clamping is a
          * valid option.
          */
         entry.PreBlendColorClampEnable = true;
         entry.PostBlendColorClampEnable = true;
         entry.ColorClampRange = COLORCLAMP_RTFORMAT;

         entry.WriteDisableRed = !ctx->Color.ColorMask[i][0];
         entry.WriteDisableGreen = !ctx->Color.ColorMask[i][1];
         entry.WriteDisableBlue = !ctx->Color.ColorMask[i][2];
         entry.WriteDisableAlpha = !ctx->Color.ColorMask[i][3];

         /* Gen8+ entries start one BLEND_STATE header (2 DWords) in. */
#if GEN_GEN >= 8
         GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
#else
         GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry);
#endif
      }
   }

#if GEN_GEN >= 8
   GENX(BLEND_STATE_pack)(NULL, blend_map, &blend);
#endif

#if GEN_GEN < 7
   brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
      ptr.PointertoBLEND_STATE = brw->cc.blend_state_offset;
      ptr.BLEND_STATEChange = true;
   }
#else
   brw_batch_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {
      ptr.BlendStatePointer = brw->cc.blend_state_offset;
#if GEN_GEN >= 8
      ptr.BlendStatePointerValid = true;
#endif
   }
#endif
}
2715
/* State atom for genX(upload_blend_state) (gen6+ only). */
static const struct brw_tracked_state genX(blend_state) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR |
              _NEW_MULTISAMPLE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_STATE_BASE_ADDRESS,
   },
   .emit = genX(upload_blend_state),
};
2727 #endif
2728
2729 /* ---------------------------------------------------------------------- */
2730
2731 #if GEN_GEN >= 7
/* Per-stage _3DCommandSubOpcode values for the 3DSTATE_CONSTANT_* packets,
 * indexed by gl_shader_stage; used by upload_constant_state() to retarget
 * the VS packet template at other stages.  Compute has no such packet.
 */
UNUSED static const uint32_t push_constant_opcodes[] = {
   [MESA_SHADER_VERTEX] = 21,
   [MESA_SHADER_TESS_CTRL] = 25, /* HS */
   [MESA_SHADER_TESS_EVAL] = 26, /* DS */
   [MESA_SHADER_GEOMETRY] = 22,
   [MESA_SHADER_FRAGMENT] = 23,
   [MESA_SHADER_COMPUTE] = 0,
};
2740
/* Emit a 3DSTATE_CONSTANT_* packet pointing at `stage`'s push constants
 * (or an empty packet when inactive or size zero).  The VS packet layout
 * is reused for every stage by overriding its sub-opcode from
 * push_constant_opcodes[].  Gen7+ only.
 */
static void
upload_constant_state(struct brw_context *brw,
                      struct brw_stage_state *stage_state,
                      bool active, uint32_t stage)
{
   /* Pre-gen8 the buffer offset field carries MOCS bits in its low bits. */
   UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
   active = active && stage_state->push_const_size != 0;

   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
      pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
      if (active) {
         /* HSW+/gen8+ use constant-buffer slot 2 with a relocated BO
          * address; IVB uses slot 0 with a raw offset.
          */
#if GEN_GEN >= 8 || GEN_IS_HASWELL
         pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
         pkt.ConstantBody.Buffer[2] =
            render_ro_bo(brw->curbe.curbe_bo, stage_state->push_const_offset);
#else
         pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
         pkt.ConstantBody.Buffer[0].offset =
            stage_state->push_const_offset | mocs;
#endif
      }
   }

   /* NOTE(review): gen9+ flags BRW_NEW_SURFACES here — presumably a
    * workaround coupling constants to binding tables; confirm intent.
    */
   brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
}
2766 #endif
2767
2768 #if GEN_GEN >= 6
/* Gather the VS push constants into the CURBE buffer and, on gen7+, emit
 * the 3DSTATE_CONSTANT_VS packet pointing at them.
 */
static void
genX(upload_vs_push_constants)(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->vs.base;

   /* BRW_NEW_VERTEX_PROGRAM */
   const struct brw_program *vp = brw_program_const(brw->vertex_program);
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX);
   gen6_upload_push_constants(brw, &vp->program, prog_data, stage_state);

#if GEN_GEN >= 7
   /* IVB (non-HSW, non-BYT) needs a flush before constants land. */
   if (GEN_GEN == 7 && !GEN_IS_HASWELL && !brw->is_baytrail)
      gen7_emit_vs_workaround_flush(brw);

   upload_constant_state(brw, stage_state, true /* active */,
                         MESA_SHADER_VERTEX);
#endif
}
2790
2791 static const struct brw_tracked_state genX(vs_push_constants) = {
2792 .dirty = {
2793 .mesa = _NEW_PROGRAM_CONSTANTS |
2794 _NEW_TRANSFORM,
2795 .brw = BRW_NEW_BATCH |
2796 BRW_NEW_BLORP |
2797 BRW_NEW_PUSH_CONSTANT_ALLOCATION |
2798 BRW_NEW_VERTEX_PROGRAM |
2799 BRW_NEW_VS_PROG_DATA,
2800 },
2801 .emit = genX(upload_vs_push_constants),
2802 };
2803
2804 static void
2805 genX(upload_gs_push_constants)(struct brw_context *brw)
2806 {
2807 struct brw_stage_state *stage_state = &brw->gs.base;
2808
2809 /* BRW_NEW_GEOMETRY_PROGRAM */
2810 const struct brw_program *gp = brw_program_const(brw->geometry_program);
2811
2812 if (gp) {
2813 /* BRW_NEW_GS_PROG_DATA */
2814 struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
2815
2816 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
2817 gen6_upload_push_constants(brw, &gp->program, prog_data, stage_state);
2818 }
2819
2820 #if GEN_GEN >= 7
2821 upload_constant_state(brw, stage_state, gp, MESA_SHADER_GEOMETRY);
2822 #endif
2823 }
2824
/* State atom for genX(upload_gs_push_constants). */
static const struct brw_tracked_state genX(gs_push_constants) = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS |
              _NEW_TRANSFORM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_PUSH_CONSTANT_ALLOCATION,
   },
   .emit = genX(upload_gs_push_constants),
};
2837
/**
 * Upload push constants for the fragment shader stage.
 *
 * Unlike the GS/TES paths there is no NULL check on the program:
 * the upload runs unconditionally (a fragment program is presumably
 * always bound here — note the unchecked &fp->program dereference).
 */
static void
genX(upload_wm_push_constants)(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct brw_program *fp = brw_program_const(brw->fragment_program);
   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);

   gen6_upload_push_constants(brw, &fp->program, prog_data, stage_state);

#if GEN_GEN >= 7
   /* FS is always considered active for 3DSTATE_CONSTANT_PS purposes. */
   upload_constant_state(brw, stage_state, true, MESA_SHADER_FRAGMENT);
#endif
}
2855
/* State atom for FS push constants; see genX(upload_wm_push_constants). */
static const struct brw_tracked_state genX(wm_push_constants) = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_PUSH_CONSTANT_ALLOCATION,
   },
   .emit = genX(upload_wm_push_constants),
};
2867 #endif
2868
2869 /* ---------------------------------------------------------------------- */
2870
2871 #if GEN_GEN >= 6
2872 static unsigned
2873 genX(determine_sample_mask)(struct brw_context *brw)
2874 {
2875 struct gl_context *ctx = &brw->ctx;
2876 float coverage = 1.0f;
2877 float coverage_invert = false;
2878 unsigned sample_mask = ~0u;
2879
2880 /* BRW_NEW_NUM_SAMPLES */
2881 unsigned num_samples = brw->num_samples;
2882
2883 if (_mesa_is_multisample_enabled(ctx)) {
2884 if (ctx->Multisample.SampleCoverage) {
2885 coverage = ctx->Multisample.SampleCoverageValue;
2886 coverage_invert = ctx->Multisample.SampleCoverageInvert;
2887 }
2888 if (ctx->Multisample.SampleMask) {
2889 sample_mask = ctx->Multisample.SampleMaskValue;
2890 }
2891 }
2892
2893 if (num_samples > 1) {
2894 int coverage_int = (int) (num_samples * coverage + 0.5f);
2895 uint32_t coverage_bits = (1 << coverage_int) - 1;
2896 if (coverage_invert)
2897 coverage_bits ^= (1 << num_samples) - 1;
2898 return coverage_bits & sample_mask;
2899 } else {
2900 return 1;
2901 }
2902 }
2903
/**
 * Emit 3DSTATE_MULTISAMPLE for the given sample count, including the
 * per-generation standard sample positions (Gen6 always programs the
 * 4x pattern; Gen7 selects by count).
 *
 * NOTE(review): the assert checks brw->num_samples rather than the
 * num_samples parameter — fine for the existing caller, which passes
 * brw->num_samples, but worth confirming if new callers appear.
 */
static void
genX(emit_3dstate_multisample2)(struct brw_context *brw,
                                unsigned num_samples)
{
   assert(brw->num_samples <= 16);

   /* log2 of the sample count; MAX2 guards the num_samples == 0 case. */
   unsigned log2_samples = ffs(MAX2(num_samples, 1)) - 1;

   brw_batch_emit(brw, GENX(3DSTATE_MULTISAMPLE), multi) {
      multi.PixelLocation = CENTER;
      multi.NumberofMultisamples = log2_samples;
#if GEN_GEN == 6
      GEN_SAMPLE_POS_4X(multi.Sample);
#elif GEN_GEN == 7
      switch (num_samples) {
      case 1:
         GEN_SAMPLE_POS_1X(multi.Sample);
         break;
      case 2:
         GEN_SAMPLE_POS_2X(multi.Sample);
         break;
      case 4:
         GEN_SAMPLE_POS_4X(multi.Sample);
         break;
      case 8:
         GEN_SAMPLE_POS_8X(multi.Sample);
         break;
      default:
         break;
      }
#endif
   }
}
2937
2938 static void
2939 genX(upload_multisample_state)(struct brw_context *brw)
2940 {
2941 genX(emit_3dstate_multisample2)(brw, brw->num_samples);
2942
2943 brw_batch_emit(brw, GENX(3DSTATE_SAMPLE_MASK), sm) {
2944 sm.SampleMask = genX(determine_sample_mask)(brw);
2945 }
2946 }
2947
/* State atom for multisample state; see genX(upload_multisample_state). */
static const struct brw_tracked_state genX(multisample_state) = {
   .dirty = {
      .mesa = _NEW_MULTISAMPLE,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_NUM_SAMPLES,
   },
   .emit = genX(upload_multisample_state)
};
2957 #endif
2958
2959 /* ---------------------------------------------------------------------- */
2960
2961 #if GEN_GEN >= 6
/**
 * Upload COLOR_CALC_STATE (alpha reference, stencil reference values and
 * blend constant color) and point the pipeline at it via
 * 3DSTATE_CC_STATE_POINTERS.
 */
static void
genX(upload_color_calc_state)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   brw_state_emit(brw, GENX(COLOR_CALC_STATE), 64, &brw->cc.state_offset, cc) {
      /* _NEW_COLOR */
      cc.AlphaTestFormat = ALPHATEST_UNORM8;
      UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8,
                               ctx->Color.AlphaRef);

#if GEN_GEN < 9
      /* _NEW_STENCIL — Gen9+ programs stencil references elsewhere. */
      cc.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
      cc.BackfaceStencilReferenceValue =
         _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
#endif

      /* _NEW_COLOR — unclamped values; hardware handles clamping. */
      cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
      cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
      cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
      cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
   }

   brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
      ptr.ColorCalcStatePointer = brw->cc.state_offset;
#if GEN_GEN != 7
      /* Gen7 has no "valid" bit on this pointer. */
      ptr.ColorCalcStatePointerValid = true;
#endif
   }
}
2994
/* State atom for COLOR_CALC_STATE; see genX(upload_color_calc_state). */
static const struct brw_tracked_state genX(color_calc_state) = {
   .dirty = {
      .mesa = _NEW_COLOR |
              _NEW_STENCIL,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CC_STATE |
             BRW_NEW_STATE_BASE_ADDRESS,
   },
   .emit = genX(upload_color_calc_state),
};
3006
3007 #endif
3008
3009 /* ---------------------------------------------------------------------- */
3010
3011 #if GEN_GEN >= 7
/**
 * Upload 3DSTATE_SBE (setup backend): attribute swizzling/overrides,
 * point sprite enables, and the URB read length/offset for FS inputs.
 *
 * On Gen8+ the attribute overrides live in a separate 3DSTATE_SBE_SWIZ
 * packet, so they are gathered into a local array first; on Gen7 they
 * are written directly into the SBE packet via the attr_overrides macro.
 */
static void
genX(upload_sbe)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);
#if GEN_GEN >= 8
   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } };
#else
#define attr_overrides sbe.Attribute
#endif
   uint32_t urb_entry_read_length;
   uint32_t urb_entry_read_offset;
   uint32_t point_sprite_enables;

   brw_batch_emit(brw, GENX(3DSTATE_SBE), sbe) {
      sbe.AttributeSwizzleEnable = true;
      sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;

      /* _NEW_BUFFERS */
      bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);

      /* _NEW_POINT
       *
       * Window coordinates in an FBO are inverted, which means point
       * sprite origin must be inverted.
       */
      if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo)
         sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
      else
         sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;

      /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM,
       * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM |
       * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA |
       * BRW_NEW_VUE_MAP_GEOM_OUT
       */
      genX(calculate_attr_overrides)(brw,
                                     attr_overrides,
                                     &point_sprite_enables,
                                     &urb_entry_read_length,
                                     &urb_entry_read_offset);

      /* Typically, the URB entry read length and offset should be programmed
       * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active
       * stage which produces geometry.  However, we don't know the proper
       * value until we call calculate_attr_overrides().
       *
       * To fit with our existing code, we override the inherited values and
       * specify it here directly, as we did on previous generations.
       */
      sbe.VertexURBEntryReadLength = urb_entry_read_length;
      sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
      sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables;
      sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;

#if GEN_GEN >= 8
      /* Make the hardware honor our explicit read length/offset above. */
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#endif

#if GEN_GEN >= 9
      /* prepare the active component dwords */
      int input_index = 0;
      for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
         if (!(brw->fragment_program->info.inputs_read &
               BITFIELD64_BIT(attr))) {
            continue;
         }

         assert(input_index < 32);

         sbe.AttributeActiveComponentFormat[input_index] = ACTIVE_COMPONENT_XYZW;
         ++input_index;
      }
#endif
   }

#if GEN_GEN >= 8
   /* Gen8+: attribute overrides go in their own packet. */
   brw_batch_emit(brw, GENX(3DSTATE_SBE_SWIZ), sbes) {
      for (int i = 0; i < 16; i++)
         sbes.Attribute[i] = attr_overrides[i];
   }
#endif

#undef attr_overrides
}
3100
/* State atom for 3DSTATE_SBE; see genX(upload_sbe).  BRW_NEW_PRIMITIVE
 * only matters on Gen7, where attr overrides depend on the primitive.
 */
static const struct brw_tracked_state genX(sbe_state) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_LIGHT |
              _NEW_POINT |
              _NEW_POLYGON |
              _NEW_PROGRAM,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_VUE_MAP_GEOM_OUT |
             (GEN_GEN == 7 ? BRW_NEW_PRIMITIVE
                           : 0),
   },
   .emit = genX(upload_sbe),
};
3120 #endif
3121
3122 /* ---------------------------------------------------------------------- */
3123
3124 #if GEN_GEN >= 7
/**
 * Outputs the 3DSTATE_SO_DECL_LIST command.
 *
 * The data output is a series of 64-bit entries containing a SO_DECL per
 * stream.  We only have one stream of rendering coming out of the GS unit, so
 * we only emit stream 0 (low 16 bits) SO_DECLs.
 */
static void
genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
                                  const struct brw_vue_map *vue_map)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
   const struct gl_transform_feedback_info *linked_xfb_info =
      xfb_obj->program->sh.LinkedTransformFeedback;
   struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
   int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int max_decls = 0;
   STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);

   memset(so_decl, 0, sizeof(so_decl));

   /* Construct the list of SO_DECLs to be emitted.  The formatting of the
    * command feels strange -- each dword pair contains a SO_DECL per stream.
    */
   for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
      const struct gl_transform_feedback_output *output =
         &linked_xfb_info->Outputs[i];
      const int buffer = output->OutputBuffer;
      const int varying = output->OutputRegister;
      const unsigned stream_id = output->StreamId;
      assert(stream_id < MAX_VERTEX_STREAMS);

      buffer_mask[stream_id] |= 1 << buffer;

      assert(vue_map->varying_to_slot[varying] >= 0);

      /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
       * array.  Instead, it simply increments DstOffset for the following
       * input by the number of components that should be skipped.
       *
       * Our hardware is unusual in that it requires us to program SO_DECLs
       * for fake "hole" components, rather than simply taking the offset
       * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
       * program as many size = 4 holes as we can, then a final hole to
       * accommodate the final 1, 2, or 3 remaining.
       */
      int skip_components = output->DstOffset - next_offset[buffer];

      while (skip_components > 0) {
         so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
            .HoleFlag = 1,
            .OutputBufferSlot = output->OutputBuffer,
            .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
         };
         skip_components -= 4;
      }

      next_offset[buffer] = output->DstOffset + output->NumComponents;

      so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
         .OutputBufferSlot = output->OutputBuffer,
         .RegisterIndex = vue_map->varying_to_slot[varying],
         .ComponentMask =
            ((1 << output->NumComponents) - 1) << output->ComponentOffset,
      };

      /* Track the longest per-stream list; the command length covers all
       * streams, so shorter streams are padded with zeroed SO_DECLs.
       */
      if (decls[stream_id] > max_decls)
         max_decls = decls[stream_id];
   }

   uint32_t *dw;
   dw = brw_batch_emitn(brw, GENX(3DSTATE_SO_DECL_LIST), 3 + 2 * max_decls,
                        .StreamtoBufferSelects0 = buffer_mask[0],
                        .StreamtoBufferSelects1 = buffer_mask[1],
                        .StreamtoBufferSelects2 = buffer_mask[2],
                        .StreamtoBufferSelects3 = buffer_mask[3],
                        .NumEntries0 = decls[0],
                        .NumEntries1 = decls[1],
                        .NumEntries2 = decls[2],
                        .NumEntries3 = decls[3]);

   /* Pack one SO_DECL_ENTRY (one SO_DECL per stream) per dword pair,
    * starting after the 2-dword packet header.
    */
   for (int i = 0; i < max_decls; i++) {
      GENX(SO_DECL_ENTRY_pack)(
         brw, dw + 2 + i * 2,
         &(struct GENX(SO_DECL_ENTRY)) {
            .Stream0Decl = so_decl[0][i],
            .Stream1Decl = so_decl[1][i],
            .Stream2Decl = so_decl[2][i],
            .Stream3Decl = so_decl[3][i],
         });
   }
}
3222
/**
 * Emit 3DSTATE_SO_BUFFER for each of the four transform feedback buffer
 * bindings.  Unbound slots get an empty packet (just the buffer index).
 *
 * Gen7 programs base/end addresses and pitch here; Gen8+ programs size
 * and the stream offset address (the pitch moves to 3DSTATE_STREAMOUT).
 */
static void
genX(upload_3dstate_so_buffers)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
#if GEN_GEN < 8
   const struct gl_transform_feedback_info *linked_xfb_info =
      xfb_obj->program->sh.LinkedTransformFeedback;
#else
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) xfb_obj;
   uint32_t mocs_wb = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
#endif

   /* Set up the up to 4 output buffers.  These are the ranges defined in the
    * gl_transform_feedback_object.
    */
   for (int i = 0; i < 4; i++) {
      struct intel_buffer_object *bufferobj =
         intel_buffer_object(xfb_obj->Buffers[i]);

      if (!bufferobj) {
         brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
            sob.SOBufferIndex = i;
         }
         continue;
      }

      uint32_t start = xfb_obj->Offset[i];
      assert(start % 4 == 0);
      uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, bufferobj, start, end - start);
      assert(end <= bo->size);

      brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
         sob.SOBufferIndex = i;

         sob.SurfaceBaseAddress = render_bo(bo, start);
#if GEN_GEN < 8
         sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4;
         sob.SurfaceEndAddress = render_bo(bo, end);
#else
         sob.SOBufferEnable = true;
         sob.StreamOffsetWriteEnable = true;
         sob.StreamOutputBufferOffsetAddressEnable = true;
         sob.SOBufferMOCS = mocs_wb;

         sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1;
         sob.StreamOutputBufferOffsetAddress =
            instruction_bo(brw_obj->offset_bo, i * sizeof(uint32_t));

         if (brw_obj->zero_offsets) {
            /* Zero out the offset and write that to offset_bo */
            sob.StreamOffset = 0;
         } else {
            /* Use offset_bo as the "Stream Offset." */
            sob.StreamOffset = 0xFFFFFFFF;
         }
#endif
      }
   }

#if GEN_GEN >= 8
   /* Offsets have now been (re)initialized; don't zero them again. */
   brw_obj->zero_offsets = false;
#endif
}
3292
3293 static inline bool
3294 query_active(struct gl_query_object *q)
3295 {
3296 return q && q->Active;
3297 }
3298
/**
 * Emit 3DSTATE_STREAMOUT, enabling/configuring the SOL stage.
 *
 * When transform feedback is inactive, an all-zero packet disables the
 * stage.  When active, enables SO, configures rasterizer discard,
 * provoking-vertex reorder mode, buffer enables (Gen7) or pitches
 * (Gen8+), and per-stream vertex read ranges.
 */
static void
genX(upload_3dstate_streamout)(struct brw_context *brw, bool active,
                               const struct brw_vue_map *vue_map)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;

   brw_batch_emit(brw, GENX(3DSTATE_STREAMOUT), sos) {
      if (active) {
         int urb_entry_read_offset = 0;
         int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
            urb_entry_read_offset;

         sos.SOFunctionEnable = true;
         sos.SOStatisticsEnable = true;

         /* BRW_NEW_RASTERIZER_DISCARD */
         if (ctx->RasterDiscard) {
            /* SOL-level rendering disable can't feed the primitives-
             * generated counter, so fall back to clipper-based discard
             * when that query is running.
             */
            if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
               sos.RenderingDisable = true;
            } else {
               perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
                          "query active relies on the clipper.");
            }
         }

         /* _NEW_LIGHT */
         if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
            sos.ReorderMode = TRAILING;

#if GEN_GEN < 8
         sos.SOBufferEnable0 = xfb_obj->Buffers[0] != NULL;
         sos.SOBufferEnable1 = xfb_obj->Buffers[1] != NULL;
         sos.SOBufferEnable2 = xfb_obj->Buffers[2] != NULL;
         sos.SOBufferEnable3 = xfb_obj->Buffers[3] != NULL;
#else
         const struct gl_transform_feedback_info *linked_xfb_info =
            xfb_obj->program->sh.LinkedTransformFeedback;
         /* Set buffer pitches; 0 means unbound. */
         if (xfb_obj->Buffers[0])
            sos.Buffer0SurfacePitch = linked_xfb_info->Buffers[0].Stride * 4;
         if (xfb_obj->Buffers[1])
            sos.Buffer1SurfacePitch = linked_xfb_info->Buffers[1].Stride * 4;
         if (xfb_obj->Buffers[2])
            sos.Buffer2SurfacePitch = linked_xfb_info->Buffers[2].Stride * 4;
         if (xfb_obj->Buffers[3])
            sos.Buffer3SurfacePitch = linked_xfb_info->Buffers[3].Stride * 4;
#endif

         /* We always read the whole vertex.  This could be reduced at some
          * point by reading less and offsetting the register index in the
          * SO_DECLs.
          */
         sos.Stream0VertexReadOffset = urb_entry_read_offset;
         sos.Stream0VertexReadLength = urb_entry_read_length - 1;
         sos.Stream1VertexReadOffset = urb_entry_read_offset;
         sos.Stream1VertexReadLength = urb_entry_read_length - 1;
         sos.Stream2VertexReadOffset = urb_entry_read_offset;
         sos.Stream2VertexReadLength = urb_entry_read_length - 1;
         sos.Stream3VertexReadOffset = urb_entry_read_offset;
         sos.Stream3VertexReadLength = urb_entry_read_length - 1;
      }
   }
}
3365
3366 static void
3367 genX(upload_sol)(struct brw_context *brw)
3368 {
3369 struct gl_context *ctx = &brw->ctx;
3370 /* BRW_NEW_TRANSFORM_FEEDBACK */
3371 bool active = _mesa_is_xfb_active_and_unpaused(ctx);
3372
3373 if (active) {
3374 genX(upload_3dstate_so_buffers)(brw);
3375
3376 /* BRW_NEW_VUE_MAP_GEOM_OUT */
3377 genX(upload_3dstate_so_decl_list)(brw, &brw->vue_map_geom_out);
3378 }
3379
3380 /* Finally, set up the SOL stage. This command must always follow updates to
3381 * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
3382 * MMIO register updates (current performed by the kernel at each batch
3383 * emit).
3384 */
3385 genX(upload_3dstate_streamout)(brw, active, &brw->vue_map_geom_out);
3386 }
3387
/* State atom for stream output; see genX(upload_sol). */
static const struct brw_tracked_state genX(sol_state) = {
   .dirty = {
      .mesa = _NEW_LIGHT,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_RASTERIZER_DISCARD |
             BRW_NEW_VUE_MAP_GEOM_OUT |
             BRW_NEW_TRANSFORM_FEEDBACK,
   },
   .emit = genX(upload_sol),
};
3399 #endif
3400
3401 /* ---------------------------------------------------------------------- */
3402
3403 #if GEN_GEN >= 7
3404 static void
3405 genX(upload_ps)(struct brw_context *brw)
3406 {
3407 UNUSED const struct gl_context *ctx = &brw->ctx;
3408 UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
3409
3410 /* BRW_NEW_FS_PROG_DATA */
3411 const struct brw_wm_prog_data *prog_data =
3412 brw_wm_prog_data(brw->wm.base.prog_data);
3413 const struct brw_stage_state *stage_state = &brw->wm.base;
3414
3415 #if GEN_GEN < 8
3416 #endif
3417
3418 brw_batch_emit(brw, GENX(3DSTATE_PS), ps) {
3419 /* Initialize the execution mask with VMask. Otherwise, derivatives are
3420 * incorrect for subspans where some of the pixels are unlit. We believe
3421 * the bit just didn't take effect in previous generations.
3422 */
3423 ps.VectorMaskEnable = GEN_GEN >= 8;
3424
3425 ps.SamplerCount =
3426 DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
3427
3428 /* BRW_NEW_FS_PROG_DATA */
3429 ps.BindingTableEntryCount = prog_data->base.binding_table.size_bytes / 4;
3430
3431 if (prog_data->base.use_alt_mode)
3432 ps.FloatingPointMode = Alternate;
3433
3434 /* Haswell requires the sample mask to be set in this packet as well as
3435 * in 3DSTATE_SAMPLE_MASK; the values should match.
3436 */
3437
3438 /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
3439 #if GEN_IS_HASWELL
3440 ps.SampleMask = genX(determine_sample_mask(brw));
3441 #endif
3442
3443 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
3444 * it implicitly scales for different GT levels (which have some # of
3445 * PSDs).
3446 *
3447 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
3448 */
3449 #if GEN_GEN >= 9
3450 ps.MaximumNumberofThreadsPerPSD = 64 - 1;
3451 #elif GEN_GEN >= 8
3452 ps.MaximumNumberofThreadsPerPSD = 64 - 2;
3453 #else
3454 ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
3455 #endif
3456
3457 if (prog_data->base.nr_params > 0)
3458 ps.PushConstantEnable = true;
3459
3460 #if GEN_GEN < 8
3461 /* From the IVB PRM, volume 2 part 1, page 287:
3462 * "This bit is inserted in the PS payload header and made available to
3463 * the DataPort (either via the message header or via header bypass) to
3464 * indicate that oMask data (one or two phases) is included in Render
3465 * Target Write messages. If present, the oMask data is used to mask off
3466 * samples."
3467 */
3468 ps.oMaskPresenttoRenderTarget = prog_data->uses_omask;
3469
3470 /* The hardware wedges if you have this bit set but don't turn on any
3471 * dual source blend factors.
3472 *
3473 * BRW_NEW_FS_PROG_DATA | _NEW_COLOR
3474 */
3475 ps.DualSourceBlendEnable = prog_data->dual_src_blend &&
3476 (ctx->Color.BlendEnabled & 1) &&
3477 ctx->Color.Blend[0]._UsesDualSrc;
3478
3479 /* BRW_NEW_FS_PROG_DATA */
3480 ps.AttributeEnable = (prog_data->num_varying_inputs != 0);
3481 #endif
3482
3483 /* From the documentation for this packet:
3484 * "If the PS kernel does not need the Position XY Offsets to
3485 * compute a Position Value, then this field should be programmed
3486 * to POSOFFSET_NONE."
3487 *
3488 * "SW Recommendation: If the PS kernel needs the Position Offsets
3489 * to compute a Position XY value, this field should match Position
3490 * ZW Interpolation Mode to ensure a consistent position.xyzw
3491 * computation."
3492 *
3493 * We only require XY sample offsets. So, this recommendation doesn't
3494 * look useful at the moment. We might need this in future.
3495 */
3496 if (prog_data->uses_pos_offset)
3497 ps.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
3498 else
3499 ps.PositionXYOffsetSelect = POSOFFSET_NONE;
3500
3501 ps.RenderTargetFastClearEnable = brw->wm.fast_clear_op;
3502 ps._8PixelDispatchEnable = prog_data->dispatch_8;
3503 ps._16PixelDispatchEnable = prog_data->dispatch_16;
3504 ps.DispatchGRFStartRegisterForConstantSetupData0 =
3505 prog_data->base.dispatch_grf_start_reg;
3506 ps.DispatchGRFStartRegisterForConstantSetupData2 =
3507 prog_data->dispatch_grf_start_reg_2;
3508
3509 ps.KernelStartPointer0 = stage_state->prog_offset;
3510 ps.KernelStartPointer2 = stage_state->prog_offset +
3511 prog_data->prog_offset_2;
3512
3513 if (prog_data->base.total_scratch) {
3514 ps.ScratchSpaceBasePointer =
3515 render_bo(stage_state->scratch_bo,
3516 ffs(stage_state->per_thread_scratch) - 11);
3517 }
3518 }
3519 }
3520
/* State atom for 3DSTATE_PS; _NEW_BUFFERS/_NEW_COLOR only matter on
 * pre-Gen8, where dual-source blend enable is programmed here.
 */
static const struct brw_tracked_state genX(ps_state) = {
   .dirty = {
      .mesa = _NEW_MULTISAMPLE |
              (GEN_GEN < 8 ? _NEW_BUFFERS |
                             _NEW_COLOR
                           : 0),
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = genX(upload_ps),
};
3533 #endif
3534
3535 /* ---------------------------------------------------------------------- */
3536
3537 #if GEN_GEN >= 7
/**
 * Upload 3DSTATE_HS (tessellation control / hull shader).  Emits an
 * all-zero packet to disable the stage when no TCS program data exists.
 *
 * NOTE(review): vue_prog_data has no direct use in this body — presumably
 * consumed by the INIT_THREAD_DISPATCH_FIELDS macro; confirm there.
 */
static void
genX(upload_hs_state)(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct brw_stage_state *stage_state = &brw->tcs.base;
   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
   const struct brw_vue_prog_data *vue_prog_data =
      brw_vue_prog_data(stage_prog_data);

   /* BRW_NEW_TES_PROG_DATA */
   struct brw_tcs_prog_data *tcs_prog_data =
      brw_tcs_prog_data(stage_prog_data);

   if (!tcs_prog_data) {
      brw_batch_emit(brw, GENX(3DSTATE_HS), hs);
   } else {
      brw_batch_emit(brw, GENX(3DSTATE_HS), hs) {
         INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);

         hs.InstanceCount = tcs_prog_data->instances - 1;
         hs.IncludeVertexHandles = true;

         hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
      }
   }
}
3564
/* State atom for 3DSTATE_HS; see genX(upload_hs_state). */
static const struct brw_tracked_state genX(hs_state) = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS,
   },
   .emit = genX(upload_hs_state),
};
3575
/**
 * Upload 3DSTATE_DS (tessellation evaluation / domain shader).  Emits an
 * all-zero packet to disable the stage when no TES program data exists.
 */
static void
genX(upload_ds_state)(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_stage_state *stage_state = &brw->tes.base;
   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;

   /* BRW_NEW_TES_PROG_DATA */
   const struct brw_tes_prog_data *tes_prog_data =
      brw_tes_prog_data(stage_prog_data);
   const struct brw_vue_prog_data *vue_prog_data =
      brw_vue_prog_data(stage_prog_data);

   if (!tes_prog_data) {
      brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
   } else {
      brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
         INIT_THREAD_DISPATCH_FIELDS(ds, Patch);

         ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
         /* W only needs computing for triangular domains. */
         ds.ComputeWCoordinateEnable =
            tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;

#if GEN_GEN >= 8
         if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
            ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
         ds.UserClipDistanceCullTestEnableBitmask =
            vue_prog_data->cull_distance_mask;
#endif
      }
   }
}
3608
/* State atom for 3DSTATE_DS; see genX(upload_ds_state). */
static const struct brw_tracked_state genX(ds_state) = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TES_PROG_DATA,
   },
   .emit = genX(upload_ds_state),
};
3619
3620 /* ---------------------------------------------------------------------- */
3621
3622 static void
3623 upload_te_state(struct brw_context *brw)
3624 {
3625 /* BRW_NEW_TESS_PROGRAMS */
3626 bool active = brw->tess_eval_program;
3627
3628 /* BRW_NEW_TES_PROG_DATA */
3629 const struct brw_tes_prog_data *tes_prog_data =
3630 brw_tes_prog_data(brw->tes.base.prog_data);
3631
3632 if (active) {
3633 brw_batch_emit(brw, GENX(3DSTATE_TE), te) {
3634 te.Partitioning = tes_prog_data->partitioning;
3635 te.OutputTopology = tes_prog_data->output_topology;
3636 te.TEDomain = tes_prog_data->domain;
3637 te.TEEnable = true;
3638 te.MaximumTessellationFactorOdd = 63.0;
3639 te.MaximumTessellationFactorNotOdd = 64.0;
3640 }
3641 } else {
3642 brw_batch_emit(brw, GENX(3DSTATE_TE), te);
3643 }
3644 }
3645
/* State atom for 3DSTATE_TE; see upload_te_state. */
static const struct brw_tracked_state genX(te_state) = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS,
   },
   .emit = upload_te_state,
};
3656
3657 /* ---------------------------------------------------------------------- */
3658
/**
 * Upload push constants for the tessellation evaluation stage.
 * 3DSTATE_CONSTANT_DS is emitted unconditionally, with the program
 * pointer (tep) doubling as the "active" flag.
 */
static void
genX(upload_tes_push_constants)(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->tes.base;
   /* BRW_NEW_TESS_PROGRAMS */
   const struct brw_program *tep = brw_program_const(brw->tess_eval_program);

   if (tep) {
      /* BRW_NEW_TES_PROG_DATA */
      const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
      _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL);
      gen6_upload_push_constants(brw, &tep->program, prog_data, stage_state);
   }

   upload_constant_state(brw, stage_state, tep, MESA_SHADER_TESS_EVAL);
}
3675
/* State atom for TES push constants; see genX(upload_tes_push_constants). */
static const struct brw_tracked_state genX(tes_push_constants) = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_PUSH_CONSTANT_ALLOCATION |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TES_PROG_DATA,
   },
   .emit = genX(upload_tes_push_constants),
};
3687
/**
 * Upload push constants for the tessellation control stage.
 *
 * Activity is keyed off the TES program (tessellation is active iff a
 * TES is bound), not the TCS itself.
 *
 * NOTE(review): when active, &tcp->program is taken without a NULL check
 * on tcp — presumably safe because either a TCS is bound or the callee
 * tolerates the resulting pointer when the app supplied no TCS; confirm
 * against gen6_upload_push_constants and the passthrough-TCS path.
 */
static void
genX(upload_tcs_push_constants)(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->tcs.base;
   /* BRW_NEW_TESS_PROGRAMS */
   const struct brw_program *tcp = brw_program_const(brw->tess_ctrl_program);
   bool active = brw->tess_eval_program;

   if (active) {
      /* BRW_NEW_TCS_PROG_DATA */
      const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;

      _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL);
      gen6_upload_push_constants(brw, &tcp->program, prog_data, stage_state);
   }

   upload_constant_state(brw, stage_state, active, MESA_SHADER_TESS_CTRL);
}
3706
/* State atom for TCS push constants; see genX(upload_tcs_push_constants). */
static const struct brw_tracked_state genX(tcs_push_constants) = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_DEFAULT_TESS_LEVELS |
             BRW_NEW_PUSH_CONSTANT_ALLOCATION |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA,
   },
   .emit = genX(upload_tcs_push_constants),
};
3719
3720 #endif
3721
3722 /* ---------------------------------------------------------------------- */
3723
3724 #if GEN_GEN >= 7
/**
 * Upload all compute shader state: MEDIA_VFE_STATE (thread counts,
 * scratch, CURBE allocation), MEDIA_CURBE_LOAD (push constants), the
 * binding table, and the INTERFACE_DESCRIPTOR_DATA loaded via
 * MEDIA_INTERFACE_DESCRIPTOR_LOAD.  No-op when no CS program is bound.
 */
static void
genX(upload_cs_state)(struct brw_context *brw)
{
   if (!brw->cs.base.prog_data)
      return;

   uint32_t offset;
   uint32_t *desc = (uint32_t*) brw_state_batch(
      brw, GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t), 64,
      &offset);

   struct brw_stage_state *stage_state = &brw->cs.base;
   struct brw_stage_prog_data *prog_data = stage_state->prog_data;
   struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Surface for the shader-time measurement buffer. */
      brw_emit_buffer_surface_state(
         brw, &stage_state->surf_offset[
                 prog_data->binding_table.shader_time_start],
         brw->shader_time.bo, 0, ISL_FORMAT_RAW,
         brw->shader_time.bo->size, 1, true);
   }

   uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
                                    32, &stage_state->bind_bo_offset);

   brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
      if (prog_data->total_scratch) {
         uint32_t bo_offset;

         if (GEN_GEN >= 8) {
            /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
             * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
             */
            bo_offset = ffs(stage_state->per_thread_scratch) - 11;
         } else if (GEN_IS_HASWELL) {
            /* Haswell's Per Thread Scratch Space is in the range [0, 10]
             * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
             */
            bo_offset = ffs(stage_state->per_thread_scratch) - 12;
         } else {
            /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
             * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
             */
            bo_offset = stage_state->per_thread_scratch / 1024 - 1;
         }
         vfe.ScratchSpaceBasePointer =
            render_bo(stage_state->scratch_bo, bo_offset);
      }

      /* max_cs_threads is per subslice; scale up to the whole GPU. */
      const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
      vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
      vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
      vfe.ResetGatewayTimer =
         Resettingrelativetimerandlatchingtheglobaltimestamp;
#if GEN_GEN < 9
      vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol;
#endif
#if GEN_GEN == 7
      vfe.GPGPUMode = 1;
#endif

      /* We are uploading duplicated copies of push constant uniforms for each
       * thread. Although the local id data needs to vary per thread, it won't
       * change for other uniform data. Unfortunately this duplication is
       * required for gen7. As of Haswell, this duplication can be avoided,
       * but this older mechanism with duplicated data continues to work.
       *
       * FINISHME: As of Haswell, we could make use of the
       * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length"
       * field to only store one copy of uniform data.
       *
       * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
       * which is described in the GPGPU_WALKER command and in the Broadwell
       * PRM Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
       * Operations => GPGPU Mode => Indirect Payload Storage.
       *
       * Note: The constant data is built in brw_upload_cs_push_constants
       * below.
       */
      vfe.URBEntryAllocationSize = GEN_GEN >= 8 ? 2 : 0;

      /* CURBE space: per-thread regs for each thread plus the shared
       * cross-thread regs, aligned to an even register count.
       */
      const uint32_t vfe_curbe_allocation =
         ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
               cs_prog_data->push.cross_thread.regs, 2);
      vfe.CURBEAllocationSize = vfe_curbe_allocation;
   }

   if (cs_prog_data->push.total.size > 0) {
      brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) {
         curbe.CURBETotalDataLength =
            ALIGN(cs_prog_data->push.total.size, 64);
         curbe.CURBEDataStartAddress = stage_state->push_const_offset;
      }
   }

   /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
   memcpy(bind, stage_state->surf_offset,
          prog_data->binding_table.size_bytes);
   const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
      .KernelStartPointer = brw->cs.base.prog_offset,
      .SamplerStatePointer = stage_state->sampler_offset,
      .SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4) >> 2,
      .BindingTablePointer = stage_state->bind_bo_offset,
      .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
      .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
      .SharedLocalMemorySize = encode_slm_size(devinfo->gen,
                                               prog_data->total_shared),
      .BarrierEnable = cs_prog_data->uses_barrier,
#if GEN_GEN >= 8 || GEN_IS_HASWELL
      .CrossThreadConstantDataReadLength =
         cs_prog_data->push.cross_thread.regs,
#endif
   };

   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(brw, desc, &idd);

   brw_batch_emit(brw, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
      load.InterfaceDescriptorTotalLength =
         GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
      load.InterfaceDescriptorDataStartAddress = offset;
   }
}
3849
/* Atom for genX(upload_cs_state): re-emits MEDIA_VFE_STATE, the CURBE and
 * the interface descriptor for the compute shader whenever the batch is
 * reset or the CS program data, samplers, surfaces, or push constants
 * change.
 */
static const struct brw_tracked_state genX(cs_state) = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_SAMPLER_STATE_TABLE |
             BRW_NEW_SURFACES,
   },
   .emit = genX(upload_cs_state)
};
3861
3862 #endif
3863
3864 /* ---------------------------------------------------------------------- */
3865
3866 #if GEN_GEN >= 8
3867 static void
3868 genX(upload_raster)(struct brw_context *brw)
3869 {
3870 struct gl_context *ctx = &brw->ctx;
3871
3872 /* _NEW_BUFFERS */
3873 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
3874
3875 /* _NEW_POLYGON */
3876 struct gl_polygon_attrib *polygon = &ctx->Polygon;
3877
3878 /* _NEW_POINT */
3879 struct gl_point_attrib *point = &ctx->Point;
3880
3881 brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
3882 if (brw->polygon_front_bit == render_to_fbo)
3883 raster.FrontWinding = CounterClockwise;
3884
3885 if (polygon->CullFlag) {
3886 switch (polygon->CullFaceMode) {
3887 case GL_FRONT:
3888 raster.CullMode = CULLMODE_FRONT;
3889 break;
3890 case GL_BACK:
3891 raster.CullMode = CULLMODE_BACK;
3892 break;
3893 case GL_FRONT_AND_BACK:
3894 raster.CullMode = CULLMODE_BOTH;
3895 break;
3896 default:
3897 unreachable("not reached");
3898 }
3899 } else {
3900 raster.CullMode = CULLMODE_NONE;
3901 }
3902
3903 point->SmoothFlag = raster.SmoothPointEnable;
3904
3905 raster.DXMultisampleRasterizationEnable =
3906 _mesa_is_multisample_enabled(ctx);
3907
3908 raster.GlobalDepthOffsetEnableSolid = polygon->OffsetFill;
3909 raster.GlobalDepthOffsetEnableWireframe = polygon->OffsetLine;
3910 raster.GlobalDepthOffsetEnablePoint = polygon->OffsetPoint;
3911
3912 switch (polygon->FrontMode) {
3913 case GL_FILL:
3914 raster.FrontFaceFillMode = FILL_MODE_SOLID;
3915 break;
3916 case GL_LINE:
3917 raster.FrontFaceFillMode = FILL_MODE_WIREFRAME;
3918 break;
3919 case GL_POINT:
3920 raster.FrontFaceFillMode = FILL_MODE_POINT;
3921 break;
3922 default:
3923 unreachable("not reached");
3924 }
3925
3926 switch (polygon->BackMode) {
3927 case GL_FILL:
3928 raster.BackFaceFillMode = FILL_MODE_SOLID;
3929 break;
3930 case GL_LINE:
3931 raster.BackFaceFillMode = FILL_MODE_WIREFRAME;
3932 break;
3933 case GL_POINT:
3934 raster.BackFaceFillMode = FILL_MODE_POINT;
3935 break;
3936 default:
3937 unreachable("not reached");
3938 }
3939
3940 /* _NEW_LINE */
3941 raster.AntialiasingEnable = ctx->Line.SmoothFlag;
3942
3943 /* _NEW_SCISSOR */
3944 raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
3945
3946 /* _NEW_TRANSFORM */
3947 if (!ctx->Transform.DepthClamp) {
3948 #if GEN_GEN >= 9
3949 raster.ViewportZFarClipTestEnable = true;
3950 raster.ViewportZNearClipTestEnable = true;
3951 #else
3952 raster.ViewportZClipTestEnable = true;
3953 #endif
3954 }
3955
3956 /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
3957 #if GEN_GEN >= 9
3958 raster.ConservativeRasterizationEnable =
3959 ctx->IntelConservativeRasterization;
3960 #endif
3961
3962 raster.GlobalDepthOffsetClamp = polygon->OffsetClamp;
3963 raster.GlobalDepthOffsetScale = polygon->OffsetFactor;
3964
3965 raster.GlobalDepthOffsetConstant = polygon->OffsetUnits * 2;
3966 }
3967 }
3968
/* Atom for genX(upload_raster).  The dirty bits cover every piece of GL
 * state read there: draw buffers (FBO orientation), line smoothing, point
 * smoothing, polygon state, scissor, depth clamp, multisample enable and
 * conservative rasterization.
 */
static const struct brw_tracked_state genX(raster_state) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_LINE |
              _NEW_MULTISAMPLE |
              _NEW_POINT |
              _NEW_POLYGON |
              _NEW_SCISSOR |
              _NEW_TRANSFORM,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_CONSERVATIVE_RASTERIZATION,
   },
   .emit = genX(upload_raster),
};
3984 #endif
3985
3986 /* ---------------------------------------------------------------------- */
3987
3988 #if GEN_GEN >= 8
/**
 * Upload the Gen8+ 3DSTATE_PS_EXTRA packet, which carries fragment-shader
 * properties beyond 3DSTATE_PS: depth/W source usage, discard, the input
 * coverage-mask mode, and whether the shader accesses UAVs.
 */
static void
genX(upload_ps_extra)(struct brw_context *brw)
{
   /* Only read inside GEN_GEN >= 9 branches (conservative rasterization),
    * hence UNUSED on other gens.
    */
   UNUSED struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
      psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
      psx.PixelShaderKillsPixel = prog_data->uses_kill;
      psx.AttributeEnable = prog_data->num_varying_inputs != 0;
      psx.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
      psx.PixelShaderUsesSourceW = prog_data->uses_src_w;
      psx.PixelShaderIsPerSample = prog_data->persample_dispatch;

      /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
      if (prog_data->uses_sample_mask) {
#if GEN_GEN >= 9
         /* Gen9+ distinguishes how the input coverage mask is produced. */
         if (prog_data->post_depth_coverage)
            psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
         else if (prog_data->inner_coverage && ctx->IntelConservativeRasterization)
            psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
         else
            psx.InputCoverageMaskState = ICMS_NORMAL;
#else
         psx.PixelShaderUsesInputCoverageMask = true;
#endif
      }

      psx.oMaskPresenttoRenderTarget = prog_data->uses_omask;
#if GEN_GEN >= 9
      psx.PixelShaderPullsBary = prog_data->pulls_bary;
      psx.PixelShaderComputesStencil = prog_data->computed_stencil;
#endif

      /* The stricter cross-primitive coherency guarantees that the hardware
       * gives us with the "Accesses UAV" bit set for at least one shader stage
       * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
       * are redundant within the current image, atomic counter and SSBO GL
       * APIs, which all have very loose ordering and coherency requirements
       * and generally rely on the application to insert explicit barriers when
       * a shader invocation is expected to see the memory writes performed by
       * the invocations of some previous primitive. Regardless of the value
       * of "UAV coherency required", the "Accesses UAV" bits will implicitly
       * cause an in most cases useless DC flush when the lowermost stage with
       * the bit set finishes execution.
       *
       * It would be nice to disable it, but in some cases we can't because on
       * Gen8+ it also has an influence on rasterization via the PS UAV-only
       * signal (which could be set independently from the coherency mechanism
       * in the 3DSTATE_WM command on Gen7), and because in some cases it will
       * determine whether the hardware skips execution of the fragment shader
       * or not via the ThreadDispatchEnable signal. However if we know that
       * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
       * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
       * difference so we may just disable it here.
       *
       * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
       * take into account KillPixels when no depth or stencil writes are
       * enabled. In order for occlusion queries to work correctly with no
       * attachments, we need to force-enable here.
       *
       * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
       * _NEW_COLOR
       */
      if ((prog_data->has_side_effects || prog_data->uses_kill) &&
          !brw_color_buffer_write_enabled(brw))
         psx.PixelShaderHasUAV = true;
   }
}
4061
/* Atom for genX(upload_ps_extra).  Deliberately non-static, so it has
 * external linkage (unlike the other atoms in this file).
 */
const struct brw_tracked_state genX(ps_extra) = {
   .dirty = {
      .mesa = _NEW_BUFFERS | _NEW_COLOR,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_CONSERVATIVE_RASTERIZATION,
   },
   .emit = genX(upload_ps_extra),
};
4073 #endif
4074
4075 /* ---------------------------------------------------------------------- */
4076
4077 #if GEN_GEN >= 8
4078 static void
4079 genX(upload_ps_blend)(struct brw_context *brw)
4080 {
4081 struct gl_context *ctx = &brw->ctx;
4082
4083 /* _NEW_BUFFERS */
4084 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
4085 const bool buffer0_is_integer = ctx->DrawBuffer->_IntegerBuffers & 0x1;
4086
4087 /* _NEW_COLOR */
4088 struct gl_colorbuffer_attrib *color = &ctx->Color;
4089
4090 brw_batch_emit(brw, GENX(3DSTATE_PS_BLEND), pb) {
4091 /* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */
4092 pb.HasWriteableRT = brw_color_buffer_write_enabled(brw);
4093
4094 bool alpha_to_one = false;
4095
4096 if (!buffer0_is_integer) {
4097 /* _NEW_MULTISAMPLE */
4098
4099 if (_mesa_is_multisample_enabled(ctx)) {
4100 pb.AlphaToCoverageEnable = ctx->Multisample.SampleAlphaToCoverage;
4101 alpha_to_one = ctx->Multisample.SampleAlphaToOne;
4102 }
4103
4104 pb.AlphaTestEnable = color->AlphaEnabled;
4105 }
4106
4107 /* Used for implementing the following bit of GL_EXT_texture_integer:
4108 * "Per-fragment operations that require floating-point color
4109 * components, including multisample alpha operations, alpha test,
4110 * blending, and dithering, have no effect when the corresponding
4111 * colors are written to an integer color buffer."
4112 *
4113 * The OpenGL specification 3.3 (page 196), section 4.1.3 says:
4114 * "If drawbuffer zero is not NONE and the buffer it references has an
4115 * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE
4116 * operations are skipped."
4117 */
4118 if (rb && !buffer0_is_integer && (color->BlendEnabled & 1)) {
4119 GLenum eqRGB = color->Blend[0].EquationRGB;
4120 GLenum eqA = color->Blend[0].EquationA;
4121 GLenum srcRGB = color->Blend[0].SrcRGB;
4122 GLenum dstRGB = color->Blend[0].DstRGB;
4123 GLenum srcA = color->Blend[0].SrcA;
4124 GLenum dstA = color->Blend[0].DstA;
4125
4126 if (eqRGB == GL_MIN || eqRGB == GL_MAX)
4127 srcRGB = dstRGB = GL_ONE;
4128
4129 if (eqA == GL_MIN || eqA == GL_MAX)
4130 srcA = dstA = GL_ONE;
4131
4132 /* Due to hardware limitations, the destination may have information
4133 * in an alpha channel even when the format specifies no alpha
4134 * channel. In order to avoid getting any incorrect blending due to
4135 * that alpha channel, coerce the blend factors to values that will
4136 * not read the alpha channel, but will instead use the correct
4137 * implicit value for alpha.
4138 */
4139 if (!_mesa_base_format_has_channel(rb->_BaseFormat,
4140 GL_TEXTURE_ALPHA_TYPE)) {
4141 srcRGB = brw_fix_xRGB_alpha(srcRGB);
4142 srcA = brw_fix_xRGB_alpha(srcA);
4143 dstRGB = brw_fix_xRGB_alpha(dstRGB);
4144 dstA = brw_fix_xRGB_alpha(dstA);
4145 }
4146
4147 /* Alpha to One doesn't work with Dual Color Blending. Override
4148 * SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO.
4149 */
4150 if (alpha_to_one && color->Blend[0]._UsesDualSrc) {
4151 srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
4152 srcA = fix_dual_blend_alpha_to_one(srcA);
4153 dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
4154 dstA = fix_dual_blend_alpha_to_one(dstA);
4155 }
4156
4157 pb.ColorBufferBlendEnable = true;
4158 pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA);
4159 pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA);
4160 pb.SourceBlendFactor = brw_translate_blend_factor(srcRGB);
4161 pb.DestinationBlendFactor = brw_translate_blend_factor(dstRGB);
4162
4163 pb.IndependentAlphaBlendEnable =
4164 srcA != srcRGB || dstA != dstRGB || eqA != eqRGB;
4165 }
4166 }
4167 }
4168
/* Atom for genX(upload_ps_blend): covers draw buffers (RT0 format),
 * blend/alpha state, and multisample alpha-to-coverage/one enables.
 */
static const struct brw_tracked_state genX(ps_blend) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR |
              _NEW_MULTISAMPLE,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_FRAGMENT_PROGRAM,
   },
   .emit = genX(upload_ps_blend)
};
4180 #endif
4181
4182 /* ---------------------------------------------------------------------- */
4183
4184 #if GEN_GEN >= 8
4185 static void
4186 genX(emit_vf_topology)(struct brw_context *brw)
4187 {
4188 brw_batch_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), vftopo) {
4189 vftopo.PrimitiveTopologyType = brw->primitive;
4190 }
4191 }
4192
/* Atom for genX(emit_vf_topology): only depends on the primitive type. */
static const struct brw_tracked_state genX(vf_topology) = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_PRIMITIVE,
   },
   .emit = genX(emit_vf_topology),
};
4201 #endif
4202
4203 /* ---------------------------------------------------------------------- */
4204
4205 #if GEN_GEN >= 7
4206 static void
4207 genX(emit_mi_report_perf_count)(struct brw_context *brw,
4208 struct brw_bo *bo,
4209 uint32_t offset_in_bytes,
4210 uint32_t report_id)
4211 {
4212 brw_batch_emit(brw, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
4213 mi_rpc.MemoryAddress = instruction_bo(bo, offset_in_bytes);
4214 mi_rpc.ReportID = report_id;
4215 }
4216 }
4217 #endif
4218
4219 /* ---------------------------------------------------------------------- */
4220
/**
 * Build the per-generation lists of tracked-state atoms for the render
 * and compute pipelines and install them on the context.
 *
 * Atom order is significant — surface state must precede the unit state
 * that points at it, binding tables come last within surface setup, and
 * index data is uploaded before vertex data (see inline comments).
 */
void
genX(init_atoms)(struct brw_context *brw)
{
#if GEN_GEN < 6
   static const struct brw_tracked_state *render_atoms[] =
   {
      /* Once all the programs are done, we know how large urb entry
       * sizes need to be and can decide if we need to change the urb
       * layout.
       */
      &brw_curbe_offsets,
      &brw_recalculate_urb_fence,

      &genX(cc_vp),
      &brw_cc_unit,

      /* Surface state setup. Must come before the VS/WM unit. The binding
       * table upload must be last.
       */
      &brw_vs_pull_constants,
      &brw_wm_pull_constants,
      &brw_renderbuffer_surfaces,
      &brw_renderbuffer_read_surfaces,
      &brw_texture_surfaces,
      &brw_vs_binding_table,
      &brw_wm_binding_table,

      &brw_fs_samplers,
      &brw_vs_samplers,

      /* These set up state for brw_psp_urb_cbs */
      &brw_wm_unit,
      &genX(sf_clip_viewport),
      &genX(sf_state),
      &genX(vs_state), /* always required, enabled or not */
      &brw_clip_unit,
      &brw_gs_unit,

      /* Command packets:
       */
      &brw_invariant_state,

      &brw_binding_table_pointers,
      &brw_blend_constant_color,

      &brw_depthbuffer,

      &genX(polygon_stipple),
      &genX(polygon_stipple_offset),

      &genX(line_stipple),

      &brw_psp_urb_cbs,

      &genX(drawing_rect),
      &brw_indices, /* must come before brw_vertices */
      &genX(index_buffer),
      &genX(vertices),

      &brw_constant_buffer
   };
#elif GEN_GEN == 6
   static const struct brw_tracked_state *render_atoms[] =
   {
      &genX(sf_clip_viewport),

      /* Command packets: */

      &genX(cc_vp),

      &gen6_urb,
      &genX(blend_state), /* must do before cc unit */
      &genX(color_calc_state), /* must do before cc unit */
      &genX(depth_stencil_state), /* must do before cc unit */

      &genX(vs_push_constants), /* Before vs_state */
      &genX(gs_push_constants), /* Before gs_state */
      &genX(wm_push_constants), /* Before wm_state */

      /* Surface state setup. Must come before the VS/WM unit. The binding
       * table upload must be last.
       */
      &brw_vs_pull_constants,
      &brw_vs_ubo_surfaces,
      &brw_gs_pull_constants,
      &brw_gs_ubo_surfaces,
      &brw_wm_pull_constants,
      &brw_wm_ubo_surfaces,
      &gen6_renderbuffer_surfaces,
      &brw_renderbuffer_read_surfaces,
      &brw_texture_surfaces,
      &gen6_sol_surface,
      &brw_vs_binding_table,
      &gen6_gs_binding_table,
      &brw_wm_binding_table,

      &brw_fs_samplers,
      &brw_vs_samplers,
      &brw_gs_samplers,
      &gen6_sampler_state,
      &genX(multisample_state),

      &genX(vs_state),
      &genX(gs_state),
      &genX(clip_state),
      &genX(sf_state),
      &genX(wm_state),

      &genX(scissor_state),

      &gen6_binding_table_pointers,

      &brw_depthbuffer,

      &genX(polygon_stipple),
      &genX(polygon_stipple_offset),

      &genX(line_stipple),

      &genX(drawing_rect),

      &brw_indices, /* must come before brw_vertices */
      &genX(index_buffer),
      &genX(vertices),
   };
#elif GEN_GEN == 7
   static const struct brw_tracked_state *render_atoms[] =
   {
      /* Command packets: */

      &genX(cc_vp),
      &genX(sf_clip_viewport),

      &gen7_l3_state,
      &gen7_push_constant_space,
      &gen7_urb,
      &genX(blend_state), /* must do before cc unit */
      &genX(color_calc_state), /* must do before cc unit */
      &genX(depth_stencil_state), /* must do before cc unit */

      &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
      &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
      &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
      &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
      &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

      &genX(vs_push_constants), /* Before vs_state */
      &genX(tcs_push_constants),
      &genX(tes_push_constants),
      &genX(gs_push_constants), /* Before gs_state */
      &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */

      /* Surface state setup. Must come before the VS/WM unit. The binding
       * table upload must be last.
       */
      &brw_vs_pull_constants,
      &brw_vs_ubo_surfaces,
      &brw_vs_abo_surfaces,
      &brw_tcs_pull_constants,
      &brw_tcs_ubo_surfaces,
      &brw_tcs_abo_surfaces,
      &brw_tes_pull_constants,
      &brw_tes_ubo_surfaces,
      &brw_tes_abo_surfaces,
      &brw_gs_pull_constants,
      &brw_gs_ubo_surfaces,
      &brw_gs_abo_surfaces,
      &brw_wm_pull_constants,
      &brw_wm_ubo_surfaces,
      &brw_wm_abo_surfaces,
      &gen6_renderbuffer_surfaces,
      &brw_renderbuffer_read_surfaces,
      &brw_texture_surfaces,
      &brw_vs_binding_table,
      &brw_tcs_binding_table,
      &brw_tes_binding_table,
      &brw_gs_binding_table,
      &brw_wm_binding_table,

      &brw_fs_samplers,
      &brw_vs_samplers,
      &brw_tcs_samplers,
      &brw_tes_samplers,
      &brw_gs_samplers,
      &genX(multisample_state),

      &genX(vs_state),
      &genX(hs_state),
      &genX(te_state),
      &genX(ds_state),
      &genX(gs_state),
      &genX(sol_state),
      &genX(clip_state),
      &genX(sbe_state),
      &genX(sf_state),
      &genX(wm_state),
      &genX(ps_state),

      &genX(scissor_state),

      &gen7_depthbuffer,

      &genX(polygon_stipple),
      &genX(polygon_stipple_offset),

      &genX(line_stipple),

      &genX(drawing_rect),

      &brw_indices, /* must come before brw_vertices */
      &genX(index_buffer),
      &genX(vertices),

#if GEN_IS_HASWELL
      &genX(cut_index),
#endif
   };
#elif GEN_GEN >= 8
   static const struct brw_tracked_state *render_atoms[] =
   {
      &genX(cc_vp),
      &genX(sf_clip_viewport),

      &gen7_l3_state,
      &gen7_push_constant_space,
      &gen7_urb,
      &genX(blend_state),
      &genX(color_calc_state),

      &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
      &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
      &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
      &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
      &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

      &genX(vs_push_constants), /* Before vs_state */
      &genX(tcs_push_constants),
      &genX(tes_push_constants),
      &genX(gs_push_constants), /* Before gs_state */
      &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */

      /* Surface state setup. Must come before the VS/WM unit. The binding
       * table upload must be last.
       */
      &brw_vs_pull_constants,
      &brw_vs_ubo_surfaces,
      &brw_vs_abo_surfaces,
      &brw_tcs_pull_constants,
      &brw_tcs_ubo_surfaces,
      &brw_tcs_abo_surfaces,
      &brw_tes_pull_constants,
      &brw_tes_ubo_surfaces,
      &brw_tes_abo_surfaces,
      &brw_gs_pull_constants,
      &brw_gs_ubo_surfaces,
      &brw_gs_abo_surfaces,
      &brw_wm_pull_constants,
      &brw_wm_ubo_surfaces,
      &brw_wm_abo_surfaces,
      &gen6_renderbuffer_surfaces,
      &brw_renderbuffer_read_surfaces,
      &brw_texture_surfaces,
      &brw_vs_binding_table,
      &brw_tcs_binding_table,
      &brw_tes_binding_table,
      &brw_gs_binding_table,
      &brw_wm_binding_table,

      &brw_fs_samplers,
      &brw_vs_samplers,
      &brw_tcs_samplers,
      &brw_tes_samplers,
      &brw_gs_samplers,
      &genX(multisample_state),

      &genX(vs_state),
      &genX(hs_state),
      &genX(te_state),
      &genX(ds_state),
      &genX(gs_state),
      &genX(sol_state),
      &genX(clip_state),
      &genX(raster_state),
      &genX(sbe_state),
      &genX(sf_state),
      &genX(ps_blend),
      &genX(ps_extra),
      &genX(ps_state),
      &genX(depth_stencil_state),
      &genX(wm_state),

      &genX(scissor_state),

      &gen7_depthbuffer,

      &genX(polygon_stipple),
      &genX(polygon_stipple_offset),

      &genX(line_stipple),

      &genX(drawing_rect),

      &genX(vf_topology),

      &brw_indices,
      &genX(index_buffer),
      &genX(vertices),

      &genX(cut_index),
      &gen8_pma_fix,
   };
#endif

   STATIC_ASSERT(ARRAY_SIZE(render_atoms) <= ARRAY_SIZE(brw->render_atoms));
   brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                           render_atoms, ARRAY_SIZE(render_atoms));

   /* Compute shaders exist on Gen7 and later only. */
#if GEN_GEN >= 7
   static const struct brw_tracked_state *compute_atoms[] =
   {
      &gen7_l3_state,
      &brw_cs_image_surfaces,
      &gen7_cs_push_constants,
      &brw_cs_pull_constants,
      &brw_cs_ubo_surfaces,
      &brw_cs_abo_surfaces,
      &brw_cs_texture_surfaces,
      &brw_cs_work_groups_surface,
      &brw_cs_samplers,
      &genX(cs_state),
   };

   STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms));
   brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
                           compute_atoms, ARRAY_SIZE(compute_atoms));

   brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count);
#endif
}