i965/gen4-5: Emit MI_FLUSH as required prior to switching pipelines.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "intel_batchbuffer.h"
35 #include "intel_fbo.h"
36 #include "intel_mipmap_tree.h"
37
38 #include "brw_context.h"
39 #include "brw_state.h"
40 #include "brw_defines.h"
41
42 #include "main/framebuffer.h"
43 #include "main/fbobject.h"
44 #include "main/glformats.h"
45
46 /* Constant single cliprect for framebuffer object or DRI2 drawing */
47 static void
48 upload_drawing_rect(struct brw_context *brw)
49 {
50 struct gl_context *ctx = &brw->ctx;
51 const struct gl_framebuffer *fb = ctx->DrawBuffer;
52 const unsigned int fb_width = _mesa_geometric_width(fb);
53 const unsigned int fb_height = _mesa_geometric_height(fb);
54
55 BEGIN_BATCH(4);
56 OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
57 OUT_BATCH(0); /* xmin, ymin */
58 OUT_BATCH(((fb_width - 1) & 0xffff) | ((fb_height - 1) << 16));
59 OUT_BATCH(0);
60 ADVANCE_BATCH();
61 }
62
/* Re-emit the drawing rectangle when the bound framebuffer changes
 * (_NEW_BUFFERS) or on a fresh context (BRW_NEW_CONTEXT).
 */
const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = upload_drawing_rect
};
70
71 /**
72 * Upload pointers to the per-stage state.
73 *
74 * The state pointers in this packet are all relative to the general state
75 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
76 */
static void
upload_pipelined_state_pointers(struct brw_context *brw)
{
   if (brw->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* One pointer per fixed-function stage, in hardware order:
    * VS, GS, CLIP, SF, WM, CC.  Bit 0 of the GS and CLIP pointers is the
    * stage's enable bit.
    */
   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.base.state_offset);
   /* GS is optional; a zero DWord disables the unit. */
   if (brw->ff_gs.prog_active)
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->ff_gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.base.state_offset);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   /* Let dependent atoms know the state pointers were just re-emitted. */
   brw->ctx.NewDriverState |= BRW_NEW_PSP;
}
108
/* Emit the pipelined state pointers followed by the URB fence and the
 * constant-buffer URB state.
 */
static void
upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}
116
/* The pipelined pointers, URB fence, and CS URB state are re-emitted
 * whenever the batch, the gen4 unit state, the fixed-function GS program,
 * the state base address, or the URB partitioning changes.
 */
const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FF_GS_PROG_DATA |
             BRW_NEW_GEN4_UNIT_STATE |
             BRW_NEW_STATE_BASE_ADDRESS |
             BRW_NEW_URB_FENCE,
   },
   .emit = upload_psp_urb_cbs,
};
128
129 uint32_t
130 brw_depthbuffer_format(struct brw_context *brw)
131 {
132 struct gl_context *ctx = &brw->ctx;
133 struct gl_framebuffer *fb = ctx->DrawBuffer;
134 struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
135 struct intel_renderbuffer *srb;
136
137 if (!drb &&
138 (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
139 !srb->mt->stencil_mt &&
140 (intel_rb_format(srb) == MESA_FORMAT_Z24_UNORM_S8_UINT ||
141 intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_S8X24_UINT)) {
142 drb = srb;
143 }
144
145 if (!drb)
146 return BRW_DEPTHFORMAT_D32_FLOAT;
147
148 return brw_depth_format(brw, drb->mt->format);
149 }
150
151 /**
152 * Returns the mask of how many bits of x and y must be handled through the
153 * depthbuffer's draw offset x and y fields.
154 *
155 * The draw offset x/y field of the depthbuffer packet is unfortunately shared
156 * between the depth, hiz, and stencil buffers. Because it can be hard to get
157 * all 3 to agree on this value, we want to do as much drawing offset
158 * adjustment as possible by moving the base offset of the 3 buffers, which is
159 * restricted to tile boundaries.
160 *
161 * For each buffer, the remainder must be applied through the x/y draw offset.
162 * This returns the worst-case mask of the low bits that have to go into the
163 * packet. If the 3 buffers don't agree on the drawing offset ANDed with this
164 * mask, then we're in trouble.
165 */
void
brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                uint32_t depth_level,
                                uint32_t depth_layer,
                                struct intel_mipmap_tree *stencil_mt,
                                uint32_t *out_tile_mask_x,
                                uint32_t *out_tile_mask_y)
{
   /* NOTE(review): depth_layer is accepted but never read in this body. */
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (depth_mt) {
      intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
                           depth_mt->cpp, false,
                           &tile_mask_x, &tile_mask_y);

      if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
         /* Fold in the HiZ buffer's tile masks as well, since the packet's
          * draw offset is shared with it.
          */
         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
         intel_get_tile_masks(depth_mt->hiz_buf->mt->tiling,
                              depth_mt->hiz_buf->mt->tr_mode,
                              depth_mt->hiz_buf->mt->cpp,
                              false, &hiz_tile_mask_x,
                              &hiz_tile_mask_y);

         /* Each HiZ row represents 2 rows of pixels */
         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;

         tile_mask_x |= hiz_tile_mask_x;
         tile_mask_y |= hiz_tile_mask_y;
      }
   }

   if (stencil_mt) {
      /* Use the separate stencil tree when the miptree has one. */
      if (stencil_mt->stencil_mt)
         stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
         /* Separate stencil buffer uses 64x64 tiles. */
         tile_mask_x |= 63;
         tile_mask_y |= 63;
      } else {
         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
         intel_get_tile_masks(stencil_mt->tiling,
                              stencil_mt->tr_mode,
                              stencil_mt->cpp,
                              false, &stencil_tile_mask_x,
                              &stencil_tile_mask_y);

         tile_mask_x |= stencil_tile_mask_x;
         tile_mask_y |= stencil_tile_mask_y;
      }
   }

   *out_tile_mask_x = tile_mask_x;
   *out_tile_mask_y = tile_mask_y;
}
221
222 static struct intel_mipmap_tree *
223 get_stencil_miptree(struct intel_renderbuffer *irb)
224 {
225 if (!irb)
226 return NULL;
227 if (irb->mt->stencil_mt)
228 return irb->mt->stencil_mt;
229 return irb->mt;
230 }
231
/**
 * Rebase depth and/or stencil miptrees so the depth, HiZ, and stencil
 * buffers agree on the intra-tile draw offset, then record the resulting
 * tile offsets and aligned buffer offsets in brw->depthstencil for the
 * depth-buffer packet emission.
 *
 * clear_mask: buffers about to be cleared (their contents may be
 * invalidated instead of blitted when rebasing).
 *
 * On gen6+ only the 'nop' state is initialized; the rebase workarounds
 * are gen4/5-only.
 */
void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Initialize brw->depthstencil to 'nop' workaround state.
    */
   brw->depthstencil.tile_x = 0;
   brw->depthstencil.tile_y = 0;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   if (depth_irb)
      brw->depthstencil.depth_mt = depth_mt;
   if (stencil_irb)
      brw->depthstencil.stencil_mt = get_stencil_miptree(stencil_irb);

   /* Gen6+ doesn't require the workarounds, since we always program the
    * surface state at the start of the whole surface.
    */
   if (brw->gen >= 6)
      return;

   /* Check if depth buffer is in depth/stencil format.  If so, then it's only
    * safe to invalidate it if we're also clearing stencil, and both depth_irb
    * and stencil_irb point to the same miptree.
    *
    * Note: it's not sufficient to check for the case where
    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
    * because this fails to catch depth/stencil buffers on hardware that uses
    * separate stencil.  To catch that case, we check whether
    * depth_mt->stencil_mt is non-NULL.
    */
   if (depth_irb && invalidate_depth &&
       (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
        depth_mt->stencil_mt)) {
      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
         && depth_irb->mt == stencil_irb->mt;
   }

   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt,
                                   depth_mt ? depth_irb->mt_level : 0,
                                   depth_mt ? depth_irb->mt_layer : 0,
                                   stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (!brw->has_surface_tile_offset) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         /* Recompute the offsets against the (possibly new) miptree. */
         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         /* These locals shadow the function-scope stencil_tile_x/y; they
          * are only used for the comparison below.
          */
         int stencil_tile_x = stencil_draw_x & tile_mask_x;
         int stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase stencil
          * as well.  (if we hadn't decided to rebase stencil before, the
          * post-stencil depth test will also rebase depth to try to match it
          * up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      if (!brw->has_surface_tile_offset) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(brw, stencil_irb, invalidate_stencil);
      stencil_mt = get_stencil_miptree(stencil_irb);

      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         /* Packed depth/stencil: repoint depth at the rebased tree. */
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase.  At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new dephtstencil state so the hardware
    * packets can just dereference that without re-calculating tile offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   if (depth_irb) {
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      brw->depthstencil.depth_offset =
         intel_miptree_get_aligned_offset(depth_mt,
                                          depth_irb->draw_x & ~tile_mask_x,
                                          depth_irb->draw_y & ~tile_mask_y,
                                          false);
      if (intel_renderbuffer_has_hiz(depth_irb)) {
         /* HiZ rows cover two pixel rows, hence the /2 on y. */
         brw->depthstencil.hiz_offset =
            intel_miptree_get_aligned_offset(depth_mt,
                                             depth_irb->draw_x & ~tile_mask_x,
                                             (depth_irb->draw_y & ~tile_mask_y) / 2,
                                             false);
      }
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
      }
   }
}
477
/**
 * Gather the bound depth/stencil buffers (as prepared by
 * brw_workaround_depthstencil_alignment) and dispatch to the per-gen
 * emit_depth_stencil_hiz vtbl hook.
 */
void
brw_emit_depthbuffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   bool hiz = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
   bool separate_stencil = false;
   uint32_t depth_surface_type = BRW_SURFACE_NULL;
   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
   uint32_t depth_offset = 0;
   uint32_t width = 1, height = 1;

   if (stencil_mt) {
      separate_stencil = stencil_mt->format == MESA_FORMAT_S_UINT8;

      /* Gen7 supports only separate stencil */
      assert(separate_stencil || brw->gen < 7);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (depth_irb && depth_mt) {
      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
       * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
       * depthstencil format.
       *
       * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable be
       * set to the same value. Gens after 7 implicitly always set
       * Separate_Stencil_Enable; software cannot disable it.
       */
      if ((brw->gen < 7 && hiz) || brw->gen >= 7) {
         assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
      }

      /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
      assert(brw->gen >= 7 || !separate_stencil || hiz);

      assert(brw->gen < 6 || depth_mt->tiling == I915_TILING_Y);
      assert(!hiz || depth_mt->tiling == I915_TILING_Y);

      depthbuffer_format = brw_depthbuffer_format(brw);
      depth_surface_type = BRW_SURFACE_2D;
      depth_offset = brw->depthstencil.depth_offset;
      width = depth_irb->Base.Base.Width;
      height = depth_irb->Base.Base.Height;
   } else if (separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(brw->has_separate_stencil);

      depth_surface_type = BRW_SURFACE_2D;
      width = stencil_irb->Base.Base.Width;
      height = stencil_irb->Base.Base.Height;
   }

   /* Flush the render cache if these buffers were recently rendered to. */
   if (depth_mt)
      brw_render_cache_set_check_flush(brw, depth_mt->bo);
   if (stencil_mt)
      brw_render_cache_set_check_flush(brw, stencil_mt->bo);

   brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                    depthbuffer_format, depth_surface_type,
                                    stencil_mt, hiz, separate_stencil,
                                    width, height, tile_x, tile_y);
}
565
/**
 * Emit 3DSTATE_DEPTH_BUFFER plus, when HiZ or separate stencil is in use,
 * 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER, and finally
 * 3DSTATE_CLEAR_PARAMS where required (gen6+, or gen5 with HiZ).
 */
void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct intel_mipmap_tree *depth_mt,
                           uint32_t depth_offset, uint32_t depthbuffer_format,
                           uint32_t depth_surface_type,
                           struct intel_mipmap_tree *stencil_mt,
                           bool hiz, bool separate_stencil,
                           uint32_t width, uint32_t height,
                           uint32_t tile_x, uint32_t tile_y)
{
   /* Enable the hiz bit if we're doing separate stencil, because it and the
    * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
    *     Enable must also be enabled.
    *
    *     [DevGT]: This field must be set to the same value (enabled or
    *     disabled) as Hierarchical Depth Buffer Enable
    */
   bool enable_hiz_ss = hiz || separate_stencil;


   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (brw->gen == 6) {
      brw_emit_depth_stall_flushes(brw);
   }

   /* Packet length varies by generation. */
   unsigned int len;
   if (brw->gen >= 6)
      len = 7;
   else if (brw->is_g4x || brw->gen == 5)
      len = 6;
   else
      len = 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
             (BRW_TILEWALK_YMAJOR << 26) |
             ((depth_mt ? depth_mt->tiling != I915_TILING_NONE : 1)
              << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                depth_offset);
   } else {
      OUT_BATCH(0);
   }

   /* Surface dimensions include the intra-tile draw offset. */
   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
   OUT_BATCH(0);

   /* Depth coordinate offset; not available before g45. */
   if (brw->is_g4x || brw->gen >= 5)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (brw->gen >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();

   if (hiz || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz) {
         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_mt->pitch - 1);
         OUT_RELOC(hiz_mt->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.hiz_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *    The pitch must be set to 2x the value computed based on width, as
          *    the stencil buffer is stored with two rows interleaved.
          */
         OUT_BATCH(2 * stencil_mt->pitch - 1);
         OUT_RELOC(stencil_mt->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.stencil_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (brw->gen >= 6 || hiz) {
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
                GEN5_DEPTH_CLEAR_VALID |
                (2 - 2));
      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}
703
/* Re-emit the depth/stencil/HiZ buffer packets on framebuffer changes and
 * at the start of every batch.
 */
const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
   },
   .emit = brw_emit_depthbuffer,
};
711
712 /**
713 * Polygon stipple packet
714 */
715 static void
716 upload_polygon_stipple(struct brw_context *brw)
717 {
718 struct gl_context *ctx = &brw->ctx;
719 GLuint i;
720
721 /* _NEW_POLYGON */
722 if (!ctx->Polygon.StippleFlag)
723 return;
724
725 BEGIN_BATCH(33);
726 OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
727
728 /* Polygon stipple is provided in OpenGL order, i.e. bottom
729 * row first. If we're rendering to a window (i.e. the
730 * default frame buffer object, 0), then we need to invert
731 * it to match our pixel layout. But if we're rendering
732 * to a FBO (i.e. any named frame buffer object), we *don't*
733 * need to invert - we already match the layout.
734 */
735 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
736 for (i = 0; i < 32; i++)
737 OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
738 } else {
739 for (i = 0; i < 32; i++)
740 OUT_BATCH(ctx->PolygonStipple[i]);
741 }
742 ADVANCE_BATCH();
743 }
744
/* Re-emit the stipple pattern when polygon state or the pattern itself
 * changes, or on a fresh context.
 */
const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = _NEW_POLYGON |
              _NEW_POLYGONSTIPPLE,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = upload_polygon_stipple
};
753
754 /**
755 * Polygon stipple offset packet
756 */
757 static void
758 upload_polygon_stipple_offset(struct brw_context *brw)
759 {
760 struct gl_context *ctx = &brw->ctx;
761
762 /* _NEW_POLYGON */
763 if (!ctx->Polygon.StippleFlag)
764 return;
765
766 BEGIN_BATCH(2);
767 OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
768
769 /* _NEW_BUFFERS
770 *
771 * If we're drawing to a system window we have to invert the Y axis
772 * in order to match the OpenGL pixel coordinate system, and our
773 * offset must be matched to the window position. If we're drawing
774 * to a user-created FBO then our native pixel coordinate system
775 * works just fine, and there's no window system to worry about.
776 */
777 if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
778 OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31);
779 else
780 OUT_BATCH(0);
781 ADVANCE_BATCH();
782 }
783
/* The stipple offset depends on the window height, so it must track
 * framebuffer changes as well as polygon state.
 */
const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_POLYGON,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = upload_polygon_stipple_offset
};
792
793 /**
794 * AA Line parameters
795 */
796 static void
797 upload_aa_line_parameters(struct brw_context *brw)
798 {
799 struct gl_context *ctx = &brw->ctx;
800
801 if (!ctx->Line.SmoothFlag)
802 return;
803
804 /* Original Gen4 doesn't have 3DSTATE_AA_LINE_PARAMETERS. */
805 if (brw->gen == 4 && !brw->is_g4x)
806 return;
807
808 BEGIN_BATCH(3);
809 OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
810 /* use legacy aa line coverage computation */
811 OUT_BATCH(0);
812 OUT_BATCH(0);
813 ADVANCE_BATCH();
814 }
815
/* AA line parameters depend only on line state and need re-emitting on a
 * fresh context.
 */
const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = upload_aa_line_parameters
};
823
824 /**
825 * Line stipple packet
826 */
/**
 * Line stipple packet
 */
static void
upload_line_stipple(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);

   /* DWord 2 packs the fixed-point inverse repeat count in the high bits
    * with the integer repeat count in the low bits; the fixed-point format
    * and field position differ by generation.
    */
   if (brw->gen >= 7) {
      /* in U1.16 */
      tmp = 1.0f / ctx->Line.StippleFactor;
      tmpi = tmp * (1<<16);
      /* NOTE(review): shift of 15 places the U1.16 value at bit 15 — verify
       * against the gen7 3DSTATE_LINE_STIPPLE field layout.
       */
      OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
   } else {
      /* in U1.13 */
      tmp = 1.0f / ctx->Line.StippleFactor;
      tmpi = tmp * (1<<13);
      OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   }

   ADVANCE_BATCH();
}
855
/* Line stipple pattern/factor come from _NEW_LINE state; re-emit on a
 * fresh context.
 */
const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = upload_line_stipple
};
863
/**
 * Emit PIPELINE_SELECT for the given pipeline, preceded by the per-gen
 * flush/invalidate workarounds the packet requires.  The flush sequences
 * below are ordering-sensitive; do not reorder.
 */
void
brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
   const bool is_965 = brw->gen == 4 && !brw->is_g4x;
   /* The opcode differs between original 965 and GM45+. */
   const uint32_t _3DSTATE_PIPELINE_SELECT =
      is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;

   if (brw->gen >= 8 && brw->gen < 10) {
      /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
       *
       *   Software must clear the COLOR_CALC_STATE Valid field in
       *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
       *   with Pipeline Select set to GPGPU.
       *
       * The internal hardware docs recommend the same workaround for Gen9
       * hardware too.
       */
      if (pipeline == BRW_COMPUTE_PIPELINE) {
         BEGIN_BATCH(2);
         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
         OUT_BATCH(0);
         ADVANCE_BATCH();

         /* CC state must be re-uploaded when we return to 3D. */
         brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
      }

   } else if (brw->gen >= 6) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: DEVSNB+
       *
       *   Software must ensure all the write caches are flushed through a
       *   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
       *   command to invalidate read only caches prior to programming
       *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
       */
      const unsigned dc_flush =
         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_INVALIDATE : 0;

      if (brw->gen == 6) {
         /* Hardware workaround: SNB B-Spec says:
          *
          *   Before a PIPE_CONTROL with Write Cache Flush Enable = 1, a
          *   PIPE_CONTROL with any non-zero post-sync-op is required.
          */
         brw_emit_post_sync_nonzero_flush(brw);
      }

      /* First PIPE_CONTROL: flush write caches with a CS stall... */
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  dc_flush |
                                  PIPE_CONTROL_NO_WRITE |
                                  PIPE_CONTROL_CS_STALL);

      /* ...second PIPE_CONTROL: invalidate the read-only caches. */
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                  PIPE_CONTROL_NO_WRITE);

   } else {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: PRE-DEVSNB
       *
       *   Software must ensure the current pipeline is flushed via an
       *   MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
       */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* Select the pipeline */
   BEGIN_BATCH(1);
   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
             (brw->gen >= 9 ? (3 << 8) : 0) |
             (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
   ADVANCE_BATCH();
}
948
949 /**
950 * Misc invariant state packets
951 */
/**
 * Misc invariant state packets: pipeline select, system instruction
 * pointer (SIP), and VF statistics enable.
 */
void
brw_upload_invariant_state(struct brw_context *brw)
{
   const bool is_965 = brw->gen == 4 && !brw->is_g4x;

   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
   brw->last_pipeline = BRW_RENDER_PIPELINE;

   /* Zero the system instruction pointer; the packet grew a DWord on gen8. */
   if (brw->gen >= 8) {
      BEGIN_BATCH(3);
      OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(2);
      OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Enable vertex-fetch statistics; the opcode differs on original 965. */
   const uint32_t _3DSTATE_VF_STATISTICS =
      is_965 ? GEN4_3DSTATE_VF_STATISTICS : GM45_3DSTATE_VF_STATISTICS;
   BEGIN_BATCH(1);
   OUT_BATCH(_3DSTATE_VF_STATISTICS << 16 | 1);
   ADVANCE_BATCH();
}
979
/* Invariant state is emitted once per context. */
const struct brw_tracked_state brw_invariant_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
   },
   .emit = brw_upload_invariant_state
};
987
/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 *
 * The bare OUT_BATCH(1) dwords below program an address (or bound) of
 * zero with the low "Modify Enable" bit set.
 */
static void
upload_state_base_address(struct brw_context *brw)
{
   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (brw->gen >= 6) {
      /* Gen6+: 10-dword packet with per-base memory object control state. */
      uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      OUT_BATCH(mocs << 8 | /* General State Memory Object Control State */
                mocs << 4 | /* Stateless Data Port Access Memory Object Control State */
                1); /* General State Base Address Modify Enable */
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      /* Dynamic state upper bound.  Although the documentation says that
       * programming it to zero will cause it to be ignored, that is a lie.
       * If this isn't programmed to a real bound, the sampler border color
       * pointer is rejected, causing border color to mysteriously fail.
       */
      OUT_BATCH(0xfffff001);
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (brw->gen == 5) {
      /* Ironlake: 8-dword packet; adds an instruction base/bound over gen4. */
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(0xfffff001); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      /* Gen4/G4x: 6-dword packet; no instruction base address field. */
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
}
1099
/* Re-emit STATE_BASE_ADDRESS at the start of every batch, and whenever
 * the program cache BO (the instruction base address) is replaced.
 */
const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_PROGRAM_CACHE,
   },
   .emit = upload_state_base_address
};