i965: Remove brw_context::depthstencil::hiz_mt
[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33
34 #include "intel_batchbuffer.h"
35 #include "intel_fbo.h"
36 #include "intel_mipmap_tree.h"
37 #include "intel_regions.h"
38
39 #include "brw_context.h"
40 #include "brw_state.h"
41 #include "brw_defines.h"
42
43 #include "main/fbobject.h"
44 #include "main/glformats.h"
45
/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

   /* Emit a drawing rectangle covering the whole drawbuffer: origin at
    * (0,0), max corner at (Width-1, Height-1), and no origin offset.
    */
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
   /* Clipped max corner: x in the low 16 bits, y in the high 16 bits. */
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16));
   OUT_BATCH(0); /* drawing rectangle origin (x, y offset) */
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,      /* drawbuffer size changes */
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};
69
/**
 * Upload the binding table pointers, which point each stage's array of surface
 * state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 *
 * This is the pre-Gen6 form of the packet: a fixed 6-dword layout with one
 * slot per fixed-function stage.  Only VS and WM have binding tables here;
 * the GS/clip/SF slots are left zero.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset);
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->wm.bind_bo_offset);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      /* Re-emit whenever the binding tables move (new batch or new surface
       * state base address) or any stage's table contents change.
       */
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};
103
/**
 * Upload the binding table pointers, which point each stage's array of surface
 * state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 *
 * Gen6 form: a single 4-dword packet that carries VS, GS and PS pointers,
 * with per-stage "modify" bits in the header selecting which ones to update
 * (here, all three).
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};
139
/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw )
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* All unit state lives in the batchbuffer, so each pointer is a
    * relocation into batch.bo.  The low bit on the GS and clip entries is
    * the unit's enable bit; GS gets a plain zero dword when there is no
    * active GS program.
    */
   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   /* Other atoms (URB fence, CS URB state) key off this flag. */
   brw->state.dirty.brw |= BRW_NEW_PSP;
}

/* Emit the pipelined state pointers plus the URB state that must
 * accompany them, in the required order.
 */
static void upload_psp_urb_cbs(struct brw_context *brw )
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      /* Any unit state moving requires re-emitting its pointer. */
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};
202
/**
 * Return the BRW_DEPTHFORMAT_* enum matching the currently-bound depth
 * renderbuffer (or a packed depth/stencil buffer bound to stencil only).
 *
 * Falls back to D32_FLOAT when no depth buffer is bound at all.
 */
uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *srb;

   /* No depth attachment, but a packed depth/stencil buffer (without a
    * separate-stencil miptree) is bound to stencil: describe the depth
    * buffer using that packed buffer's format.
    */
   if (!drb &&
       (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
       !srb->mt->stencil_mt &&
       (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
        intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
      drb = srb;
   }

   if (!drb)
      return BRW_DEPTHFORMAT_D32_FLOAT;

   switch (drb->mt->format) {
   case MESA_FORMAT_Z16:
      return BRW_DEPTHFORMAT_D16_UNORM;
   case MESA_FORMAT_Z32_FLOAT:
      return BRW_DEPTHFORMAT_D32_FLOAT;
   case MESA_FORMAT_X8_Z24:
      if (intel->gen >= 6) {
         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
      } else {
         /* Use D24_UNORM_S8, not D24_UNORM_X8.
          *
          * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
          * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
          * 3DSTATE_DEPTH_BUFFER.Surface_Format).
          *
          * However, on Gen5, D24_UNORM_X8 may be used only if separate
          * stencil is enabled, and we never enable it. From the Ironlake PRM,
          * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
          *     If this field is disabled, the Surface Format of the depth
          *     buffer cannot be D24_UNORM_X8_UINT.
          */
         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
      }
   case MESA_FORMAT_S8_Z24:
      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   case MESA_FORMAT_Z32_FLOAT_X24S8:
      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
   default:
      /* NOTE(review): the switch inspects drb->mt->format but the error
       * message prints intel_rb_format(drb); these can differ when the
       * miptree format doesn't match the renderbuffer format — confirm
       * which one was intended.
       */
      _mesa_problem(ctx, "Unexpected depth format %s\n",
                    _mesa_get_format_name(intel_rb_format(drb)));
      return BRW_DEPTHFORMAT_D16_UNORM;
   }
}
256
/**
 * Returns the mask of how many bits of x and y must be handled through the
 * depthbuffer's draw offset x and y fields.
 *
 * The draw offset x/y field of the depthbuffer packet is unfortunately shared
 * between the depth, hiz, and stencil buffers.  Because it can be hard to get
 * all 3 to agree on this value, we want to do as much drawing offset
 * adjustment as possible by moving the base offset of the 3 buffers, which is
 * restricted to tile boundaries.
 *
 * For each buffer, the remainder must be applied through the x/y draw offset.
 * This returns the worst-case mask of the low bits that have to go into the
 * packet.  If the 3 buffers don't agree on the drawing offset ANDed with this
 * mask, then we're in trouble.
 */
void
brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                uint32_t depth_level,
                                uint32_t depth_layer,
                                struct intel_mipmap_tree *stencil_mt,
                                uint32_t *out_tile_mask_x,
                                uint32_t *out_tile_mask_y)
{
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (depth_mt) {
      intel_region_get_tile_masks(depth_mt->region,
                                  &tile_mask_x, &tile_mask_y, false);

      /* If this slice has a HiZ buffer, its tiling constrains the shared
       * draw offset too.
       */
      if (intel_miptree_slice_has_hiz(depth_mt, depth_level, depth_layer)) {
         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
         intel_region_get_tile_masks(depth_mt->hiz_mt->region,
                                     &hiz_tile_mask_x, &hiz_tile_mask_y, false);

         /* Each HiZ row represents 2 rows of pixels */
         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;

         tile_mask_x |= hiz_tile_mask_x;
         tile_mask_y |= hiz_tile_mask_y;
      }
   }

   if (stencil_mt) {
      /* Packed depth/stencil with a separate-stencil child: the child is
       * the buffer that actually gets programmed.
       */
      if (stencil_mt->stencil_mt)
         stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Separate stencil buffer uses 64x64 tiles. */
         tile_mask_x |= 63;
         tile_mask_y |= 63;
      } else {
         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
         intel_region_get_tile_masks(stencil_mt->region,
                                     &stencil_tile_mask_x,
                                     &stencil_tile_mask_y, false);

         tile_mask_x |= stencil_tile_mask_x;
         tile_mask_y |= stencil_tile_mask_y;
      }
   }

   *out_tile_mask_x = tile_mask_x;
   *out_tile_mask_y = tile_mask_y;
}
321
322 static struct intel_mipmap_tree *
323 get_stencil_miptree(struct intel_renderbuffer *irb)
324 {
325 if (!irb)
326 return NULL;
327 if (irb->mt->stencil_mt)
328 return irb->mt->stencil_mt;
329 return irb->mt;
330 }
331
/**
 * Work around hardware alignment restrictions on the shared draw-offset
 * field of 3DSTATE_DEPTH_BUFFER by rebasing (blitting to temporary
 * miptrees) the depth and/or stencil buffers until their intra-tile
 * offsets agree and are suitably aligned, then record the resulting
 * miptrees/offsets in brw->depthstencil for the packet-emission code.
 *
 * \param clear_mask  BUFFER_BIT_* bits about to be cleared; a buffer that
 *                    will be fully cleared can be "invalidated" (old
 *                    contents discarded) instead of copied when rebasing.
 */
void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Check if depth buffer is in depth/stencil format.  If so, then it's only
    * safe to invalidate it if we're also clearing stencil, and both depth_irb
    * and stencil_irb point to the same miptree.
    *
    * Note: it's not sufficient to check for the case where
    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
    * because this fails to catch depth/stencil buffers on hardware that uses
    * separate stencil.  To catch that case, we check whether
    * depth_mt->stencil_mt is non-NULL.
    */
   if (depth_irb && invalidate_depth &&
       (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
        depth_mt->stencil_mt)) {
      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
         && depth_irb->mt == stencil_irb->mt;
   }

   /* Worst-case mask of the low offset bits that must be expressed through
    * the shared draw-offset packet field.
    */
   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt,
                                   depth_mt ? depth_irb->mt_level : 0,
                                   depth_mt ? depth_irb->mt_layer : 0,
                                   stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (intel->gen == 4 && !intel->is_g4x) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(intel, depth_irb, invalidate_depth);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         /* Recompute the depth offset bits after the rebase. */
         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         /* NOTE(review): these deliberately shadow the outer
          * stencil_tile_x/y — the outer variables are recomputed in the
          * stencil-only check below.
          */
         int stencil_tile_x = stencil_draw_x & tile_mask_x;
         int stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase stencil
          * as well.  (if we hadn't decided to rebase stencil before, the
          * post-stencil depth test will also rebase depth to try to match it
          * up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      /* Pre-g45 has no intra-tile offsets at all. */
      if (intel->gen == 4 && !intel->is_g4x) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(intel, stencil_irb, invalidate_stencil);
      stencil_mt = get_stencil_miptree(stencil_irb);

      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         /* Same packed miptree: just re-point depth at the rebased mt. */
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            /* Depth was aligned on its own but no longer matches the
             * rebased stencil; rebase depth too so the shared offset field
             * works for both.
             */
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(intel, depth_irb,
                                            invalidate_depth);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   /* Stencil-only FBO: the stencil offsets are the ones that go in the
    * packet.
    */
   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase.  At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new dephtstencil state so the hardware
    * packets can just dereference that without re-calculating tile offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   if (depth_irb) {
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      /* Base offset of the depth buffer, rounded down to the tile boundary
       * nearest the draw offset.
       */
      brw->depthstencil.depth_offset =
         intel_region_get_aligned_offset(depth_mt->region,
                                         depth_irb->draw_x & ~tile_mask_x,
                                         depth_irb->draw_y & ~tile_mask_y,
                                         false);
      if (intel_renderbuffer_has_hiz(depth_irb)) {
         /* HiZ rows cover 2 pixel rows each, hence the /2 on y. */
         brw->depthstencil.hiz_offset =
            intel_region_get_aligned_offset(depth_mt->region,
                                            depth_irb->draw_x & ~tile_mask_x,
                                            (depth_irb->draw_y & ~tile_mask_y) /
                                            2,
                                            false);
      }
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
      }
   }
}
565
/**
 * Gather the depth/stencil/hiz configuration computed by
 * brw_workaround_depthstencil_alignment() and hand it to the per-gen
 * emit_depth_stencil_hiz vtable hook.
 *
 * Handles the special cases of a packed depth/stencil buffer bound only to
 * stencil, and of a separate stencil buffer with no depth buffer at all.
 */
void
brw_emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   bool hiz = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
   bool separate_stencil = false;
   uint32_t depth_surface_type = BRW_SURFACE_NULL;
   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
   uint32_t depth_offset = 0;
   uint32_t width = 1, height = 1;

   if (stencil_mt) {
      separate_stencil = stencil_mt->format == MESA_FORMAT_S8;

      /* Gen7 supports only separate stencil */
      assert(separate_stencil || intel->gen < 7);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (depth_irb) {
      struct intel_region *region = depth_mt->region;

      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
       * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
       * depthstencil format.
       *
       * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable be
       * set to the same value.  Gens after 7 implicitly always set
       * Separate_Stencil_Enable; software cannot disable it.
       */
      if ((intel->gen < 7 && hiz) || intel->gen >= 7) {
         assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
      }

      /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
      assert(intel->gen >= 7 || !separate_stencil || hiz);

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz || region->tiling == I915_TILING_Y);

      depthbuffer_format = brw_depthbuffer_format(brw);
      depth_surface_type = BRW_SURFACE_2D;
      depth_offset = brw->depthstencil.depth_offset;
      width = depth_irb->Base.Base.Width;
      height = depth_irb->Base.Base.Height;
   } else if (separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * The tiled bit must be set.  From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(intel->has_separate_stencil);

      depth_surface_type = BRW_SURFACE_2D;
      width = stencil_irb->Base.Base.Width;
      height = stencil_irb->Base.Base.Height;
   }

   /* Per-generation packet emission (gen4-6 version is
    * brw_emit_depth_stencil_hiz() below).
    */
   intel->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                      depthbuffer_format, depth_surface_type,
                                      stencil_mt, hiz, separate_stencil,
                                      width, height, tile_x, tile_y);
}
651
/**
 * Emit 3DSTATE_DEPTH_BUFFER and, when needed, 3DSTATE_HIER_DEPTH_BUFFER,
 * 3DSTATE_STENCIL_BUFFER, and 3DSTATE_CLEAR_PARAMS for gen4-6.
 *
 * \param depth_mt        depth miptree, or NULL for a null depth buffer.
 * \param depth_offset    byte offset of the depth surface within its BO.
 * \param stencil_mt      stencil miptree (only emitted if separate_stencil).
 * \param hiz             emit a real HiZ buffer pointer.
 * \param tile_x, tile_y  intra-tile draw offset shared by all three buffers.
 */
void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct intel_mipmap_tree *depth_mt,
                           uint32_t depth_offset, uint32_t depthbuffer_format,
                           uint32_t depth_surface_type,
                           struct intel_mipmap_tree *stencil_mt,
                           bool hiz, bool separate_stencil,
                           uint32_t width, uint32_t height,
                           uint32_t tile_x, uint32_t tile_y)
{
   struct intel_context *intel = &brw->intel;

   /* Enable the hiz bit if we're doing separate stencil, because it and the
    * separate stencil bit must have the same value.  From Section 2.11.5.6.1.1
    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
    *     Enable must also be enabled.
    *
    *     [DevGT]: This field must be set to the same value (enabled or
    *     disabled) as Hierarchical Depth Buffer Enable
    */
   bool enable_hiz_ss = hiz || separate_stencil;


   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   /* Packet length varies by generation: gen6+ adds a trailing dword,
    * g4x/gen5 add the coordinate-offset dword.
    */
   unsigned int len;
   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
             (BRW_TILEWALK_YMAJOR << 26) |
             /* Tiled-surface bit; forced on for a null depth buffer. */
             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
              << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                depth_offset);
   } else {
      OUT_BATCH(0);
   }

   /* Surface dimensions include the intra-tile offset, since rendering
    * starts at (tile_x, tile_y) within the surface.
    */
   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
   OUT_BATCH(0);

   if (intel->is_g4x || intel->gen >= 5)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (intel->gen >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();

   if (hiz || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set.  Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER.  Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz) {
         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_mt->region->pitch - 1);
         OUT_RELOC(hiz_mt->region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.hiz_offset);
         ADVANCE_BATCH();
      } else {
         /* Mandatory null HiZ packet (see comment above). */
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
         struct intel_region *region = stencil_mt->region;

         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *    The pitch must be set to 2x the value computed based on width, as
          *    the stencil buffer is stored with two rows interleaved.
          */
         OUT_BATCH(2 * region->pitch - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.stencil_offset);
         ADVANCE_BATCH();
      } else {
         /* Mandatory null stencil packet (see comment above). */
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety.  If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
                GEN5_DEPTH_CLEAR_VALID |
                (2 - 2));
      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}
797
/* Depth/stencil buffer state atom: re-emitted on FBO changes and every new
 * batch (the packets are non-pipelined and batch-local).
 */
const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = brw_emit_depthbuffer,
};
806
807
808
/***********************************************************************
 * Polygon stipple packet
 */

static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   /* Non-pipelined packet: needs the gen6 post-sync workaround. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first.  If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout.  But if we're rendering
    * to a FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   }
   else {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
   }
   /* CACHED_BATCH: skips re-emission if identical to the last emit. */
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};
856
857
/***********************************************************************
 * Polygon stipple offset packet
 */

static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   /* Non-pipelined packet: needs the gen6 post-sync workaround. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window we have to invert the Y axis
    * in order to match the OpenGL pixel coordinate system, and our
    * offset must be matched to the window position.  If we're drawing
    * to a user-created FBO then our native pixel coordinate system
    * works just fine, and there's no window system to worry about.
    */
   if (_mesa_is_winsys_fbo(brw->intel.ctx.DrawBuffer))
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};
901
902 /**********************************************************************
903 * AA Line parameters
904 */
905 static void upload_aa_line_parameters(struct brw_context *brw)
906 {
907 struct intel_context *intel = &brw->intel;
908 struct gl_context *ctx = &brw->intel.ctx;
909
910 if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
911 return;
912
913 if (intel->gen == 6)
914 intel_emit_post_sync_nonzero_flush(intel);
915
916 OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
917 /* use legacy aa line coverage computation */
918 OUT_BATCH(0);
919 OUT_BATCH(0);
920 CACHED_BATCH();
921 }
922
923 const struct brw_tracked_state brw_aa_line_parameters = {
924 .dirty = {
925 .mesa = _NEW_LINE,
926 .brw = BRW_NEW_CONTEXT,
927 .cache = 0
928 },
929 .emit = upload_aa_line_parameters
930 };
931
/***********************************************************************
 * Line stipple packet
 */

static void upload_line_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   /* Non-pipelined packet: needs the gen6 post-sync workaround. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);
   /* DW2 carries both the repeat factor and its reciprocal in 3.13
    * fixed point (high 16 bits).
    */
   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
   tmpi = tmp * (1<<13);
   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};
966
967
968 /***********************************************************************
969 * Misc invariant state packets
970 */
971
/**
 * Emit state that is set once per context/batch and otherwise never varies:
 * pipeline select, SIP, and (per-gen) depth-offset-clamp and SVB setup.
 * The packet order below is deliberate; do not reorder.
 */
static void upload_invariant_state( struct brw_context *brw )
{
   struct intel_context *intel = &brw->intel;

   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (intel->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   if (intel->gen == 6) {
      int i;

      /* Program all four GS SVB index registers: index starts at 0 with a
       * maximum of 0xffffffff (i.e. effectively unbounded).
       */
      for (i = 0; i < 4; i++) {
	 BEGIN_BATCH(4);
	 OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
	 OUT_BATCH(i << SVB_INDEX_SHIFT);
	 OUT_BATCH(0);
	 OUT_BATCH(0xffffffff);
	 ADVANCE_BATCH();
      }
   }

   /* System Instruction Pointer of 0: no system routine installed. */
   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   /* Enable vertex-fetch statistics gathering only when DEBUG_STATS is set. */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
	     (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}
1016
/* Invariant state depends only on the context existing, so it is re-emitted
 * solely on BRW_NEW_CONTEXT (i.e. new batch / context start).
 */
const struct brw_tracked_state brw_invariant_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_invariant_state
};
1025
1026 /**
1027 * Define the base addresses which some state is referenced from.
1028 *
1029 * This allows us to avoid having to emit relocations for the objects,
1030 * and is actually required for binding table pointers on gen6.
1031 *
1032 * Surface state base address covers binding table pointers and
1033 * surface state objects, but not the surfaces that the surface state
1034 * objects point to.
1035 */
static void upload_state_base_address( struct brw_context *brw )
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   /* NOTE: in every dword below, bit 0 is the "modify enable" bit, so a
    * literal value of 1 programs a base address (or bound) of zero with the
    * modify bit set; OUT_RELOC with a delta of 1 likewise sets the modify
    * bit on a relocated address.
    */
   if (intel->gen >= 6) {
      /* SBA is non-pipelined; gen6 needs a post-sync flush beforehand. */
      if (intel->gen == 6)
	 intel_emit_post_sync_nonzero_flush(intel);

       BEGIN_BATCH(10);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
       /* General state base address: stateless DP read/write requests */
       OUT_BATCH(1);
       /* Surface state base address:
	* BINDING_TABLE_STATE
	* SURFACE_STATE
	*/
       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
        /* Dynamic state base address:
	 * SAMPLER_STATE
	 * SAMPLER_BORDER_COLOR_STATE
	 * CLIP, SF, WM/CC viewport state
	 * COLOR_CALC_STATE
	 * DEPTH_STENCIL_STATE
	 * BLEND_STATE
	 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
	 * Disable is clear, which we rely on)
	 */
       OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
				   I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

       OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		 1); /* Instruction base address: shader kernels (incl. SIP) */

       OUT_BATCH(1); /* General state upper bound */
       /* Dynamic state upper bound.  Although the documentation says that
	* programming it to zero will cause it to be ignored, that is a lie.
	* If this isn't programmed to a real bound, the sampler border color
	* pointer is rejected, causing border color to mysteriously fail.
	*/
       OUT_BATCH(0xfffff001);
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
   } else if (intel->gen == 5) {
       /* Ironlake: 8-dword SBA, no separate dynamic state base/bound. */
       BEGIN_BATCH(8);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
       OUT_BATCH(1); /* General state base address */
       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
		 1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		 1); /* Instruction base address */
       OUT_BATCH(0xfffff001); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
   } else {
       /* Gen4: 6-dword SBA; instruction base/bound fields don't exist. */
       BEGIN_BATCH(6);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
       OUT_BATCH(1); /* General state base address */
       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
		 1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_BATCH(1); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}
1138
/* STATE_BASE_ADDRESS must be re-emitted whenever the batch buffer (which
 * holds surface/dynamic state) or the program cache BO changes, since the
 * programmed base addresses point into those buffers.
 */
const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
	      BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};