i965: Change signature of brw_get_depthstencil_tile_masks()
[mesa.git] src/mesa/drivers/dri/i965/brw_misc_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */

#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

#include "main/fbobject.h"
#include "main/glformats.h"

/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
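   /* The next dword holds the inclusive clip-rect maximum: xmax in bits 15:0
    * and ymax in bits 31:16, which is why Width and Height are decremented
    * by one.  The final dword is the drawing-rectangle origin, left at (0, 0).
    */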
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16));
   OUT_BATCH(0);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset);
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->wm.bind_bo_offset);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
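   /* Each GEN6_BINDING_TABLE_MODIFY_* bit set above tells the hardware to
    * actually latch the corresponding per-stage pointer emitted below;
    * stages whose modify bit is clear would keep their previous pointer.
    */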
   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};

/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
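   /* The low bit OR'd into the GS and CLIP pointers above is those units'
    * enable flag; the GS entry is written as a plain zero (disabled) when no
    * GS program is active.
    */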
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}

static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};

uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *srb;

   if (!drb &&
       (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
       !srb->mt->stencil_mt &&
       (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
        intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
      drb = srb;
   }

   if (!drb)
      return BRW_DEPTHFORMAT_D32_FLOAT;

   switch (drb->mt->format) {
   case MESA_FORMAT_Z16:
      return BRW_DEPTHFORMAT_D16_UNORM;
   case MESA_FORMAT_Z32_FLOAT:
      return BRW_DEPTHFORMAT_D32_FLOAT;
   case MESA_FORMAT_X8_Z24:
      if (intel->gen >= 6) {
         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
      } else {
         /* Use D24_UNORM_S8, not D24_UNORM_X8.
          *
          * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
          * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
          * 3DSTATE_DEPTH_BUFFER.Surface_Format).
          *
          * However, on Gen5, D24_UNORM_X8 may be used only if separate
          * stencil is enabled, and we never enable it. From the Ironlake PRM,
          * same section as above,
          * Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
          *     If this field is disabled, the Surface Format of the depth
          *     buffer cannot be D24_UNORM_X8_UINT.
          */
         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
      }
   case MESA_FORMAT_S8_Z24:
      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   case MESA_FORMAT_Z32_FLOAT_X24S8:
      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
   default:
      _mesa_problem(ctx, "Unexpected depth format %s\n",
                    _mesa_get_format_name(intel_rb_format(drb)));
      return BRW_DEPTHFORMAT_D16_UNORM;
   }
}

/**
 * Returns the mask of how many bits of x and y must be handled through the
 * depthbuffer's draw offset x and y fields.
 *
 * The draw offset x/y field of the depthbuffer packet is unfortunately shared
 * between the depth, hiz, and stencil buffers. Because it can be hard to get
 * all 3 to agree on this value, we want to do as much drawing offset
 * adjustment as possible by moving the base offset of the 3 buffers, which is
 * restricted to tile boundaries.
 *
 * For each buffer, the remainder must be applied through the x/y draw offset.
 * This returns the worst-case mask of the low bits that have to go into the
 * packet. If the 3 buffers don't agree on the drawing offset ANDed with this
 * mask, then we're in trouble.
 */
void
brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                uint32_t depth_level,
                                uint32_t depth_layer,
                                struct intel_mipmap_tree *stencil_mt,
                                uint32_t *out_tile_mask_x,
                                uint32_t *out_tile_mask_y)
{
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (depth_mt) {
      intel_region_get_tile_masks(depth_mt->region,
                                  &tile_mask_x, &tile_mask_y, false);

      struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
      if (hiz_mt) {
         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
         intel_region_get_tile_masks(hiz_mt->region,
                                     &hiz_tile_mask_x, &hiz_tile_mask_y,
                                     false);

         /* Each HiZ row represents 2 rows of pixels */
         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
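         /* Widening the mask keeps it in pixel space: e.g. if the HiZ
          * buffer's own vertical tile mask is 0x1f (32 rows per tile), a HiZ
          * tile covers 64 pixel rows, so the pixel-space mask becomes 0x3f.
          */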

         tile_mask_x |= hiz_tile_mask_x;
         tile_mask_y |= hiz_tile_mask_y;
      }
   }

   if (stencil_mt) {
      if (stencil_mt->stencil_mt)
         stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Separate stencil buffer uses 64x64 tiles. */
         tile_mask_x |= 63;
         tile_mask_y |= 63;
      } else {
         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
         intel_region_get_tile_masks(stencil_mt->region,
                                     &stencil_tile_mask_x,
                                     &stencil_tile_mask_y, false);

         tile_mask_x |= stencil_tile_mask_x;
         tile_mask_y |= stencil_tile_mask_y;
      }
   }

   *out_tile_mask_x = tile_mask_x;
   *out_tile_mask_y = tile_mask_y;
}
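
/* Illustrative example: a 32bpp Y-tiled depth miptree yields tile masks of
 * (31, 31) from intel_region_get_tile_masks(); OR'ing in a separate
 * MESA_FORMAT_S8 stencil buffer's 64x64 W tiles widens the combined masks to
 * (63, 63), so all buffers' draw offsets must then agree in those low 6 bits
 * of x and y.
 */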

static struct intel_mipmap_tree *
get_stencil_miptree(struct intel_renderbuffer *irb)
{
   if (!irb)
      return NULL;
   if (irb->mt->stencil_mt)
      return irb->mt->stencil_mt;
   return irb->mt;
}

void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Check if depth buffer is in depth/stencil format. If so, then it's only
    * safe to invalidate it if we're also clearing stencil, and both depth_irb
    * and stencil_irb point to the same miptree.
    *
    * Note: it's not sufficient to check for the case where
    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
    * because this fails to catch depth/stencil buffers on hardware that uses
    * separate stencil. To catch that case, we check whether
    * depth_mt->stencil_mt is non-NULL.
    */
   if (depth_irb && invalidate_depth &&
       (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
        depth_mt->stencil_mt)) {
      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
         && depth_irb->mt == stencil_irb->mt;
   }

   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt,
                                   depth_mt ? depth_irb->mt_level : 0,
                                   depth_mt ? depth_irb->mt_layer : 0,
                                   stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *     "The 3 LSBs of both offsets must be zero to ensure correct
       *     alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (intel->gen == 4 && !intel->is_g4x) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(intel, depth_irb, invalidate_depth);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         int stencil_tile_x = stencil_draw_x & tile_mask_x;
         int stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase stencil
          * as well. (if we hadn't decided to rebase stencil before, the
          * post-stencil depth test will also rebase depth to try to match it
          * up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      if (intel->gen == 4 && !intel->is_g4x) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(intel, stencil_irb, invalidate_stencil);
      stencil_mt = get_stencil_miptree(stencil_irb);

      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(intel, depth_irb,
                                            invalidate_depth);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase. At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new depthstencil state so the hardware
    * packets can just dereference that without re-calculating tile offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   brw->depthstencil.hiz_mt = NULL;
   if (depth_irb) {
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      brw->depthstencil.depth_offset =
         intel_region_get_aligned_offset(depth_mt->region,
                                         depth_irb->draw_x & ~tile_mask_x,
                                         depth_irb->draw_y & ~tile_mask_y,
                                         false);
      if (depth_mt->hiz_mt) {
         brw->depthstencil.hiz_mt = depth_mt->hiz_mt;
         brw->depthstencil.hiz_offset =
            intel_region_get_aligned_offset(depth_mt->region,
                                            depth_irb->draw_x & ~tile_mask_x,
                                            (depth_irb->draw_y & ~tile_mask_y) / 2,
                                            false);
      }
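      /* The y coordinate is halved because each HiZ buffer row covers two
       * rows of the depth buffer, matching the mask widening done in
       * brw_get_depthstencil_tile_masks().
       */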
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
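         /* Hand-computed W-tile arithmetic: the y term steps down whole
          * tile rows via the byte pitch, and multiplying the tile-aligned x
          * by 64 advances one 4KB W tile (64 bytes wide by 64 rows) per 64
          * pixels of x.
          */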
      }
   }
}

void
brw_emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
   struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   bool separate_stencil = false;
   uint32_t depth_surface_type = BRW_SURFACE_NULL;
   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
   uint32_t depth_offset = 0;
   uint32_t width = 1, height = 1;

   if (stencil_mt) {
      separate_stencil = stencil_mt->format == MESA_FORMAT_S8;

      /* Gen7 supports only separate stencil */
      assert(separate_stencil || intel->gen < 7);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (depth_irb) {
      struct intel_region *region = depth_mt->region;

      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
       * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
       * depthstencil format.
       *
       * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable
       * be set to the same value. Gen 7 and later implicitly always set
       * Separate_Stencil_Enable; software cannot disable it.
       */
      if ((intel->gen < 7 && depth_mt->hiz_mt) || intel->gen >= 7) {
         assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
      }

      /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
      assert(intel->gen >= 7 || !separate_stencil || hiz_mt);

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz_mt || region->tiling == I915_TILING_Y);

      depthbuffer_format = brw_depthbuffer_format(brw);
      depth_surface_type = BRW_SURFACE_2D;
      depth_offset = brw->depthstencil.depth_offset;
      width = depth_irb->Base.Base.Width;
      height = depth_irb->Base.Base.Height;
   } else if (separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(intel->has_separate_stencil);

      depth_surface_type = BRW_SURFACE_2D;
      width = stencil_irb->Base.Base.Width;
      height = stencil_irb->Base.Base.Height;
   }

   intel->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                      depthbuffer_format, depth_surface_type,
                                      stencil_mt, hiz_mt, separate_stencil,
                                      width, height, tile_x, tile_y);
}

void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct intel_mipmap_tree *depth_mt,
                           uint32_t depth_offset, uint32_t depthbuffer_format,
                           uint32_t depth_surface_type,
                           struct intel_mipmap_tree *stencil_mt,
                           struct intel_mipmap_tree *hiz_mt,
                           bool separate_stencil, uint32_t width,
                           uint32_t height, uint32_t tile_x, uint32_t tile_y)
{
   struct intel_context *intel = &brw->intel;

   /* Enable the hiz bit if we're doing separate stencil, because it and the
    * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
    *
    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
    *     Enable must also be enabled.
    *
    *     [DevGT]: This field must be set to the same value (enabled or
    *     disabled) as Hierarchical Depth Buffer Enable
    */
   bool enable_hiz_ss = hiz_mt || separate_stencil;

   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   unsigned int len;
   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
             (BRW_TILEWALK_YMAJOR << 26) |
             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
              << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                depth_offset);
   } else {
      OUT_BATCH(0);
   }

   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
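   /* The width and height fields (starting at bits 6 and 19) hold
    * (size - 1) values; the intra-tile offsets are folded in so the render
    * area still covers the full miplevel once the hardware applies the draw
    * offset programmed below.
    */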
   OUT_BATCH(0);

   if (intel->is_g4x || intel->gen >= 5)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (intel->gen >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();

   if (hiz_mt || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_mt) {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_mt->region->pitch - 1);
         OUT_RELOC(hiz_mt->region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.hiz_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
         struct intel_region *region = stencil_mt->region;

         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements. From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *
          *     The pitch must be set to 2x the value computed based on width,
          *     as the stencil buffer is stored with two rows interleaved.
          */
         OUT_BATCH(2 * region->pitch - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.stencil_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_mt) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
                GEN5_DEPTH_CLEAR_VALID |
                (2 - 2));
      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}

const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = brw_emit_depthbuffer,
};


/***********************************************************************
 * Polygon stipple packet
 */

static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first. If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout. But if we're rendering
    * to a FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   } else {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
   }
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};


/***********************************************************************
 * Polygon stipple offset packet
 */

static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2 - 2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window we have to invert the Y axis
    * in order to match the OpenGL pixel coordinate system, and our
    * offset must be matched to the window position. If we're drawing
    * to a user-created FBO then our native pixel coordinate system
    * works just fine, and there's no window system to worry about.
    */
   if (_mesa_is_winsys_fbo(brw->intel.ctx.DrawBuffer))
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
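   /* The stipple pattern repeats every 32 rows; the offset above picks the
    * pattern row for the window's top scanline so that, after the Y flip,
    * row 0 of the OpenGL pattern still lands on the window's bottom row.
    */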
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};

/**********************************************************************
 * AA Line parameters
 */
static void upload_aa_line_parameters(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   /* Reserve space for the 3 dwords emitted below; without this, OUT_BATCH
    * could write past the end of the batchbuffer.
    */
   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
   /* use legacy aa line coverage computation */
   OUT_BATCH(0);
   OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};

/***********************************************************************
 * Line stipple packet
 */

static void upload_line_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);
   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
   tmpi = tmp * (1<<13);
   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
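   /* The upper 16 bits hold the reciprocal of the repeat count, converted to
    * fixed point by the 1 << 13 scale above (a U1.13 encoding); the low bits
    * carry the raw repeat count itself.
    */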
   CACHED_BATCH();
}

const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};


/***********************************************************************
 * Misc invariant state packets
 */

static void upload_invariant_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (intel->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   if (intel->gen == 6) {
      int i;

      for (i = 0; i < 4; i++) {
         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
         OUT_BATCH(i << SVB_INDEX_SHIFT);
         OUT_BATCH(0);
         OUT_BATCH(0xffffffff);
         ADVANCE_BATCH();
      }
   }

   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
             (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_invariant_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_invariant_state
};

/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (intel->gen >= 6) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
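      /* In each address dword of this packet, bit 0 is the modify-enable
       * bit, so OUT_BATCH(1) programs a base address of zero while still
       * latching the field; the OUT_RELOC(..., 1) calls likewise set bit 0
       * alongside the relocated buffer address.
       */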
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      /* Dynamic state upper bound. Although the documentation says that
       * programming it to zero will cause it to be ignored, that is a lie.
       * If this isn't programmed to a real bound, the sampler border color
       * pointer is rejected, causing border color to mysteriously fail.
       */
      OUT_BATCH(0xfffff001);
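      /* 0xfffff001 is the highest 4KB-aligned bound (0xfffff000) with the
       * modify-enable bit set in bit 0.
       */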
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (intel->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(0xfffff001); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    *    3DSTATE_PIPELINE_POINTERS
    *    3DSTATE_BINDING_TABLE_POINTERS
    *    MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake. The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    *    3DSTATE_CC_POINTERS
    *    3DSTATE_BINDING_TABLE_POINTERS
    *    3DSTATE_SAMPLER_STATE_POINTERS
    *    3DSTATE_VIEWPORT_STATE_POINTERS
    *    MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing. Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}

const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};