i965: Shorten context base class dereference chains.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

#include "main/fbobject.h"
#include "main/glformats.h"

/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16));
   OUT_BATCH(0);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};
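
/* To make the packing above concrete, a worked example with a hypothetical
 * 1920x1080 drawbuffer (the rectangle is inclusive, hence the "- 1"):
 *
 *    xmax = 1920 - 1 = 1919 = 0x077f   -> bits 15:0 of DW2
 *    ymax = 1080 - 1 = 1079 = 0x0437   -> bits 31:16 of DW2
 *    DW2  = (1079 << 16) | 1919 = 0x0437077f
 *
 * DW1 (xmin, ymin) and DW3 (the drawing rectangle origin) stay zero since
 * FBO/DRI2 rendering always starts at the buffer's origin.
 */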

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset);
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->wm.bind_bo_offset);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};
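
/* A sketch of how the hardware walks these pointers (names hypothetical;
 * this only restates the relationship described in the comment above):
 *
 *    binding_table = surface_state_base_address + bind_bo_offset;
 *    surface_state = surface_state_base_address + binding_table[index];
 *
 * i.e. both the table itself and each of its entries are offsets relative
 * to Surface State Base Address, which is why BRW_NEW_STATE_BASE_ADDRESS
 * appears in the dirty bits above.
 */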

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};

/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}

static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};
198
199 uint32_t
200 brw_depthbuffer_format(struct brw_context *brw)
201 {
202 struct intel_context *intel = &brw->intel;
203 struct gl_context *ctx = &intel->ctx;
204 struct gl_framebuffer *fb = ctx->DrawBuffer;
205 struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
206 struct intel_renderbuffer *srb;
207
208 if (!drb &&
209 (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
210 !srb->mt->stencil_mt &&
211 (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
212 intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
213 drb = srb;
214 }
215
216 if (!drb)
217 return BRW_DEPTHFORMAT_D32_FLOAT;
218
219 switch (drb->mt->format) {
220 case MESA_FORMAT_Z16:
221 return BRW_DEPTHFORMAT_D16_UNORM;
222 case MESA_FORMAT_Z32_FLOAT:
223 return BRW_DEPTHFORMAT_D32_FLOAT;
224 case MESA_FORMAT_X8_Z24:
225 if (intel->gen >= 6) {
226 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
227 } else {
228 /* Use D24_UNORM_S8, not D24_UNORM_X8.
229 *
230 * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
231 * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
232 * 3DSTATE_DEPTH_BUFFER.Surface_Format).
233 *
234 * However, on Gen5, D24_UNORM_X8 may be used only if separate
235 * stencil is enabled, and we never enable it. From the Ironlake PRM,
236 * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
237 * If this field is disabled, the Surface Format of the depth
238 * buffer cannot be D24_UNORM_X8_UINT.
239 */
240 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
241 }
242 case MESA_FORMAT_S8_Z24:
243 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
244 case MESA_FORMAT_Z32_FLOAT_X24S8:
245 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
246 default:
247 _mesa_problem(ctx, "Unexpected depth format %s\n",
248 _mesa_get_format_name(intel_rb_format(drb)));
249 return BRW_DEPTHFORMAT_D16_UNORM;
250 }
251 }

/**
 * Returns the mask of how many bits of x and y must be handled through the
 * depthbuffer's draw offset x and y fields.
 *
 * The draw offset x/y field of the depthbuffer packet is unfortunately shared
 * between the depth, hiz, and stencil buffers. Because it can be hard to get
 * all 3 to agree on this value, we want to do as much drawing offset
 * adjustment as possible by moving the base offset of the 3 buffers, which is
 * restricted to tile boundaries.
 *
 * For each buffer, the remainder must be applied through the x/y draw offset.
 * This returns the worst-case mask of the low bits that have to go into the
 * packet. If the 3 buffers don't agree on the drawing offset ANDed with this
 * mask, then we're in trouble.
 */
void
brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                uint32_t depth_level,
                                uint32_t depth_layer,
                                struct intel_mipmap_tree *stencil_mt,
                                uint32_t *out_tile_mask_x,
                                uint32_t *out_tile_mask_y)
{
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (depth_mt) {
      intel_region_get_tile_masks(depth_mt->region,
                                  &tile_mask_x, &tile_mask_y, false);

      if (intel_miptree_slice_has_hiz(depth_mt, depth_level, depth_layer)) {
         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
         intel_region_get_tile_masks(depth_mt->hiz_mt->region,
                                     &hiz_tile_mask_x, &hiz_tile_mask_y,
                                     false);

         /* Each HiZ row represents 2 rows of pixels */
         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;

         tile_mask_x |= hiz_tile_mask_x;
         tile_mask_y |= hiz_tile_mask_y;
      }
   }

   if (stencil_mt) {
      if (stencil_mt->stencil_mt)
         stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Separate stencil buffer uses 64x64 tiles. */
         tile_mask_x |= 63;
         tile_mask_y |= 63;
      } else {
         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
         intel_region_get_tile_masks(stencil_mt->region,
                                     &stencil_tile_mask_x,
                                     &stencil_tile_mask_y, false);

         tile_mask_x |= stencil_tile_mask_x;
         tile_mask_y |= stencil_tile_mask_y;
      }
   }

   *out_tile_mask_x = tile_mask_x;
   *out_tile_mask_y = tile_mask_y;
}
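
/* A worked example, assuming a Y-tiled depth miptree with 4 bytes per pixel
 * (a 128-byte x 32-row Y tile then spans 32x32 pixels) plus HiZ:
 *
 *    depth: tile_mask_x = 31, tile_mask_y = 31
 *    hiz:   a HiZ mask_y of 31 becomes (31 << 1) | 1 = 63, because each
 *           HiZ row covers two pixel rows
 *    total: tile_mask_x = 31, tile_mask_y = 31 | 63 = 63
 *
 * A draw offset then splits as draw_x = (draw_x & ~mask_x) + (draw_x & mask_x):
 * the first term is folded into the buffer base address, and the second must
 * fit in the packet's draw offset field.
 */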

static struct intel_mipmap_tree *
get_stencil_miptree(struct intel_renderbuffer *irb)
{
   if (!irb)
      return NULL;
   if (irb->mt->stencil_mt)
      return irb->mt->stencil_mt;
   return irb->mt;
}

void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Check if depth buffer is in depth/stencil format. If so, then it's only
    * safe to invalidate it if we're also clearing stencil, and both depth_irb
    * and stencil_irb point to the same miptree.
    *
    * Note: it's not sufficient to check for the case where
    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
    * because this fails to catch depth/stencil buffers on hardware that uses
    * separate stencil. To catch that case, we check whether
    * depth_mt->stencil_mt is non-NULL.
    */
   if (depth_irb && invalidate_depth &&
       (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
        depth_mt->stencil_mt)) {
      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
         && depth_irb->mt == stencil_irb->mt;
   }

   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt,
                                   depth_mt ? depth_irb->mt_level : 0,
                                   depth_mt ? depth_irb->mt_layer : 0,
                                   stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *     "The 3 LSBs of both offsets must be zero to ensure correct
       *     alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (intel->gen == 4 && !intel->is_g4x) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         int stencil_tile_x = stencil_draw_x & tile_mask_x;
         int stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase stencil
          * as well. (if we hadn't decided to rebase stencil before, the
          * post-stencil depth test will also rebase depth to try to match it
          * up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      if (intel->gen == 4 && !intel->is_g4x) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(brw, stencil_irb, invalidate_stencil);
      stencil_mt = get_stencil_miptree(stencil_irb);

      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase. At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new depthstencil state so the hardware
    * packets can just dereference that without re-calculating tile offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   if (depth_irb) {
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      brw->depthstencil.depth_offset =
         intel_region_get_aligned_offset(depth_mt->region,
                                         depth_irb->draw_x & ~tile_mask_x,
                                         depth_irb->draw_y & ~tile_mask_y,
                                         false);
      if (intel_renderbuffer_has_hiz(depth_irb)) {
         brw->depthstencil.hiz_offset =
            intel_region_get_aligned_offset(depth_mt->region,
                                            depth_irb->draw_x & ~tile_mask_x,
                                            (depth_irb->draw_y & ~tile_mask_y) / 2,
                                            false);
      }
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
      }
   }
}
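
/* A worked example of the stencil_offset math above, with hypothetical
 * values: a W tile is 64x64 pixels at 1 byte/pixel, i.e. 4096 contiguous
 * bytes, so for stencil_draw_x/y = (70, 130) and tile_mask_x/y = 63:
 *
 *    (130 & ~63) * pitch = 128 * pitch   skips two full rows of tiles
 *    (70  & ~63) * 64    = 64 * 64       skips one 4096-byte tile
 *
 * leaving the in-tile remainders (6, 2) to be expressed through the packet's
 * draw offset fields (after the 8-pixel truncation above).
 */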

void
brw_emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   bool hiz = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
   bool separate_stencil = false;
   uint32_t depth_surface_type = BRW_SURFACE_NULL;
   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
   uint32_t depth_offset = 0;
   uint32_t width = 1, height = 1;

   if (stencil_mt) {
      separate_stencil = stencil_mt->format == MESA_FORMAT_S8;

      /* Gen7 supports only separate stencil */
      assert(separate_stencil || intel->gen < 7);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (depth_irb && depth_mt) {
      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
       * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
       * depthstencil format.
       *
       * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable
       * be set to the same value. Gen7+ implicitly always sets
       * Separate_Stencil_Enable; software cannot disable it.
       */
      if ((intel->gen < 7 && hiz) || intel->gen >= 7) {
         assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
      }

      /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
      assert(intel->gen >= 7 || !separate_stencil || hiz);

      assert(intel->gen < 6 || depth_mt->region->tiling == I915_TILING_Y);
      assert(!hiz || depth_mt->region->tiling == I915_TILING_Y);

      depthbuffer_format = brw_depthbuffer_format(brw);
      depth_surface_type = BRW_SURFACE_2D;
      depth_offset = brw->depthstencil.depth_offset;
      width = depth_irb->Base.Base.Width;
      height = depth_irb->Base.Base.Height;
   } else if (separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(intel->has_separate_stencil);

      depth_surface_type = BRW_SURFACE_2D;
      width = stencil_irb->Base.Base.Width;
      height = stencil_irb->Base.Base.Height;
   }

   brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                    depthbuffer_format, depth_surface_type,
                                    stencil_mt, hiz, separate_stencil,
                                    width, height, tile_x, tile_y);
}

void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct intel_mipmap_tree *depth_mt,
                           uint32_t depth_offset, uint32_t depthbuffer_format,
                           uint32_t depth_surface_type,
                           struct intel_mipmap_tree *stencil_mt,
                           bool hiz, bool separate_stencil,
                           uint32_t width, uint32_t height,
                           uint32_t tile_x, uint32_t tile_y)
{
   struct intel_context *intel = &brw->intel;

   /* Enable the hiz bit if we're doing separate stencil, because it and the
    * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
    *     Enable must also be enabled.
    *
    *     [DevGT]: This field must be set to the same value (enabled or
    *     disabled) as Hierarchical Depth Buffer Enable
    */
   bool enable_hiz_ss = hiz || separate_stencil;

   /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(brw);
      intel_emit_depth_stall_flushes(brw);
   }

   unsigned int len;
   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
             (BRW_TILEWALK_YMAJOR << 26) |
             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
              << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                depth_offset);
   } else {
      OUT_BATCH(0);
   }

   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
   OUT_BATCH(0);

   if (intel->is_g4x || intel->gen >= 5)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (intel->gen >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();

   if (hiz || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz) {
         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_mt->region->pitch - 1);
         OUT_RELOC(hiz_mt->region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.hiz_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
         struct intel_region *region = stencil_mt->region;

         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements. From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *     The pitch must be set to 2x the value computed based on
          *     width, as the stencil buffer is stored with two rows
          *     interleaved.
          */
         OUT_BATCH(2 * region->pitch - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.stencil_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
    *     packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(brw);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
                GEN5_DEPTH_CLEAR_VALID |
                (2 - 2));
      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}

const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = brw_emit_depthbuffer,
};


/***********************************************************************
 * Polygon stipple packet
 */

static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first. If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout. But if we're rendering
    * to an FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   } else {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
   }
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};
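
/* A note on the inversion above: with the winsys framebuffer, GL's
 * bottom-up rows map onto the hardware's top-down layout, so packet row i
 * carries GL row 31 - i. For example, PolygonStipple[31] (GL's top row) is
 * emitted first and PolygonStipple[0] (GL's bottom row) last.
 */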


/***********************************************************************
 * Polygon stipple offset packet
 */

static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2 - 2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window we have to invert the Y axis
    * in order to match the OpenGL pixel coordinate system, and our
    * offset must be matched to the window position. If we're drawing
    * to a user-created FBO then our native pixel coordinate system
    * works just fine, and there's no window system to worry about.
    */
   if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};
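
/* A worked example of the winsys offset above, for a hypothetical
 * 600-pixel-tall window:
 *
 *    600 & 31       = 24
 *    (32 - 24) & 31 = 8
 *
 * so the 32x32 stipple pattern is shifted by 8 rows, anchoring its origin
 * to the window's bottom-left corner instead of the top-left.
 */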

/**********************************************************************
 * AA Line parameters
 */
static void upload_aa_line_parameters(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
   /* use legacy aa line coverage computation */
   OUT_BATCH(0);
   OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};

/***********************************************************************
 * Line stipple packet
 */

static void upload_line_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);

   if (intel->gen >= 7) {
      /* in U1.16 */
      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
      tmpi = tmp * (1 << 16);
      OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
   } else {
      /* in U1.13 */
      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
      tmpi = tmp * (1 << 13);
      OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   }

   CACHED_BATCH();
}

const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};
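
/* A worked example of the fixed-point conversion above, for a hypothetical
 * StippleFactor of 3 on gen < 7 (U1.13):
 *
 *    tmp  = 1.0 / 3
 *    tmpi = (1.0 / 3) * (1 << 13) = 2730   (truncated from 2730.67)
 *    DW2  = (2730 << 16) | 3
 *
 * On gen7 the same reciprocal is computed in U1.16 and placed at bit 15.
 */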


/***********************************************************************
 * Misc invariant state packets
 */

void
brw_upload_invariant_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (intel->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
             (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_invariant_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = brw_upload_invariant_state
};

/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (intel->gen >= 6) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(brw);

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
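      /* In each base-address DWord of this packet, bit 0 is a "Modify
       * Enable" flag, so the bare OUT_BATCH(1) values program an address
       * of zero with the update enabled, and the trailing 1 passed to
       * OUT_RELOC() ORs that same enable bit into the relocated buffer
       * address.
       */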
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      /* Dynamic state upper bound. Although the documentation says that
       * programming it to zero will cause it to be ignored, that is a lie.
       * If this isn't programmed to a real bound, the sampler border color
       * pointer is rejected, causing border color to mysteriously fail.
       */
      OUT_BATCH(0xfffff001);
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (intel->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(0xfffff001); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    *    3DSTATE_PIPELINE_POINTERS
    *    3DSTATE_BINDING_TABLE_POINTERS
    *    MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake. The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    *    3DSTATE_CC_POINTERS
    *    3DSTATE_BINDING_TABLE_POINTERS
    *    3DSTATE_SAMPLER_STATE_POINTERS
    *    3DSTATE_VIEWPORT_STATE_POINTERS
    *    MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing. Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}

const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};