2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
34 #include "intel_batchbuffer.h"
35 #include "intel_fbo.h"
36 #include "intel_mipmap_tree.h"
37 #include "intel_regions.h"
39 #include "brw_context.h"
40 #include "brw_state.h"
41 #include "brw_defines.h"
43 #include "main/fbobject.h"
44 #include "main/glformats.h"
46 /* Constant single cliprect for framebuffer object or DRI2 drawing */
47 static void upload_drawing_rect(struct brw_context
*brw
)
49 struct intel_context
*intel
= &brw
->intel
;
50 struct gl_context
*ctx
= &intel
->ctx
;
53 OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE
<< 16 | (4 - 2));
54 OUT_BATCH(0); /* xmin, ymin */
55 OUT_BATCH(((ctx
->DrawBuffer
->Width
- 1) & 0xffff) |
56 ((ctx
->DrawBuffer
->Height
- 1) << 16));
61 const struct brw_tracked_state brw_drawing_rect
= {
64 .brw
= BRW_NEW_CONTEXT
,
67 .emit
= upload_drawing_rect
71 * Upload the binding table pointers, which point each stage's array of surface
74 * The binding table pointers are relative to the surface state base address,
75 * which points at the batchbuffer containing the streamed batch state.
77 static void upload_binding_table_pointers(struct brw_context
*brw
)
80 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS
<< 16 | (6 - 2));
81 OUT_BATCH(brw
->vs
.bind_bo_offset
);
82 OUT_BATCH(0); /* gs */
83 OUT_BATCH(0); /* clip */
84 OUT_BATCH(0); /* sf */
85 OUT_BATCH(brw
->wm
.bind_bo_offset
);
89 const struct brw_tracked_state brw_binding_table_pointers
= {
92 .brw
= (BRW_NEW_BATCH
|
93 BRW_NEW_STATE_BASE_ADDRESS
|
94 BRW_NEW_VS_BINDING_TABLE
|
95 BRW_NEW_GS_BINDING_TABLE
|
96 BRW_NEW_PS_BINDING_TABLE
),
99 .emit
= upload_binding_table_pointers
,
103 * Upload the binding table pointers, which point each stage's array of surface
106 * The binding table pointers are relative to the surface state base address,
107 * which points at the batchbuffer containing the streamed batch state.
109 static void upload_gen6_binding_table_pointers(struct brw_context
*brw
)
112 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS
<< 16 |
113 GEN6_BINDING_TABLE_MODIFY_VS
|
114 GEN6_BINDING_TABLE_MODIFY_GS
|
115 GEN6_BINDING_TABLE_MODIFY_PS
|
117 OUT_BATCH(brw
->vs
.bind_bo_offset
); /* vs */
118 OUT_BATCH(brw
->gs
.bind_bo_offset
); /* gs */
119 OUT_BATCH(brw
->wm
.bind_bo_offset
); /* wm/ps */
123 const struct brw_tracked_state gen6_binding_table_pointers
= {
126 .brw
= (BRW_NEW_BATCH
|
127 BRW_NEW_STATE_BASE_ADDRESS
|
128 BRW_NEW_VS_BINDING_TABLE
|
129 BRW_NEW_GS_BINDING_TABLE
|
130 BRW_NEW_PS_BINDING_TABLE
),
133 .emit
= upload_gen6_binding_table_pointers
,
137 * Upload pointers to the per-stage state.
139 * The state pointers in this packet are all relative to the general state
140 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
142 static void upload_pipelined_state_pointers(struct brw_context
*brw
)
144 struct intel_context
*intel
= &brw
->intel
;
146 if (intel
->gen
== 5) {
147 /* Need to flush before changing clip max threads for errata. */
154 OUT_BATCH(_3DSTATE_PIPELINED_POINTERS
<< 16 | (7 - 2));
155 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
156 brw
->vs
.state_offset
);
157 if (brw
->gs
.prog_active
)
158 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
159 brw
->gs
.state_offset
| 1);
162 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
163 brw
->clip
.state_offset
| 1);
164 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
165 brw
->sf
.state_offset
);
166 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
167 brw
->wm
.state_offset
);
168 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
169 brw
->cc
.state_offset
);
172 brw
->state
.dirty
.brw
|= BRW_NEW_PSP
;
/* Emit the pipelined state pointers together with the URB fence and CS URB
 * state, which must stay consistent with each other.
 */
static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}
182 const struct brw_tracked_state brw_psp_urb_cbs
= {
185 .brw
= (BRW_NEW_URB_FENCE
|
187 BRW_NEW_STATE_BASE_ADDRESS
),
188 .cache
= (CACHE_NEW_VS_UNIT
|
191 CACHE_NEW_CLIP_UNIT
|
196 .emit
= upload_psp_urb_cbs
,
200 brw_depthbuffer_format(struct brw_context
*brw
)
202 struct intel_context
*intel
= &brw
->intel
;
203 struct gl_context
*ctx
= &intel
->ctx
;
204 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
205 struct intel_renderbuffer
*drb
= intel_get_renderbuffer(fb
, BUFFER_DEPTH
);
206 struct intel_renderbuffer
*srb
;
209 (srb
= intel_get_renderbuffer(fb
, BUFFER_STENCIL
)) &&
210 !srb
->mt
->stencil_mt
&&
211 (intel_rb_format(srb
) == MESA_FORMAT_S8_Z24
||
212 intel_rb_format(srb
) == MESA_FORMAT_Z32_FLOAT_X24S8
)) {
217 return BRW_DEPTHFORMAT_D32_FLOAT
;
219 switch (drb
->mt
->format
) {
220 case MESA_FORMAT_Z16
:
221 return BRW_DEPTHFORMAT_D16_UNORM
;
222 case MESA_FORMAT_Z32_FLOAT
:
223 return BRW_DEPTHFORMAT_D32_FLOAT
;
224 case MESA_FORMAT_X8_Z24
:
225 if (intel
->gen
>= 6) {
226 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
;
228 /* Use D24_UNORM_S8, not D24_UNORM_X8.
230 * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
231 * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
232 * 3DSTATE_DEPTH_BUFFER.Surface_Format).
234 * However, on Gen5, D24_UNORM_X8 may be used only if separate
235 * stencil is enabled, and we never enable it. From the Ironlake PRM,
236 * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
237 * If this field is disabled, the Surface Format of the depth
238 * buffer cannot be D24_UNORM_X8_UINT.
240 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
242 case MESA_FORMAT_S8_Z24
:
243 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
244 case MESA_FORMAT_Z32_FLOAT_X24S8
:
245 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT
;
247 _mesa_problem(ctx
, "Unexpected depth format %s\n",
248 _mesa_get_format_name(intel_rb_format(drb
)));
249 return BRW_DEPTHFORMAT_D16_UNORM
;
254 * Returns the mask of how many bits of x and y must be handled through the
255 * depthbuffer's draw offset x and y fields.
257 * The draw offset x/y field of the depthbuffer packet is unfortunately shared
258 * between the depth, hiz, and stencil buffers. Because it can be hard to get
259 * all 3 to agree on this value, we want to do as much drawing offset
260 * adjustment as possible by moving the base offset of the 3 buffers, which is
261 * restricted to tile boundaries.
263 * For each buffer, the remainder must be applied through the x/y draw offset.
264 * This returns the worst-case mask of the low bits that have to go into the
265 * packet. If the 3 buffers don't agree on the drawing offset ANDed with this
266 * mask, then we're in trouble.
269 brw_get_depthstencil_tile_masks(struct intel_mipmap_tree
*depth_mt
,
270 uint32_t depth_level
,
271 uint32_t depth_layer
,
272 struct intel_mipmap_tree
*stencil_mt
,
273 uint32_t *out_tile_mask_x
,
274 uint32_t *out_tile_mask_y
)
276 uint32_t tile_mask_x
= 0, tile_mask_y
= 0;
279 intel_region_get_tile_masks(depth_mt
->region
,
280 &tile_mask_x
, &tile_mask_y
, false);
282 if (intel_miptree_slice_has_hiz(depth_mt
, depth_level
, depth_layer
)) {
283 uint32_t hiz_tile_mask_x
, hiz_tile_mask_y
;
284 intel_region_get_tile_masks(depth_mt
->hiz_mt
->region
,
285 &hiz_tile_mask_x
, &hiz_tile_mask_y
, false);
287 /* Each HiZ row represents 2 rows of pixels */
288 hiz_tile_mask_y
= hiz_tile_mask_y
<< 1 | 1;
290 tile_mask_x
|= hiz_tile_mask_x
;
291 tile_mask_y
|= hiz_tile_mask_y
;
296 if (stencil_mt
->stencil_mt
)
297 stencil_mt
= stencil_mt
->stencil_mt
;
299 if (stencil_mt
->format
== MESA_FORMAT_S8
) {
300 /* Separate stencil buffer uses 64x64 tiles. */
304 uint32_t stencil_tile_mask_x
, stencil_tile_mask_y
;
305 intel_region_get_tile_masks(stencil_mt
->region
,
306 &stencil_tile_mask_x
,
307 &stencil_tile_mask_y
, false);
309 tile_mask_x
|= stencil_tile_mask_x
;
310 tile_mask_y
|= stencil_tile_mask_y
;
314 *out_tile_mask_x
= tile_mask_x
;
315 *out_tile_mask_y
= tile_mask_y
;
318 static struct intel_mipmap_tree
*
319 get_stencil_miptree(struct intel_renderbuffer
*irb
)
323 if (irb
->mt
->stencil_mt
)
324 return irb
->mt
->stencil_mt
;
329 brw_workaround_depthstencil_alignment(struct brw_context
*brw
,
330 GLbitfield clear_mask
)
332 struct intel_context
*intel
= &brw
->intel
;
333 struct gl_context
*ctx
= &intel
->ctx
;
334 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
335 bool rebase_depth
= false;
336 bool rebase_stencil
= false;
337 struct intel_renderbuffer
*depth_irb
= intel_get_renderbuffer(fb
, BUFFER_DEPTH
);
338 struct intel_renderbuffer
*stencil_irb
= intel_get_renderbuffer(fb
, BUFFER_STENCIL
);
339 struct intel_mipmap_tree
*depth_mt
= NULL
;
340 struct intel_mipmap_tree
*stencil_mt
= get_stencil_miptree(stencil_irb
);
341 uint32_t tile_x
= 0, tile_y
= 0, stencil_tile_x
= 0, stencil_tile_y
= 0;
342 uint32_t stencil_draw_x
= 0, stencil_draw_y
= 0;
343 bool invalidate_depth
= clear_mask
& BUFFER_BIT_DEPTH
;
344 bool invalidate_stencil
= clear_mask
& BUFFER_BIT_STENCIL
;
347 depth_mt
= depth_irb
->mt
;
349 /* Check if depth buffer is in depth/stencil format. If so, then it's only
350 * safe to invalidate it if we're also clearing stencil, and both depth_irb
351 * and stencil_irb point to the same miptree.
353 * Note: it's not sufficient to check for the case where
354 * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
355 * because this fails to catch depth/stencil buffers on hardware that uses
356 * separate stencil. To catch that case, we check whether
357 * depth_mt->stencil_mt is non-NULL.
359 if (depth_irb
&& invalidate_depth
&&
360 (_mesa_get_format_base_format(depth_mt
->format
) == GL_DEPTH_STENCIL
||
361 depth_mt
->stencil_mt
)) {
362 invalidate_depth
= invalidate_stencil
&& depth_irb
&& stencil_irb
363 && depth_irb
->mt
== stencil_irb
->mt
;
366 uint32_t tile_mask_x
, tile_mask_y
;
367 brw_get_depthstencil_tile_masks(depth_mt
,
368 depth_mt
? depth_irb
->mt_level
: 0,
369 depth_mt
? depth_irb
->mt_layer
: 0,
371 &tile_mask_x
, &tile_mask_y
);
374 tile_x
= depth_irb
->draw_x
& tile_mask_x
;
375 tile_y
= depth_irb
->draw_y
& tile_mask_y
;
377 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
378 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
379 * Coordinate Offset X/Y":
381 * "The 3 LSBs of both offsets must be zero to ensure correct
384 if (tile_x
& 7 || tile_y
& 7)
387 /* We didn't even have intra-tile offsets before g45. */
388 if (intel
->gen
== 4 && !intel
->is_g4x
) {
389 if (tile_x
|| tile_y
)
394 perf_debug("HW workaround: blitting depth level %d to a temporary "
395 "to fix alignment (depth tile offset %d,%d)\n",
396 depth_irb
->mt_level
, tile_x
, tile_y
);
397 intel_renderbuffer_move_to_temp(brw
, depth_irb
, invalidate_depth
);
398 /* In the case of stencil_irb being the same packed depth/stencil
399 * texture but not the same rb, make it point at our rebased mt, too.
402 stencil_irb
!= depth_irb
&&
403 stencil_irb
->mt
== depth_mt
) {
404 intel_miptree_reference(&stencil_irb
->mt
, depth_irb
->mt
);
405 intel_renderbuffer_set_draw_offset(stencil_irb
);
408 stencil_mt
= get_stencil_miptree(stencil_irb
);
410 tile_x
= depth_irb
->draw_x
& tile_mask_x
;
411 tile_y
= depth_irb
->draw_y
& tile_mask_y
;
415 stencil_mt
= get_stencil_miptree(stencil_irb
);
416 intel_miptree_get_image_offset(stencil_mt
,
417 stencil_irb
->mt_level
,
418 stencil_irb
->mt_layer
,
419 &stencil_draw_x
, &stencil_draw_y
);
420 int stencil_tile_x
= stencil_draw_x
& tile_mask_x
;
421 int stencil_tile_y
= stencil_draw_y
& tile_mask_y
;
423 /* If stencil doesn't match depth, then we'll need to rebase stencil
424 * as well. (if we hadn't decided to rebase stencil before, the
425 * post-stencil depth test will also rebase depth to try to match it
428 if (tile_x
!= stencil_tile_x
||
429 tile_y
!= stencil_tile_y
) {
430 rebase_stencil
= true;
435 /* If we have (just) stencil, check it for ignored low bits as well */
437 intel_miptree_get_image_offset(stencil_mt
,
438 stencil_irb
->mt_level
,
439 stencil_irb
->mt_layer
,
440 &stencil_draw_x
, &stencil_draw_y
);
441 stencil_tile_x
= stencil_draw_x
& tile_mask_x
;
442 stencil_tile_y
= stencil_draw_y
& tile_mask_y
;
444 if (stencil_tile_x
& 7 || stencil_tile_y
& 7)
445 rebase_stencil
= true;
447 if (intel
->gen
== 4 && !intel
->is_g4x
) {
448 if (stencil_tile_x
|| stencil_tile_y
)
449 rebase_stencil
= true;
453 if (rebase_stencil
) {
454 perf_debug("HW workaround: blitting stencil level %d to a temporary "
455 "to fix alignment (stencil tile offset %d,%d)\n",
456 stencil_irb
->mt_level
, stencil_tile_x
, stencil_tile_y
);
458 intel_renderbuffer_move_to_temp(brw
, stencil_irb
, invalidate_stencil
);
459 stencil_mt
= get_stencil_miptree(stencil_irb
);
461 intel_miptree_get_image_offset(stencil_mt
,
462 stencil_irb
->mt_level
,
463 stencil_irb
->mt_layer
,
464 &stencil_draw_x
, &stencil_draw_y
);
465 stencil_tile_x
= stencil_draw_x
& tile_mask_x
;
466 stencil_tile_y
= stencil_draw_y
& tile_mask_y
;
468 if (depth_irb
&& depth_irb
->mt
== stencil_irb
->mt
) {
469 intel_miptree_reference(&depth_irb
->mt
, stencil_irb
->mt
);
470 intel_renderbuffer_set_draw_offset(depth_irb
);
471 } else if (depth_irb
&& !rebase_depth
) {
472 if (tile_x
!= stencil_tile_x
||
473 tile_y
!= stencil_tile_y
) {
474 perf_debug("HW workaround: blitting depth level %d to a temporary "
475 "to match stencil level %d alignment (depth tile offset "
476 "%d,%d, stencil offset %d,%d)\n",
478 stencil_irb
->mt_level
,
480 stencil_tile_x
, stencil_tile_y
);
482 intel_renderbuffer_move_to_temp(brw
, depth_irb
, invalidate_depth
);
484 tile_x
= depth_irb
->draw_x
& tile_mask_x
;
485 tile_y
= depth_irb
->draw_y
& tile_mask_y
;
487 if (stencil_irb
&& stencil_irb
->mt
== depth_mt
) {
488 intel_miptree_reference(&stencil_irb
->mt
, depth_irb
->mt
);
489 intel_renderbuffer_set_draw_offset(stencil_irb
);
492 WARN_ONCE(stencil_tile_x
!= tile_x
||
493 stencil_tile_y
!= tile_y
,
494 "Rebased stencil tile offset (%d,%d) doesn't match depth "
495 "tile offset (%d,%d).\n",
496 stencil_tile_x
, stencil_tile_y
,
503 tile_x
= stencil_tile_x
;
504 tile_y
= stencil_tile_y
;
507 /* While we just tried to get everything aligned, we may have failed to do
508 * so in the case of rendering to array or 3D textures, where nonzero faces
509 * will still have an offset post-rebase. At least give an informative
512 WARN_ONCE((tile_x
& 7) || (tile_y
& 7),
513 "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
514 "Truncating offset, bad rendering may occur.\n");
518 /* Now, after rebasing, save off the new dephtstencil state so the hardware
519 * packets can just dereference that without re-calculating tile offsets.
521 brw
->depthstencil
.tile_x
= tile_x
;
522 brw
->depthstencil
.tile_y
= tile_y
;
523 brw
->depthstencil
.depth_offset
= 0;
524 brw
->depthstencil
.stencil_offset
= 0;
525 brw
->depthstencil
.hiz_offset
= 0;
526 brw
->depthstencil
.depth_mt
= NULL
;
527 brw
->depthstencil
.stencil_mt
= NULL
;
529 depth_mt
= depth_irb
->mt
;
530 brw
->depthstencil
.depth_mt
= depth_mt
;
531 brw
->depthstencil
.depth_offset
=
532 intel_region_get_aligned_offset(depth_mt
->region
,
533 depth_irb
->draw_x
& ~tile_mask_x
,
534 depth_irb
->draw_y
& ~tile_mask_y
,
536 if (intel_renderbuffer_has_hiz(depth_irb
)) {
537 brw
->depthstencil
.hiz_offset
=
538 intel_region_get_aligned_offset(depth_mt
->region
,
539 depth_irb
->draw_x
& ~tile_mask_x
,
540 (depth_irb
->draw_y
& ~tile_mask_y
) /
546 stencil_mt
= get_stencil_miptree(stencil_irb
);
548 brw
->depthstencil
.stencil_mt
= stencil_mt
;
549 if (stencil_mt
->format
== MESA_FORMAT_S8
) {
550 /* Note: we can't compute the stencil offset using
551 * intel_region_get_aligned_offset(), because stencil_region claims
552 * that the region is untiled even though it's W tiled.
554 brw
->depthstencil
.stencil_offset
=
555 (stencil_draw_y
& ~tile_mask_y
) * stencil_mt
->region
->pitch
+
556 (stencil_draw_x
& ~tile_mask_x
) * 64;
562 brw_emit_depthbuffer(struct brw_context
*brw
)
564 struct intel_context
*intel
= &brw
->intel
;
565 struct gl_context
*ctx
= &intel
->ctx
;
566 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
568 struct intel_renderbuffer
*depth_irb
= intel_get_renderbuffer(fb
, BUFFER_DEPTH
);
569 struct intel_renderbuffer
*stencil_irb
= intel_get_renderbuffer(fb
, BUFFER_STENCIL
);
570 struct intel_mipmap_tree
*depth_mt
= brw
->depthstencil
.depth_mt
;
571 struct intel_mipmap_tree
*stencil_mt
= brw
->depthstencil
.stencil_mt
;
572 uint32_t tile_x
= brw
->depthstencil
.tile_x
;
573 uint32_t tile_y
= brw
->depthstencil
.tile_y
;
574 bool hiz
= depth_irb
&& intel_renderbuffer_has_hiz(depth_irb
);
575 bool separate_stencil
= false;
576 uint32_t depth_surface_type
= BRW_SURFACE_NULL
;
577 uint32_t depthbuffer_format
= BRW_DEPTHFORMAT_D32_FLOAT
;
578 uint32_t depth_offset
= 0;
579 uint32_t width
= 1, height
= 1;
582 separate_stencil
= stencil_mt
->format
== MESA_FORMAT_S8
;
584 /* Gen7 supports only separate stencil */
585 assert(separate_stencil
|| intel
->gen
< 7);
588 /* If there's a packed depth/stencil bound to stencil only, we need to
589 * emit the packed depth/stencil buffer packet.
591 if (!depth_irb
&& stencil_irb
&& !separate_stencil
) {
592 depth_irb
= stencil_irb
;
593 depth_mt
= stencil_mt
;
596 if (depth_irb
&& depth_mt
) {
597 /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
598 * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
599 * depthstencil format.
601 * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable be
602 * set to the same value. Gens after 7 implicitly always set
603 * Separate_Stencil_Enable; software cannot disable it.
605 if ((intel
->gen
< 7 && hiz
) || intel
->gen
>= 7) {
606 assert(!_mesa_is_format_packed_depth_stencil(depth_mt
->format
));
609 /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
610 assert(intel
->gen
>= 7 || !separate_stencil
|| hiz
);
612 assert(intel
->gen
< 6 || depth_mt
->region
->tiling
== I915_TILING_Y
);
613 assert(!hiz
|| depth_mt
->region
->tiling
== I915_TILING_Y
);
615 depthbuffer_format
= brw_depthbuffer_format(brw
);
616 depth_surface_type
= BRW_SURFACE_2D
;
617 depth_offset
= brw
->depthstencil
.depth_offset
;
618 width
= depth_irb
->Base
.Base
.Width
;
619 height
= depth_irb
->Base
.Base
.Height
;
620 } else if (separate_stencil
) {
622 * There exists a separate stencil buffer but no depth buffer.
624 * The stencil buffer inherits most of its fields from
625 * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
628 * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
629 * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
630 * [DevGT+]: This field must be set to TRUE.
632 assert(intel
->has_separate_stencil
);
634 depth_surface_type
= BRW_SURFACE_2D
;
635 width
= stencil_irb
->Base
.Base
.Width
;
636 height
= stencil_irb
->Base
.Base
.Height
;
639 brw
->vtbl
.emit_depth_stencil_hiz(brw
, depth_mt
, depth_offset
,
640 depthbuffer_format
, depth_surface_type
,
641 stencil_mt
, hiz
, separate_stencil
,
642 width
, height
, tile_x
, tile_y
);
646 brw_emit_depth_stencil_hiz(struct brw_context
*brw
,
647 struct intel_mipmap_tree
*depth_mt
,
648 uint32_t depth_offset
, uint32_t depthbuffer_format
,
649 uint32_t depth_surface_type
,
650 struct intel_mipmap_tree
*stencil_mt
,
651 bool hiz
, bool separate_stencil
,
652 uint32_t width
, uint32_t height
,
653 uint32_t tile_x
, uint32_t tile_y
)
655 struct intel_context
*intel
= &brw
->intel
;
657 /* Enable the hiz bit if we're doing separate stencil, because it and the
658 * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
659 * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
660 * [DevIL]: If this field is enabled, Hierarchical Depth Buffer
661 * Enable must also be enabled.
663 * [DevGT]: This field must be set to the same value (enabled or
664 * disabled) as Hierarchical Depth Buffer Enable
666 bool enable_hiz_ss
= hiz
|| separate_stencil
;
669 /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
670 * non-pipelined state that will need the PIPE_CONTROL workaround.
672 if (intel
->gen
== 6) {
673 intel_emit_post_sync_nonzero_flush(brw
);
674 intel_emit_depth_stall_flushes(brw
);
680 else if (intel
->is_g4x
|| intel
->gen
== 5)
686 OUT_BATCH(_3DSTATE_DEPTH_BUFFER
<< 16 | (len
- 2));
687 OUT_BATCH((depth_mt
? depth_mt
->region
->pitch
- 1 : 0) |
688 (depthbuffer_format
<< 18) |
689 ((enable_hiz_ss
? 1 : 0) << 21) | /* separate stencil enable */
690 ((enable_hiz_ss
? 1 : 0) << 22) | /* hiz enable */
691 (BRW_TILEWALK_YMAJOR
<< 26) |
692 ((depth_mt
? depth_mt
->region
->tiling
!= I915_TILING_NONE
: 1)
694 (depth_surface_type
<< 29));
697 OUT_RELOC(depth_mt
->region
->bo
,
698 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
704 OUT_BATCH(((width
+ tile_x
- 1) << 6) |
705 ((height
+ tile_y
- 1) << 19));
708 if (intel
->is_g4x
|| intel
->gen
>= 5)
709 OUT_BATCH(tile_x
| (tile_y
<< 16));
711 assert(tile_x
== 0 && tile_y
== 0);
718 if (hiz
|| separate_stencil
) {
720 * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
721 * stencil enable' and 'hiz enable' bits were set. Therefore we must
722 * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
723 * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
724 * failure to do so causes hangs on gen5 and a stall on gen6.
727 /* Emit hiz buffer. */
729 struct intel_mipmap_tree
*hiz_mt
= depth_mt
->hiz_mt
;
731 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER
<< 16) | (3 - 2));
732 OUT_BATCH(hiz_mt
->region
->pitch
- 1);
733 OUT_RELOC(hiz_mt
->region
->bo
,
734 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
735 brw
->depthstencil
.hiz_offset
);
739 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER
<< 16) | (3 - 2));
745 /* Emit stencil buffer. */
746 if (separate_stencil
) {
747 struct intel_region
*region
= stencil_mt
->region
;
750 OUT_BATCH((_3DSTATE_STENCIL_BUFFER
<< 16) | (3 - 2));
751 /* The stencil buffer has quirky pitch requirements. From Vol 2a,
752 * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
753 * The pitch must be set to 2x the value computed based on width, as
754 * the stencil buffer is stored with two rows interleaved.
756 OUT_BATCH(2 * region
->pitch
- 1);
757 OUT_RELOC(region
->bo
,
758 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
759 brw
->depthstencil
.stencil_offset
);
763 OUT_BATCH((_3DSTATE_STENCIL_BUFFER
<< 16) | (3 - 2));
771 * On Gen >= 6, emit clear params for safety. If using hiz, then clear
772 * params must be emitted.
774 * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
775 * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
776 * when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
778 if (intel
->gen
>= 6 || hiz
) {
780 intel_emit_post_sync_nonzero_flush(brw
);
783 OUT_BATCH(_3DSTATE_CLEAR_PARAMS
<< 16 |
784 GEN5_DEPTH_CLEAR_VALID
|
786 OUT_BATCH(depth_mt
? depth_mt
->depth_clear_value
: 0);
791 const struct brw_tracked_state brw_depthbuffer
= {
793 .mesa
= _NEW_BUFFERS
,
794 .brw
= BRW_NEW_BATCH
,
797 .emit
= brw_emit_depthbuffer
,
802 /***********************************************************************
803 * Polygon stipple packet
806 static void upload_polygon_stipple(struct brw_context
*brw
)
808 struct intel_context
*intel
= &brw
->intel
;
809 struct gl_context
*ctx
= &brw
->intel
.ctx
;
813 if (!ctx
->Polygon
.StippleFlag
)
817 intel_emit_post_sync_nonzero_flush(brw
);
820 OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN
<< 16 | (33 - 2));
822 /* Polygon stipple is provided in OpenGL order, i.e. bottom
823 * row first. If we're rendering to a window (i.e. the
824 * default frame buffer object, 0), then we need to invert
825 * it to match our pixel layout. But if we're rendering
826 * to a FBO (i.e. any named frame buffer object), we *don't*
827 * need to invert - we already match the layout.
829 if (_mesa_is_winsys_fbo(ctx
->DrawBuffer
)) {
830 for (i
= 0; i
< 32; i
++)
831 OUT_BATCH(ctx
->PolygonStipple
[31 - i
]); /* invert */
834 for (i
= 0; i
< 32; i
++)
835 OUT_BATCH(ctx
->PolygonStipple
[i
]);
840 const struct brw_tracked_state brw_polygon_stipple
= {
842 .mesa
= (_NEW_POLYGONSTIPPLE
|
844 .brw
= BRW_NEW_CONTEXT
,
847 .emit
= upload_polygon_stipple
851 /***********************************************************************
852 * Polygon stipple offset packet
855 static void upload_polygon_stipple_offset(struct brw_context
*brw
)
857 struct intel_context
*intel
= &brw
->intel
;
858 struct gl_context
*ctx
= &brw
->intel
.ctx
;
861 if (!ctx
->Polygon
.StippleFlag
)
865 intel_emit_post_sync_nonzero_flush(brw
);
868 OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET
<< 16 | (2-2));
872 * If we're drawing to a system window we have to invert the Y axis
873 * in order to match the OpenGL pixel coordinate system, and our
874 * offset must be matched to the window position. If we're drawing
875 * to a user-created FBO then our native pixel coordinate system
876 * works just fine, and there's no window system to worry about.
878 if (_mesa_is_winsys_fbo(ctx
->DrawBuffer
))
879 OUT_BATCH((32 - (ctx
->DrawBuffer
->Height
& 31)) & 31);
885 const struct brw_tracked_state brw_polygon_stipple_offset
= {
887 .mesa
= (_NEW_BUFFERS
|
889 .brw
= BRW_NEW_CONTEXT
,
892 .emit
= upload_polygon_stipple_offset
895 /**********************************************************************
898 static void upload_aa_line_parameters(struct brw_context
*brw
)
900 struct intel_context
*intel
= &brw
->intel
;
901 struct gl_context
*ctx
= &brw
->intel
.ctx
;
903 if (!ctx
->Line
.SmoothFlag
|| !brw
->has_aa_line_parameters
)
907 intel_emit_post_sync_nonzero_flush(brw
);
909 OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS
<< 16 | (3 - 2));
910 /* use legacy aa line coverage computation */
916 const struct brw_tracked_state brw_aa_line_parameters
= {
919 .brw
= BRW_NEW_CONTEXT
,
922 .emit
= upload_aa_line_parameters
925 /***********************************************************************
926 * Line stipple packet
929 static void upload_line_stipple(struct brw_context
*brw
)
931 struct intel_context
*intel
= &brw
->intel
;
932 struct gl_context
*ctx
= &brw
->intel
.ctx
;
936 if (!ctx
->Line
.StippleFlag
)
940 intel_emit_post_sync_nonzero_flush(brw
);
943 OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN
<< 16 | (3 - 2));
944 OUT_BATCH(ctx
->Line
.StipplePattern
);
946 if (intel
->gen
>= 7) {
948 tmp
= 1.0 / (GLfloat
) ctx
->Line
.StippleFactor
;
949 tmpi
= tmp
* (1<<16);
950 OUT_BATCH(tmpi
<< 15 | ctx
->Line
.StippleFactor
);
954 tmp
= 1.0 / (GLfloat
) ctx
->Line
.StippleFactor
;
955 tmpi
= tmp
* (1<<13);
956 OUT_BATCH(tmpi
<< 16 | ctx
->Line
.StippleFactor
);
962 const struct brw_tracked_state brw_line_stipple
= {
965 .brw
= BRW_NEW_CONTEXT
,
968 .emit
= upload_line_stipple
972 /***********************************************************************
973 * Misc invariant state packets
977 brw_upload_invariant_state(struct brw_context
*brw
)
979 struct intel_context
*intel
= &brw
->intel
;
981 /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
983 intel_emit_post_sync_nonzero_flush(brw
);
985 /* Select the 3D pipeline (as opposed to media) */
987 OUT_BATCH(brw
->CMD_PIPELINE_SELECT
<< 16 | 0);
990 if (intel
->gen
< 6) {
991 /* Disable depth offset clamping. */
993 OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP
<< 16 | (2 - 2));
999 OUT_BATCH(CMD_STATE_SIP
<< 16 | (2 - 2));
1004 OUT_BATCH(brw
->CMD_VF_STATISTICS
<< 16 |
1005 (unlikely(INTEL_DEBUG
& DEBUG_STATS
) ? 1 : 0));
1009 const struct brw_tracked_state brw_invariant_state
= {
1012 .brw
= BRW_NEW_CONTEXT
,
1015 .emit
= brw_upload_invariant_state
1019 * Define the base addresses which some state is referenced from.
1021 * This allows us to avoid having to emit relocations for the objects,
1022 * and is actually required for binding table pointers on gen6.
1024 * Surface state base address covers binding table pointers and
1025 * surface state objects, but not the surfaces that the surface state
1028 static void upload_state_base_address( struct brw_context
*brw
)
1030 struct intel_context
*intel
= &brw
->intel
;
1032 /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
1033 * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
1034 * programmed prior to STATE_BASE_ADDRESS.
1036 * However, given that the instruction SBA (general state base
1037 * address) on this chipset is always set to 0 across X and GL,
1038 * maybe this isn't required for us in particular.
1041 if (intel
->gen
>= 6) {
1042 if (intel
->gen
== 6)
1043 intel_emit_post_sync_nonzero_flush(brw
);
1046 OUT_BATCH(CMD_STATE_BASE_ADDRESS
<< 16 | (10 - 2));
1047 /* General state base address: stateless DP read/write requests */
1049 /* Surface state base address:
1050 * BINDING_TABLE_STATE
1053 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_SAMPLER
, 0, 1);
1054 /* Dynamic state base address:
1056 * SAMPLER_BORDER_COLOR_STATE
1057 * CLIP, SF, WM/CC viewport state
1059 * DEPTH_STENCIL_STATE
1061 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
1062 * Disable is clear, which we rely on)
1064 OUT_RELOC(brw
->batch
.bo
, (I915_GEM_DOMAIN_RENDER
|
1065 I915_GEM_DOMAIN_INSTRUCTION
), 0, 1);
1067 OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
1068 OUT_RELOC(brw
->cache
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
1069 1); /* Instruction base address: shader kernels (incl. SIP) */
1071 OUT_BATCH(1); /* General state upper bound */
1072 /* Dynamic state upper bound. Although the documentation says that
1073 * programming it to zero will cause it to be ignored, that is a lie.
1074 * If this isn't programmed to a real bound, the sampler border color
1075 * pointer is rejected, causing border color to mysteriously fail.
1077 OUT_BATCH(0xfffff001);
1078 OUT_BATCH(1); /* Indirect object upper bound */
1079 OUT_BATCH(1); /* Instruction access upper bound */
1081 } else if (intel
->gen
== 5) {
1083 OUT_BATCH(CMD_STATE_BASE_ADDRESS
<< 16 | (8 - 2));
1084 OUT_BATCH(1); /* General state base address */
1085 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_SAMPLER
, 0,
1086 1); /* Surface state base address */
1087 OUT_BATCH(1); /* Indirect object base address */
1088 OUT_RELOC(brw
->cache
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
1089 1); /* Instruction base address */
1090 OUT_BATCH(0xfffff001); /* General state upper bound */
1091 OUT_BATCH(1); /* Indirect object upper bound */
1092 OUT_BATCH(1); /* Instruction access upper bound */
1096 OUT_BATCH(CMD_STATE_BASE_ADDRESS
<< 16 | (6 - 2));
1097 OUT_BATCH(1); /* General state base address */
1098 OUT_RELOC(brw
->batch
.bo
, I915_GEM_DOMAIN_SAMPLER
, 0,
1099 1); /* Surface state base address */
1100 OUT_BATCH(1); /* Indirect object base address */
1101 OUT_BATCH(1); /* General state upper bound */
1102 OUT_BATCH(1); /* Indirect object upper bound */
1106 /* According to section 3.6.1 of VOL1 of the 965 PRM,
1107 * STATE_BASE_ADDRESS updates require a reissue of:
1109 * 3DSTATE_PIPELINE_POINTERS
1110 * 3DSTATE_BINDING_TABLE_POINTERS
1111 * MEDIA_STATE_POINTERS
1113 * and this continues through Ironlake. The Sandy Bridge PRM, vol
1114 * 1 part 1 says that the folowing packets must be reissued:
1116 * 3DSTATE_CC_POINTERS
1117 * 3DSTATE_BINDING_TABLE_POINTERS
1118 * 3DSTATE_SAMPLER_STATE_POINTERS
1119 * 3DSTATE_VIEWPORT_STATE_POINTERS
1120 * MEDIA_STATE_POINTERS
1122 * Those are always reissued following SBA updates anyway (new
1123 * batch time), except in the case of the program cache BO
1124 * changing. Having a separate state flag makes the sequence more
1128 brw
->state
.dirty
.brw
|= BRW_NEW_STATE_BASE_ADDRESS
;
1131 const struct brw_tracked_state brw_state_base_address
= {
1134 .brw
= (BRW_NEW_BATCH
|
1135 BRW_NEW_PROGRAM_CACHE
),
1138 .emit
= upload_state_base_address