2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "intel_batchbuffer.h"
25 #include "intel_mipmap_tree.h"
26 #include "intel_regions.h"
27 #include "intel_fbo.h"
28 #include "brw_context.h"
29 #include "brw_state.h"
30 #include "brw_defines.h"
32 static void emit_depthbuffer(struct brw_context
*brw
)
34 struct intel_context
*intel
= &brw
->intel
;
35 struct gl_context
*ctx
= &intel
->ctx
;
36 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
39 struct intel_renderbuffer
*drb
= intel_get_renderbuffer(fb
, BUFFER_DEPTH
);
40 struct intel_renderbuffer
*srb
= intel_get_renderbuffer(fb
, BUFFER_STENCIL
);
41 struct intel_mipmap_tree
*depth_mt
= NULL
,
45 /* Amount by which drawing should be offset in order to draw to the
46 * appropriate miplevel/zoffset/cubeface. We will extract these values
47 * from depth_irb or stencil_irb once we determine which is present.
49 uint32_t draw_x
= 0, draw_y
= 0;
51 /* Masks used to determine how much of the draw_x and draw_y offsets should
52 * be performed using the fine adjustment of "depth coordinate offset X/Y"
53 * (dw5 of 3DSTATE_DEPTH_BUFFER). Any remaining coarse adjustment will be
54 * performed by changing the base addresses of the buffers.
56 * Since the HiZ, depth, and stencil buffers all use the same "depth
57 * coordinate offset X/Y" values, we need to make sure that the coarse
58 * adjustment will be possible to apply to all three buffers. Since coarse
59 * adjustment can only be applied in multiples of the tile size, we will OR
60 * together the tile masks of all the buffers to determine which offsets to
61 * perform as fine adjustments.
63 uint32_t tile_mask_x
= 0, tile_mask_y
= 0;
69 hiz_mt
= depth_mt
->hiz_mt
;
71 intel_region_get_tile_masks(depth_mt
->region
,
72 &tile_mask_x
, &tile_mask_y
);
75 uint32_t hiz_tile_mask_x
, hiz_tile_mask_y
;
76 intel_region_get_tile_masks(hiz_mt
->region
,
77 &hiz_tile_mask_x
, &hiz_tile_mask_y
);
79 /* Each HiZ row represents 2 rows of pixels */
80 hiz_tile_mask_y
= hiz_tile_mask_y
<< 1 | 1;
82 tile_mask_x
|= hiz_tile_mask_x
;
83 tile_mask_y
|= hiz_tile_mask_y
;
89 if (stencil_mt
->stencil_mt
)
90 stencil_mt
= stencil_mt
->stencil_mt
;
92 assert(stencil_mt
->format
== MESA_FORMAT_S8
);
94 /* Stencil buffer uses 64x64 tiles. */
99 /* Gen7 doesn't support packed depth/stencil */
100 assert(stencil_mt
== NULL
|| depth_mt
!= stencil_mt
);
101 assert(!depth_mt
|| !_mesa_is_format_packed_depth_stencil(depth_mt
->format
));
103 intel_emit_depth_stall_flushes(intel
);
105 if (depth_mt
== NULL
) {
106 uint32_t dw1
= BRW_DEPTHFORMAT_D32_FLOAT
<< 18;
108 uint32_t tile_x
, tile_y
;
110 if (stencil_mt
== NULL
) {
111 dw1
|= (BRW_SURFACE_NULL
<< 29);
113 /* _NEW_STENCIL: enable stencil buffer writes */
114 dw1
|= ((ctx
->Stencil
.WriteMask
!= 0) << 27);
116 draw_x
= srb
->draw_x
;
117 draw_y
= srb
->draw_y
;
118 tile_x
= draw_x
& tile_mask_x
;
119 tile_y
= draw_y
& tile_mask_y
;
121 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
122 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
123 * Coordinate Offset X/Y":
125 * "The 3 LSBs of both offsets must be zero to ensure correct
128 * We have no guarantee that tile_x and tile_y are correctly aligned,
129 * since they are determined by the mipmap layout, which is only
130 * aligned to multiples of 4.
132 * So, to avoid hanging the GPU, just smash the low order 3 bits of
133 * tile_x and tile_y to 0. This is a temporary workaround until we
134 * come up with a better solution.
139 /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
140 dw1
|= (BRW_SURFACE_2D
<< 29);
141 dw3
= ((srb
->Base
.Base
.Width
+ tile_x
- 1) << 4) |
142 ((srb
->Base
.Base
.Height
+ tile_y
- 1) << 18);
146 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER
<< 16 | (7 - 2));
151 OUT_BATCH(tile_x
| (tile_y
<< 16));
155 struct intel_region
*region
= depth_mt
->region
;
156 uint32_t tile_x
, tile_y
, offset
;
158 draw_x
= drb
->draw_x
;
159 draw_y
= drb
->draw_y
;
160 tile_x
= draw_x
& tile_mask_x
;
161 tile_y
= draw_y
& tile_mask_y
;
163 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
164 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
165 * Coordinate Offset X/Y":
167 * "The 3 LSBs of both offsets must be zero to ensure correct
170 * We have no guarantee that tile_x and tile_y are correctly aligned,
171 * since they are determined by the mipmap layout, which is only aligned
174 * So, to avoid hanging the GPU, just smash the low order 3 bits of
175 * tile_x and tile_y to 0. This is a temporary workaround until we come
176 * up with a better solution.
181 offset
= intel_region_get_aligned_offset(region
,
182 draw_x
& ~tile_mask_x
,
183 draw_y
& ~tile_mask_y
);
185 assert(region
->tiling
== I915_TILING_Y
);
187 /* _NEW_DEPTH, _NEW_STENCIL */
189 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER
<< 16 | (7 - 2));
190 OUT_BATCH(((region
->pitch
* region
->cpp
) - 1) |
191 (brw_depthbuffer_format(brw
) << 18) |
192 ((hiz_mt
? 1 : 0) << 22) | /* hiz enable */
193 ((stencil_mt
!= NULL
&& ctx
->Stencil
.WriteMask
!= 0) << 27) |
194 ((ctx
->Depth
.Mask
!= 0) << 28) |
195 (BRW_SURFACE_2D
<< 29));
196 OUT_RELOC(region
->bo
,
197 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
199 OUT_BATCH((((drb
->Base
.Base
.Width
+ tile_x
) - 1) << 4) |
200 (((drb
->Base
.Base
.Height
+ tile_y
) - 1) << 18));
202 OUT_BATCH(tile_x
| (tile_y
<< 16));
207 if (hiz_mt
== NULL
) {
209 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER
<< 16 | (3 - 2));
214 uint32_t hiz_offset
=
215 intel_region_get_aligned_offset(hiz_mt
->region
,
216 draw_x
& ~tile_mask_x
,
217 (draw_y
& ~tile_mask_y
) / 2);
219 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER
<< 16 | (3 - 2));
220 OUT_BATCH(hiz_mt
->region
->pitch
* hiz_mt
->region
->cpp
- 1);
221 OUT_RELOC(hiz_mt
->region
->bo
,
222 I915_GEM_DOMAIN_RENDER
,
223 I915_GEM_DOMAIN_RENDER
,
228 if (stencil_mt
== NULL
) {
230 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER
<< 16 | (3 - 2));
235 const int enabled
= intel
->is_haswell
? HSW_STENCIL_ENABLED
: 0;
237 /* Note: We can't compute the stencil offset using
238 * intel_region_get_aligned_offset(), because the stencil region claims
239 * that the region is untiled; in fact it's W tiled.
241 uint32_t stencil_offset
=
242 (draw_y
& ~tile_mask_y
) * stencil_mt
->region
->pitch
+
243 (draw_x
& ~tile_mask_x
) * 64;
246 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER
<< 16 | (3 - 2));
247 /* The stencil buffer has quirky pitch requirements. From the Graphics
248 * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
249 * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
250 * field "Surface Pitch":
252 * The pitch must be set to 2x the value computed based on width, as
253 * the stencil buffer is stored with two rows interleaved.
255 * (Note that it is not 100% clear whether this intended to apply to
256 * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
257 * imply that it doesn't), however the comment appears on a "DevIVB+"
258 * page (which would imply that it does). Experiments with the hardware
259 * indicate that it does.
262 (2 * stencil_mt
->region
->pitch
* stencil_mt
->region
->cpp
- 1));
263 OUT_RELOC(stencil_mt
->region
->bo
,
264 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
270 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS
<< 16 | (3 - 2));
277 * \see brw_context.state.depth_region
279 const struct brw_tracked_state gen7_depthbuffer
= {
281 .mesa
= (_NEW_BUFFERS
| _NEW_DEPTH
| _NEW_STENCIL
),
282 .brw
= BRW_NEW_BATCH
,
285 .emit
= emit_depthbuffer
,