ilo: fix and enable fast depth clear
[mesa.git] / src / gallium / drivers / ilo / ilo_blitter_rectlist.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_draw.h"
29 #include "util/u_pack_color.h"
30
31 #include "ilo_blitter.h"
32 #include "ilo_3d.h"
33 #include "ilo_3d_pipeline.h"
34 #include "ilo_blit.h"
35 #include "ilo_gpe.h"
36 #include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components and
37 zs_align_surface */
38
39 /**
40 * Set the states that are invariant between all ops.
41 */
42 static bool
43 ilo_blitter_set_invariants(struct ilo_blitter *blitter)
44 {
45 struct pipe_screen *screen = blitter->ilo->base.screen;
46 struct pipe_resource templ;
47 struct pipe_vertex_element velems[2];
48 struct pipe_viewport_state vp;
49
50 if (blitter->initialized)
51 return true;
52
53 blitter->buffer.size = 4096;
54
55 /* allocate the vertex buffer */
56 memset(&templ, 0, sizeof(templ));
57 templ.target = PIPE_BUFFER;
58 templ.width0 = blitter->buffer.size;
59 templ.usage = PIPE_USAGE_STREAM;
60 templ.bind = PIPE_BIND_VERTEX_BUFFER;
61 blitter->buffer.res = screen->resource_create(screen, &templ);
62 if (!blitter->buffer.res)
63 return false;
64
65 /* do not increase reference count */
66 blitter->vb.states[0].buffer = blitter->buffer.res;
67
68 /* only vertex X and Y */
69 blitter->vb.states[0].stride = 2 * sizeof(float);
70 blitter->vb.enabled_mask = 0x1;
71 memset(&velems, 0, sizeof(velems));
72 velems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
73 ilo_gpe_init_ve(blitter->ilo->dev, 2, velems, &blitter->ve);
74
75 /* override first VE to be VUE header */
76 ve_init_cso_with_components(blitter->ilo->dev,
77 BRW_VE1_COMPONENT_STORE_0, /* Reserved */
78 BRW_VE1_COMPONENT_STORE_0, /* Render Target Array Index */
79 BRW_VE1_COMPONENT_STORE_0, /* Viewport Index */
80 BRW_VE1_COMPONENT_STORE_0, /* Point Width */
81 &blitter->ve.cso[0]);
82
83 /* a rectangle has 3 vertices in a RECTLIST */
84 util_draw_init_info(&blitter->draw);
85 blitter->draw.count = 3;
86
87 /**
88 * From the Haswell PRM, volume 7, page 615:
89 *
90 * "The clear value must be between the min and max depth values
91 * (inclusive) defined in the CC_VIEWPORT."
92 *
93 * Even though clipping and viewport transformation will be disabled, we
94 * still need to set up the viewport states.
95 */
96 memset(&vp, 0, sizeof(vp));
97 vp.scale[0] = 1.0f;
98 vp.scale[1] = 1.0f;
99 vp.scale[2] = 1.0f;
100 vp.scale[3] = 1.0f;
101 ilo_gpe_set_viewport_cso(blitter->ilo->dev, &vp, &blitter->viewport);
102
103 blitter->initialized = true;
104
105 return true;
106 }
107
108 static void
109 ilo_blitter_set_op(struct ilo_blitter *blitter,
110 enum ilo_blitter_rectlist_op op)
111 {
112 blitter->op = op;
113 }
114
115 /**
116 * Set the rectangle primitive.
117 */
118 static void
119 ilo_blitter_set_rectlist(struct ilo_blitter *blitter,
120 unsigned x, unsigned y,
121 unsigned width, unsigned height)
122 {
123 unsigned usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED;
124 float vertices[3][2];
125 struct pipe_box box;
126
127 /*
128 * From the Sandy Bridge PRM, volume 2 part 1, page 11:
129 *
130 * "(RECTLIST) A list of independent rectangles, where only 3 vertices
131 * are provided per rectangle object, with the fourth vertex implied
132 * by the definition of a rectangle. V0=LowerRight, V1=LowerLeft,
133 * V2=UpperLeft. Implied V3 = V0- V1+V2."
134 */
135 vertices[0][0] = (float) (x + width);
136 vertices[0][1] = (float) (y + height);
137 vertices[1][0] = (float) x;
138 vertices[1][1] = (float) (y + height);
139 vertices[2][0] = (float) x;
140 vertices[2][1] = (float) y;
141
142 /* buffer is full */
143 if (blitter->buffer.offset + sizeof(vertices) > blitter->buffer.size) {
144 if (!ilo_buffer_alloc_bo(ilo_buffer(blitter->buffer.res)))
145 usage &= ~PIPE_TRANSFER_UNSYNCHRONIZED;
146
147 blitter->buffer.offset = 0;
148 }
149
150 u_box_1d(blitter->buffer.offset, sizeof(vertices), &box);
151
152 blitter->ilo->base.transfer_inline_write(&blitter->ilo->base,
153 blitter->buffer.res, 0, usage, &box, vertices, 0, 0);
154
155 blitter->vb.states[0].buffer_offset = blitter->buffer.offset;
156 blitter->buffer.offset += sizeof(vertices);
157 }
158
159 static void
160 ilo_blitter_set_clear_values(struct ilo_blitter *blitter,
161 uint32_t depth, ubyte stencil)
162 {
163 blitter->depth_clear_value = depth;
164 blitter->cc.stencil_ref.ref_value[0] = stencil;
165 }
166
167 static void
168 ilo_blitter_set_dsa(struct ilo_blitter *blitter,
169 const struct pipe_depth_stencil_alpha_state *state)
170 {
171 ilo_gpe_init_dsa(blitter->ilo->dev, state, &blitter->dsa);
172 }
173
174 static void
175 ilo_blitter_set_fb(struct ilo_blitter *blitter,
176 const struct pipe_resource *res, unsigned level,
177 const struct ilo_surface_cso *cso)
178 {
179 blitter->fb.width = u_minify(res->width0, level);
180 blitter->fb.height = u_minify(res->height0, level);
181
182 blitter->fb.num_samples = res->nr_samples;
183 if (!blitter->fb.num_samples)
184 blitter->fb.num_samples = 1;
185
186 memcpy(&blitter->fb.dst, cso, sizeof(*cso));
187 }
188
189 static void
190 ilo_blitter_set_fb_from_surface(struct ilo_blitter *blitter,
191 struct pipe_surface *surf)
192 {
193 ilo_blitter_set_fb(blitter, surf->texture, surf->u.tex.level,
194 (const struct ilo_surface_cso *) surf);
195 }
196
197 static void
198 ilo_blitter_set_fb_from_resource(struct ilo_blitter *blitter,
199 struct pipe_resource *res,
200 enum pipe_format format,
201 unsigned level, unsigned slice)
202 {
203 struct pipe_surface templ, *surf;
204
205 memset(&templ, 0, sizeof(templ));
206 templ.format = format;
207 templ.u.tex.level = level;
208 templ.u.tex.first_layer = slice;
209 templ.u.tex.last_layer = slice;
210
211 /* if we did not call create_surface(), it would never fail */
212 surf = blitter->ilo->base.create_surface(&blitter->ilo->base, res, &templ);
213 assert(surf);
214
215 ilo_blitter_set_fb(blitter, res, level,
216 (const struct ilo_surface_cso *) surf);
217
218 pipe_surface_reference(&surf, NULL);
219 }
220
221 static void
222 ilo_blitter_set_uses(struct ilo_blitter *blitter, uint32_t uses)
223 {
224 blitter->uses = uses;
225 }
226
227 static void
228 hiz_align_fb(struct ilo_blitter *blitter)
229 {
230 unsigned align_w, align_h;
231
232 switch (blitter->op) {
233 case ILO_BLITTER_RECTLIST_CLEAR_ZS:
234 case ILO_BLITTER_RECTLIST_RESOLVE_Z:
235 break;
236 default:
237 return;
238 break;
239 }
240
241 /*
242 * From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
243 *
244 * "A rectangle primitive representing the clear area is delivered. The
245 * primitive must adhere to the following restrictions on size:
246 *
247 * - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
248 * aligned to an 8x4 pixel block relative to the upper left corner
249 * of the depth buffer, and contain an integer number of these pixel
250 * blocks, and all 8x4 pixels must be lit.
251 *
252 * - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be
253 * aligned to a 4x2 pixel block (8x4 sample block) relative to the
254 * upper left corner of the depth buffer, and contain an integer
255 * number of these pixel blocks, and all samples of the 4x2 pixels
256 * must be lit
257 *
258 * - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be
259 * aligned to a 2x2 pixel block (8x4 sample block) relative to the
260 * upper left corner of the depth buffer, and contain an integer
261 * number of these pixel blocks, and all samples of the 2x2 pixels
262 * must be list."
263 *
264 * "The following is required when performing a depth buffer resolve:
265 *
266 * - A rectangle primitive of the same size as the previous depth
267 * buffer clear operation must be delivered, and depth buffer state
268 * cannot have changed since the previous depth buffer clear
269 * operation."
270 */
271 switch (blitter->fb.num_samples) {
272 case 1:
273 align_w = 8;
274 align_h = 4;
275 break;
276 case 2:
277 align_w = 4;
278 align_h = 4;
279 break;
280 case 4:
281 align_w = 4;
282 align_h = 2;
283 break;
284 case 8:
285 default:
286 align_w = 2;
287 align_h = 2;
288 break;
289 }
290
291 if (blitter->fb.width % align_w || blitter->fb.height % align_h) {
292 blitter->fb.width = align(blitter->fb.width, align_w);
293 blitter->fb.height = align(blitter->fb.width, align_h);
294
295 assert(!blitter->fb.dst.is_rt);
296 zs_align_surface(blitter->ilo->dev, align_w, align_h,
297 &blitter->fb.dst.u.zs);
298 }
299 }
300
301 static void
302 hiz_emit_rectlist(struct ilo_blitter *blitter)
303 {
304 struct ilo_3d *hw3d = blitter->ilo->hw3d;
305 struct ilo_3d_pipeline *p = hw3d->pipeline;
306
307 hiz_align_fb(blitter);
308
309 ilo_blitter_set_rectlist(blitter, 0, 0,
310 blitter->fb.width, blitter->fb.height);
311
312 ilo_3d_own_render_ring(hw3d);
313
314 /*
315 * From the Sandy Bridge PRM, volume 2 part 1, page 313:
316 *
317 * "If other rendering operations have preceded this clear, a
318 * PIPE_CONTROL with write cache flush enabled and Z-inhibit
319 * disabled must be issued before the rectangle primitive used for
320 * the depth buffer clear operation."
321 *
322 * From the Sandy Bridge PRM, volume 2 part 1, page 314:
323 *
324 * "Depth buffer clear pass must be followed by a PIPE_CONTROL
325 * command with DEPTH_STALL bit set and Then followed by Depth
326 * FLUSH"
327 *
328 * But the pipeline has to be flushed both before and after not only
329 * because of these workarounds. We need them for reasons such as
330 *
331 * - we may sample from a texture that was rendered to
332 * - we may sample from the fb shortly after
333 */
334 if (!ilo_cp_empty(p->cp))
335 ilo_3d_pipeline_emit_flush(p);
336
337 ilo_3d_pipeline_emit_rectlist(p, blitter);
338
339 ilo_3d_pipeline_emit_flush(p);
340 }
341
342 static bool
343 hiz_can_clear_zs(const struct ilo_blitter *blitter,
344 const struct ilo_texture *tex)
345 {
346 /*
347 * From the Sandy Bridge PRM, volume 2 part 1, page 314:
348 *
349 * "Several cases exist where Depth Buffer Clear cannot be enabled (the
350 * legacy method of clearing must be performed):
351 *
352 * - If the depth buffer format is D32_FLOAT_S8X24_UINT or
353 * D24_UNORM_S8_UINT.
354 *
355 * - If stencil test is enabled but the separate stencil buffer is
356 * disabled.
357 *
358 * - [DevSNB-A{W/A}]: ...
359 *
360 * - [DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
361 * width of the map (LOD0) is not multiple of 16, fast clear
362 * optimization must be disabled."
363 *
364 * From the Ivy Bridge PRM, volume 2 part 1, page 313:
365 *
366 * "Several cases exist where Depth Buffer Clear cannot be enabled (the
367 * legacy method of clearing must be performed):
368 *
369 * - If the depth buffer format is D32_FLOAT_S8X24_UINT or
370 * D24_UNORM_S8_UINT.
371 *
372 * - If stencil test is enabled but the separate stencil buffer is
373 * disabled."
374 *
375 * The truth is when HiZ is enabled, separate stencil is also enabled on
376 * all GENs. The depth buffer format cannot be combined depth/stencil.
377 */
378 switch (tex->bo_format) {
379 case PIPE_FORMAT_Z16_UNORM:
380 if (blitter->ilo->dev->gen == ILO_GEN(6) && tex->base.width0 % 16)
381 return false;
382 break;
383 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
384 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
385 assert(!"HiZ with combined depth/stencil");
386 return false;
387 break;
388 default:
389 break;
390 }
391
392 return true;
393 }
394
395 bool
396 ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter,
397 struct pipe_surface *zs,
398 unsigned clear_flags,
399 double depth, unsigned stencil)
400 {
401 struct ilo_texture *tex = ilo_texture(zs->texture);
402 struct pipe_depth_stencil_alpha_state dsa_state;
403 uint32_t uses, clear_value;
404
405 if (!ilo_texture_can_enable_hiz(tex,
406 zs->u.tex.level, zs->u.tex.first_layer,
407 zs->u.tex.last_layer - zs->u.tex.first_layer + 1))
408 return false;
409
410 if (!hiz_can_clear_zs(blitter, tex))
411 return false;
412
413 clear_value = util_pack_z(tex->bo_format, depth);
414
415 ilo_blit_resolve_surface(blitter->ilo, zs,
416 ILO_TEXTURE_RENDER_WRITE | ILO_TEXTURE_CLEAR);
417 ilo_texture_set_slice_clear_value(tex, zs->u.tex.level,
418 zs->u.tex.first_layer,
419 zs->u.tex.last_layer - zs->u.tex.first_layer + 1,
420 clear_value);
421
422 /*
423 * From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
424 *
425 * "- Depth Test Enable must be disabled and Depth Buffer Write Enable
426 * must be enabled (if depth is being cleared).
427 *
428 * - Stencil buffer clear can be performed at the same time by
429 * enabling Stencil Buffer Write Enable. Stencil Test Enable must
430 * be enabled and Stencil Pass Depth Pass Op set to REPLACE, and the
431 * clear value that is placed in the stencil buffer is the Stencil
432 * Reference Value from COLOR_CALC_STATE.
433 *
434 * - Note also that stencil buffer clear can be performed without
435 * depth buffer clear. For stencil only clear, Depth Test Enable and
436 * Depth Buffer Write Enable must be disabled.
437 *
438 * - [DevSNB] errata: For stencil buffer only clear, the previous
439 * depth clear value must be delivered during the clear."
440 */
441 memset(&dsa_state, 0, sizeof(dsa_state));
442
443 if (clear_flags & PIPE_CLEAR_DEPTH)
444 dsa_state.depth.writemask = true;
445
446 if (clear_flags & PIPE_CLEAR_STENCIL) {
447 dsa_state.stencil[0].enabled = true;
448 dsa_state.stencil[0].func = PIPE_FUNC_ALWAYS;
449 dsa_state.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP;
450 dsa_state.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
451 dsa_state.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP;
452
453 /*
454 * From the Ivy Bridge PRM, volume 2 part 1, page 277:
455 *
456 * "Additionally the following must be set to the correct values.
457 *
458 * - DEPTH_STENCIL_STATE::Stencil Write Mask must be 0xFF
459 * - DEPTH_STENCIL_STATE::Stencil Test Mask must be 0xFF
460 * - DEPTH_STENCIL_STATE::Back Face Stencil Write Mask must be 0xFF
461 * - DEPTH_STENCIL_STATE::Back Face Stencil Test Mask must be 0xFF"
462 */
463 dsa_state.stencil[0].valuemask = 0xff;
464 dsa_state.stencil[0].writemask = 0xff;
465 dsa_state.stencil[1].valuemask = 0xff;
466 dsa_state.stencil[1].writemask = 0xff;
467 }
468
469 ilo_blitter_set_invariants(blitter);
470 ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_CLEAR_ZS);
471
472 ilo_blitter_set_dsa(blitter, &dsa_state);
473 ilo_blitter_set_clear_values(blitter, clear_value, (ubyte) stencil);
474 ilo_blitter_set_fb_from_surface(blitter, zs);
475
476 uses = ILO_BLITTER_USE_DSA;
477 if (clear_flags & PIPE_CLEAR_DEPTH)
478 uses |= ILO_BLITTER_USE_VIEWPORT | ILO_BLITTER_USE_FB_DEPTH;
479 if (clear_flags & PIPE_CLEAR_STENCIL)
480 uses |= ILO_BLITTER_USE_CC | ILO_BLITTER_USE_FB_STENCIL;
481 ilo_blitter_set_uses(blitter, uses);
482
483 hiz_emit_rectlist(blitter);
484
485 return true;
486 }
487
488 void
489 ilo_blitter_rectlist_resolve_z(struct ilo_blitter *blitter,
490 struct pipe_resource *res,
491 unsigned level, unsigned slice)
492 {
493 struct ilo_texture *tex = ilo_texture(res);
494 struct pipe_depth_stencil_alpha_state dsa_state;
495 const struct ilo_texture_slice *s =
496 ilo_texture_get_slice(tex, level, slice);
497
498 if (!ilo_texture_can_enable_hiz(tex, level, slice, 1))
499 return;
500
501 /*
502 * From the Sandy Bridge PRM, volume 2 part 1, page 314:
503 *
504 * "Depth Test Enable must be enabled with the Depth Test Function set
505 * to NEVER. Depth Buffer Write Enable must be enabled. Stencil Test
506 * Enable and Stencil Buffer Write Enable must be disabled."
507 */
508 memset(&dsa_state, 0, sizeof(dsa_state));
509 dsa_state.depth.writemask = true;
510 dsa_state.depth.enabled = true;
511 dsa_state.depth.func = PIPE_FUNC_NEVER;
512
513 ilo_blitter_set_invariants(blitter);
514 ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_RESOLVE_Z);
515
516 ilo_blitter_set_dsa(blitter, &dsa_state);
517 ilo_blitter_set_clear_values(blitter, s->clear_value, 0);
518 ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice);
519 ilo_blitter_set_uses(blitter,
520 ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
521
522 hiz_emit_rectlist(blitter);
523 }
524
525 void
526 ilo_blitter_rectlist_resolve_hiz(struct ilo_blitter *blitter,
527 struct pipe_resource *res,
528 unsigned level, unsigned slice)
529 {
530 struct ilo_texture *tex = ilo_texture(res);
531 struct pipe_depth_stencil_alpha_state dsa_state;
532
533 if (!ilo_texture_can_enable_hiz(tex, level, slice, 1))
534 return;
535
536 /*
537 * From the Sandy Bridge PRM, volume 2 part 1, page 315:
538 *
539 * "(Hierarchical Depth Buffer Resolve) Depth Test Enable must be
540 * disabled. Depth Buffer Write Enable must be enabled. Stencil Test
541 * Enable and Stencil Buffer Write Enable must be disabled."
542 */
543 memset(&dsa_state, 0, sizeof(dsa_state));
544 dsa_state.depth.writemask = true;
545
546 ilo_blitter_set_invariants(blitter);
547 ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_RESOLVE_HIZ);
548
549 ilo_blitter_set_dsa(blitter, &dsa_state);
550 ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice);
551 ilo_blitter_set_uses(blitter,
552 ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
553
554 hiz_emit_rectlist(blitter);
555 }