2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "main/context.h"
25 #include "main/teximage.h"
26 #include "main/blend.h"
27 #include "main/fbobject.h"
28 #include "main/renderbuffer.h"
29 #include "main/glformats.h"
31 #include "brw_blorp.h"
32 #include "brw_context.h"
33 #include "brw_defines.h"
34 #include "brw_meta_util.h"
35 #include "brw_state.h"
36 #include "intel_fbo.h"
37 #include "common/gen_debug.h"
39 #define FILE_DEBUG_FLAG DEBUG_BLORP
42 brw_blorp_lookup_shader(struct blorp_context
*blorp
,
43 const void *key
, uint32_t key_size
,
44 uint32_t *kernel_out
, void *prog_data_out
)
46 struct brw_context
*brw
= blorp
->driver_ctx
;
47 return brw_search_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
,
48 key
, key_size
, kernel_out
, prog_data_out
);
52 brw_blorp_upload_shader(struct blorp_context
*blorp
,
53 const void *key
, uint32_t key_size
,
54 const void *kernel
, uint32_t kernel_size
,
55 const struct brw_stage_prog_data
*prog_data
,
56 uint32_t prog_data_size
,
57 uint32_t *kernel_out
, void *prog_data_out
)
59 struct brw_context
*brw
= blorp
->driver_ctx
;
60 brw_upload_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
, key
, key_size
,
61 kernel
, kernel_size
, prog_data
, prog_data_size
,
62 kernel_out
, prog_data_out
);
67 brw_blorp_init(struct brw_context
*brw
)
69 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
71 blorp_init(&brw
->blorp
, brw
, &brw
->isl_dev
);
73 brw
->blorp
.compiler
= brw
->screen
->compiler
;
75 switch (devinfo
->gen
) {
77 if (devinfo
->is_g4x
) {
78 brw
->blorp
.exec
= gen45_blorp_exec
;
80 brw
->blorp
.exec
= gen4_blorp_exec
;
84 brw
->blorp
.exec
= gen5_blorp_exec
;
87 brw
->blorp
.mocs
.tex
= 0;
88 brw
->blorp
.mocs
.rb
= 0;
89 brw
->blorp
.mocs
.vb
= 0;
90 brw
->blorp
.exec
= gen6_blorp_exec
;
93 brw
->blorp
.mocs
.tex
= GEN7_MOCS_L3
;
94 brw
->blorp
.mocs
.rb
= GEN7_MOCS_L3
;
95 brw
->blorp
.mocs
.vb
= GEN7_MOCS_L3
;
96 if (devinfo
->is_haswell
) {
97 brw
->blorp
.exec
= gen75_blorp_exec
;
99 brw
->blorp
.exec
= gen7_blorp_exec
;
103 brw
->blorp
.mocs
.tex
= BDW_MOCS_WB
;
104 brw
->blorp
.mocs
.rb
= BDW_MOCS_PTE
;
105 brw
->blorp
.mocs
.vb
= BDW_MOCS_WB
;
106 brw
->blorp
.exec
= gen8_blorp_exec
;
109 brw
->blorp
.mocs
.tex
= SKL_MOCS_WB
;
110 brw
->blorp
.mocs
.rb
= SKL_MOCS_PTE
;
111 brw
->blorp
.mocs
.vb
= SKL_MOCS_WB
;
112 brw
->blorp
.exec
= gen9_blorp_exec
;
115 brw
->blorp
.mocs
.tex
= CNL_MOCS_WB
;
116 brw
->blorp
.mocs
.rb
= CNL_MOCS_PTE
;
117 brw
->blorp
.mocs
.vb
= CNL_MOCS_WB
;
118 brw
->blorp
.exec
= gen10_blorp_exec
;
121 unreachable("Invalid gen");
124 brw
->blorp
.lookup_shader
= brw_blorp_lookup_shader
;
125 brw
->blorp
.upload_shader
= brw_blorp_upload_shader
;
129 blorp_surf_for_miptree(struct brw_context
*brw
,
130 struct blorp_surf
*surf
,
131 struct intel_mipmap_tree
*mt
,
132 enum isl_aux_usage aux_usage
,
133 bool is_render_target
,
135 unsigned start_layer
, unsigned num_layers
,
136 struct isl_surf tmp_surfs
[1])
138 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
140 if (mt
->surf
.msaa_layout
== ISL_MSAA_LAYOUT_ARRAY
) {
141 const unsigned num_samples
= mt
->surf
.samples
;
142 for (unsigned i
= 0; i
< num_layers
; i
++) {
143 for (unsigned s
= 0; s
< num_samples
; s
++) {
144 const unsigned phys_layer
= (start_layer
+ i
) * num_samples
+ s
;
145 intel_miptree_check_level_layer(mt
, *level
, phys_layer
);
149 for (unsigned i
= 0; i
< num_layers
; i
++)
150 intel_miptree_check_level_layer(mt
, *level
, start_layer
+ i
);
153 surf
->surf
= &mt
->surf
;
154 surf
->addr
= (struct blorp_address
) {
156 .offset
= mt
->offset
,
157 .reloc_flags
= is_render_target
? EXEC_OBJECT_WRITE
: 0,
160 surf
->aux_usage
= aux_usage
;
162 struct isl_surf
*aux_surf
= NULL
;
164 aux_surf
= &mt
->mcs_buf
->surf
;
165 else if (mt
->hiz_buf
)
166 aux_surf
= &mt
->hiz_buf
->surf
;
168 if (mt
->format
== MESA_FORMAT_S_UINT8
&& is_render_target
&&
170 mt
->r8stencil_needs_update
= true;
172 if (surf
->aux_usage
== ISL_AUX_USAGE_HIZ
&&
173 !intel_miptree_level_has_hiz(mt
, *level
))
174 surf
->aux_usage
= ISL_AUX_USAGE_NONE
;
176 if (surf
->aux_usage
!= ISL_AUX_USAGE_NONE
) {
177 /* We only really need a clear color if we also have an auxiliary
178 * surface. Without one, it does nothing.
180 surf
->clear_color
= mt
->fast_clear_color
;
182 surf
->aux_surf
= aux_surf
;
183 surf
->aux_addr
= (struct blorp_address
) {
184 .reloc_flags
= is_render_target
? EXEC_OBJECT_WRITE
: 0,
188 surf
->aux_addr
.buffer
= mt
->mcs_buf
->bo
;
189 surf
->aux_addr
.offset
= mt
->mcs_buf
->offset
;
192 assert(surf
->aux_usage
== ISL_AUX_USAGE_HIZ
);
194 surf
->aux_addr
.buffer
= mt
->hiz_buf
->bo
;
195 surf
->aux_addr
.offset
= mt
->hiz_buf
->offset
;
198 surf
->aux_addr
= (struct blorp_address
) {
201 memset(&surf
->clear_color
, 0, sizeof(surf
->clear_color
));
203 assert((surf
->aux_usage
== ISL_AUX_USAGE_NONE
) ==
204 (surf
->aux_addr
.buffer
== NULL
));
206 /* ISL wants real levels, not offset ones. */
207 *level
-= mt
->first_level
;
210 static enum isl_format
211 brw_blorp_to_isl_format(struct brw_context
*brw
, mesa_format format
,
212 bool is_render_target
)
215 case MESA_FORMAT_NONE
:
216 return ISL_FORMAT_UNSUPPORTED
;
217 case MESA_FORMAT_S_UINT8
:
218 return ISL_FORMAT_R8_UINT
;
219 case MESA_FORMAT_Z24_UNORM_X8_UINT
:
220 case MESA_FORMAT_Z24_UNORM_S8_UINT
:
221 return ISL_FORMAT_R24_UNORM_X8_TYPELESS
;
222 case MESA_FORMAT_Z_FLOAT32
:
223 case MESA_FORMAT_Z32_FLOAT_S8X24_UINT
:
224 return ISL_FORMAT_R32_FLOAT
;
225 case MESA_FORMAT_Z_UNORM16
:
226 return ISL_FORMAT_R16_UNORM
;
228 if (is_render_target
) {
229 assert(brw
->mesa_format_supports_render
[format
]);
230 return brw
->mesa_to_isl_render_format
[format
];
232 return brw_isl_format_for_mesa_format(format
);
240 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
241 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
243 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
246 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
248 * which is simply adding 4 then modding by 8 (or anding with 7).
250 * We then may need to apply workarounds for textureGather hardware bugs.
252 static enum isl_channel_select
253 swizzle_to_scs(GLenum swizzle
)
255 return (enum isl_channel_select
)((swizzle
+ 4) & 7);
259 * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
260 * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
261 * the physical layer holding sample 0. So, for example, if
262 * src_mt->surf.samples == 4, then logical layer n corresponds to src_layer ==
266 brw_blorp_blit_miptrees(struct brw_context
*brw
,
267 struct intel_mipmap_tree
*src_mt
,
268 unsigned src_level
, unsigned src_layer
,
269 mesa_format src_format
, int src_swizzle
,
270 struct intel_mipmap_tree
*dst_mt
,
271 unsigned dst_level
, unsigned dst_layer
,
272 mesa_format dst_format
,
273 float src_x0
, float src_y0
,
274 float src_x1
, float src_y1
,
275 float dst_x0
, float dst_y0
,
276 float dst_x1
, float dst_y1
,
277 GLenum filter
, bool mirror_x
, bool mirror_y
,
278 bool decode_srgb
, bool encode_srgb
)
280 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
282 DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f)"
283 "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
285 src_mt
->surf
.samples
, _mesa_get_format_name(src_mt
->format
), src_mt
,
286 src_level
, src_layer
, src_x0
, src_y0
, src_x1
, src_y1
,
287 dst_mt
->surf
.samples
, _mesa_get_format_name(dst_mt
->format
), dst_mt
,
288 dst_level
, dst_layer
, dst_x0
, dst_y0
, dst_x1
, dst_y1
,
291 if (!decode_srgb
&& _mesa_get_format_color_encoding(src_format
) == GL_SRGB
)
292 src_format
= _mesa_get_srgb_format_linear(src_format
);
294 if (!encode_srgb
&& _mesa_get_format_color_encoding(dst_format
) == GL_SRGB
)
295 dst_format
= _mesa_get_srgb_format_linear(dst_format
);
297 /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F
298 * texture, the above code configures the source format for L32_FLOAT or
299 * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge,
300 * the SAMPLE message appears to handle multisampled L32_FLOAT and
301 * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work
302 * around the problem by using a source format of R32_FLOAT. This
303 * shouldn't affect rendering correctness, since the destination format is
304 * R32_FLOAT, so only the contents of the red channel matters.
306 if (devinfo
->gen
== 6 &&
307 src_mt
->surf
.samples
> 1 && dst_mt
->surf
.samples
<= 1 &&
308 src_mt
->format
== dst_mt
->format
&&
309 (dst_format
== MESA_FORMAT_L_FLOAT32
||
310 dst_format
== MESA_FORMAT_I_FLOAT32
)) {
311 src_format
= dst_format
= MESA_FORMAT_R_FLOAT32
;
314 enum isl_format src_isl_format
= brw_isl_format_for_mesa_format(src_format
);
315 enum isl_aux_usage src_aux_usage
=
316 intel_miptree_texture_aux_usage(brw
, src_mt
, src_isl_format
);
317 /* We do format workarounds for some depth formats so we can't reliably
318 * sample with HiZ. One of these days, we should fix that.
320 if (src_aux_usage
== ISL_AUX_USAGE_HIZ
)
321 src_aux_usage
= ISL_AUX_USAGE_NONE
;
322 const bool src_clear_supported
=
323 src_aux_usage
!= ISL_AUX_USAGE_NONE
&& src_mt
->format
== src_format
;
324 intel_miptree_prepare_access(brw
, src_mt
, src_level
, 1, src_layer
, 1,
325 src_aux_usage
, src_clear_supported
);
327 enum isl_aux_usage dst_aux_usage
=
328 intel_miptree_render_aux_usage(brw
, dst_mt
, encode_srgb
, false);
329 const bool dst_clear_supported
= dst_aux_usage
!= ISL_AUX_USAGE_NONE
;
330 intel_miptree_prepare_access(brw
, dst_mt
, dst_level
, 1, dst_layer
, 1,
331 dst_aux_usage
, dst_clear_supported
);
333 struct isl_surf tmp_surfs
[2];
334 struct blorp_surf src_surf
, dst_surf
;
335 blorp_surf_for_miptree(brw
, &src_surf
, src_mt
, src_aux_usage
, false,
336 &src_level
, src_layer
, 1, &tmp_surfs
[0]);
337 blorp_surf_for_miptree(brw
, &dst_surf
, dst_mt
, dst_aux_usage
, true,
338 &dst_level
, dst_layer
, 1, &tmp_surfs
[1]);
340 struct isl_swizzle src_isl_swizzle
= {
341 .r
= swizzle_to_scs(GET_SWZ(src_swizzle
, 0)),
342 .g
= swizzle_to_scs(GET_SWZ(src_swizzle
, 1)),
343 .b
= swizzle_to_scs(GET_SWZ(src_swizzle
, 2)),
344 .a
= swizzle_to_scs(GET_SWZ(src_swizzle
, 3)),
347 struct blorp_batch batch
;
348 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
349 blorp_blit(&batch
, &src_surf
, src_level
, src_layer
,
350 brw_blorp_to_isl_format(brw
, src_format
, false), src_isl_swizzle
,
351 &dst_surf
, dst_level
, dst_layer
,
352 brw_blorp_to_isl_format(brw
, dst_format
, true),
353 ISL_SWIZZLE_IDENTITY
,
354 src_x0
, src_y0
, src_x1
, src_y1
,
355 dst_x0
, dst_y0
, dst_x1
, dst_y1
,
356 filter
, mirror_x
, mirror_y
);
357 blorp_batch_finish(&batch
);
359 intel_miptree_finish_write(brw
, dst_mt
, dst_level
, dst_layer
, 1,
364 brw_blorp_copy_miptrees(struct brw_context
*brw
,
365 struct intel_mipmap_tree
*src_mt
,
366 unsigned src_level
, unsigned src_layer
,
367 struct intel_mipmap_tree
*dst_mt
,
368 unsigned dst_level
, unsigned dst_layer
,
369 unsigned src_x
, unsigned src_y
,
370 unsigned dst_x
, unsigned dst_y
,
371 unsigned src_width
, unsigned src_height
)
373 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
375 DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d"
376 "to %dx %s mt %p %d %d (%d,%d)\n",
378 src_mt
->surf
.samples
, _mesa_get_format_name(src_mt
->format
), src_mt
,
379 src_level
, src_layer
, src_x
, src_y
, src_width
, src_height
,
380 dst_mt
->surf
.samples
, _mesa_get_format_name(dst_mt
->format
), dst_mt
,
381 dst_level
, dst_layer
, dst_x
, dst_y
);
383 enum isl_aux_usage src_aux_usage
, dst_aux_usage
;
384 bool src_clear_supported
, dst_clear_supported
;
386 switch (src_mt
->aux_usage
) {
387 case ISL_AUX_USAGE_MCS
:
388 case ISL_AUX_USAGE_CCS_E
:
389 src_aux_usage
= src_mt
->aux_usage
;
390 /* Prior to gen9, fast-clear only supported 0/1 clear colors. Since
391 * we're going to re-interpret the format as an integer format possibly
392 * with a different number of components, we can't handle clear colors
395 src_clear_supported
= devinfo
->gen
>= 9;
398 src_aux_usage
= ISL_AUX_USAGE_NONE
;
399 src_clear_supported
= false;
403 switch (dst_mt
->aux_usage
) {
404 case ISL_AUX_USAGE_MCS
:
405 case ISL_AUX_USAGE_CCS_E
:
406 dst_aux_usage
= dst_mt
->aux_usage
;
407 /* Prior to gen9, fast-clear only supported 0/1 clear colors. Since
408 * we're going to re-interpret the format as an integer format possibly
409 * with a different number of components, we can't handle clear colors
412 dst_clear_supported
= devinfo
->gen
>= 9;
415 dst_aux_usage
= ISL_AUX_USAGE_NONE
;
416 dst_clear_supported
= false;
420 intel_miptree_prepare_access(brw
, src_mt
, src_level
, 1, src_layer
, 1,
421 src_aux_usage
, src_clear_supported
);
422 intel_miptree_prepare_access(brw
, dst_mt
, dst_level
, 1, dst_layer
, 1,
423 dst_aux_usage
, dst_clear_supported
);
425 struct isl_surf tmp_surfs
[2];
426 struct blorp_surf src_surf
, dst_surf
;
427 blorp_surf_for_miptree(brw
, &src_surf
, src_mt
, src_aux_usage
, false,
428 &src_level
, src_layer
, 1, &tmp_surfs
[0]);
429 blorp_surf_for_miptree(brw
, &dst_surf
, dst_mt
, dst_aux_usage
, true,
430 &dst_level
, dst_layer
, 1, &tmp_surfs
[1]);
432 struct blorp_batch batch
;
433 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
434 blorp_copy(&batch
, &src_surf
, src_level
, src_layer
,
435 &dst_surf
, dst_level
, dst_layer
,
436 src_x
, src_y
, dst_x
, dst_y
, src_width
, src_height
);
437 blorp_batch_finish(&batch
);
439 intel_miptree_finish_write(brw
, dst_mt
, dst_level
, dst_layer
, 1,
444 brw_blorp_copy_buffers(struct brw_context
*brw
,
445 struct brw_bo
*src_bo
,
447 struct brw_bo
*dst_bo
,
451 DBG("%s %d bytes from %p[%d] to %p[%d]",
452 __func__
, size
, src_bo
, src_offset
, dst_bo
, dst_offset
);
454 struct blorp_batch batch
;
455 struct blorp_address src
= { .buffer
= src_bo
, .offset
= src_offset
};
456 struct blorp_address dst
= { .buffer
= dst_bo
, .offset
= dst_offset
};
458 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
459 blorp_buffer_copy(&batch
, src
, dst
, size
);
460 blorp_batch_finish(&batch
);
464 static struct intel_mipmap_tree
*
465 find_miptree(GLbitfield buffer_bit
, struct intel_renderbuffer
*irb
)
467 struct intel_mipmap_tree
*mt
= irb
->mt
;
468 if (buffer_bit
== GL_STENCIL_BUFFER_BIT
&& mt
->stencil_mt
)
474 blorp_get_texture_swizzle(const struct intel_renderbuffer
*irb
)
476 return irb
->Base
.Base
._BaseFormat
== GL_RGB
?
477 MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
) :
482 do_blorp_blit(struct brw_context
*brw
, GLbitfield buffer_bit
,
483 struct intel_renderbuffer
*src_irb
, mesa_format src_format
,
484 struct intel_renderbuffer
*dst_irb
, mesa_format dst_format
,
485 GLfloat srcX0
, GLfloat srcY0
, GLfloat srcX1
, GLfloat srcY1
,
486 GLfloat dstX0
, GLfloat dstY0
, GLfloat dstX1
, GLfloat dstY1
,
487 GLenum filter
, bool mirror_x
, bool mirror_y
)
489 const struct gl_context
*ctx
= &brw
->ctx
;
491 /* Find source/dst miptrees */
492 struct intel_mipmap_tree
*src_mt
= find_miptree(buffer_bit
, src_irb
);
493 struct intel_mipmap_tree
*dst_mt
= find_miptree(buffer_bit
, dst_irb
);
495 const bool do_srgb
= ctx
->Color
.sRGBEnabled
;
498 brw_blorp_blit_miptrees(brw
,
499 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
500 src_format
, blorp_get_texture_swizzle(src_irb
),
501 dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
,
503 srcX0
, srcY0
, srcX1
, srcY1
,
504 dstX0
, dstY0
, dstX1
, dstY1
,
505 filter
, mirror_x
, mirror_y
,
508 dst_irb
->need_downsample
= true;
512 try_blorp_blit(struct brw_context
*brw
,
513 const struct gl_framebuffer
*read_fb
,
514 const struct gl_framebuffer
*draw_fb
,
515 GLfloat srcX0
, GLfloat srcY0
, GLfloat srcX1
, GLfloat srcY1
,
516 GLfloat dstX0
, GLfloat dstY0
, GLfloat dstX1
, GLfloat dstY1
,
517 GLenum filter
, GLbitfield buffer_bit
)
519 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
520 struct gl_context
*ctx
= &brw
->ctx
;
522 /* Sync up the state of window system buffers. We need to do this before
523 * we go looking for the buffers.
525 intel_prepare_render(brw
);
527 bool mirror_x
, mirror_y
;
528 if (brw_meta_mirror_clip_and_scissor(ctx
, read_fb
, draw_fb
,
529 &srcX0
, &srcY0
, &srcX1
, &srcY1
,
530 &dstX0
, &dstY0
, &dstX1
, &dstY1
,
531 &mirror_x
, &mirror_y
))
535 struct intel_renderbuffer
*src_irb
;
536 struct intel_renderbuffer
*dst_irb
;
537 struct intel_mipmap_tree
*src_mt
;
538 struct intel_mipmap_tree
*dst_mt
;
539 switch (buffer_bit
) {
540 case GL_COLOR_BUFFER_BIT
:
541 src_irb
= intel_renderbuffer(read_fb
->_ColorReadBuffer
);
542 for (unsigned i
= 0; i
< draw_fb
->_NumColorDrawBuffers
; ++i
) {
543 dst_irb
= intel_renderbuffer(draw_fb
->_ColorDrawBuffers
[i
]);
545 do_blorp_blit(brw
, buffer_bit
,
546 src_irb
, src_irb
->Base
.Base
.Format
,
547 dst_irb
, dst_irb
->Base
.Base
.Format
,
548 srcX0
, srcY0
, srcX1
, srcY1
,
549 dstX0
, dstY0
, dstX1
, dstY1
,
550 filter
, mirror_x
, mirror_y
);
553 case GL_DEPTH_BUFFER_BIT
:
555 intel_renderbuffer(read_fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
);
557 intel_renderbuffer(draw_fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
);
558 src_mt
= find_miptree(buffer_bit
, src_irb
);
559 dst_mt
= find_miptree(buffer_bit
, dst_irb
);
561 /* We can't handle format conversions between Z24 and other formats
562 * since we have to lie about the surface format. See the comments in
563 * brw_blorp_surface_info::set().
565 if ((src_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
) !=
566 (dst_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
))
569 /* We also can't handle any combined depth-stencil formats because we
570 * have to reinterpret as a color format.
572 if (_mesa_get_format_base_format(src_mt
->format
) == GL_DEPTH_STENCIL
||
573 _mesa_get_format_base_format(dst_mt
->format
) == GL_DEPTH_STENCIL
)
576 do_blorp_blit(brw
, buffer_bit
, src_irb
, MESA_FORMAT_NONE
,
577 dst_irb
, MESA_FORMAT_NONE
, srcX0
, srcY0
,
578 srcX1
, srcY1
, dstX0
, dstY0
, dstX1
, dstY1
,
579 filter
, mirror_x
, mirror_y
);
581 case GL_STENCIL_BUFFER_BIT
:
582 /* Blorp doesn't support combined depth stencil which is all we have
585 if (devinfo
->gen
< 6)
589 intel_renderbuffer(read_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
);
591 intel_renderbuffer(draw_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
);
592 do_blorp_blit(brw
, buffer_bit
, src_irb
, MESA_FORMAT_NONE
,
593 dst_irb
, MESA_FORMAT_NONE
, srcX0
, srcY0
,
594 srcX1
, srcY1
, dstX0
, dstY0
, dstX1
, dstY1
,
595 filter
, mirror_x
, mirror_y
);
598 unreachable("not reached");
605 brw_blorp_copytexsubimage(struct brw_context
*brw
,
606 struct gl_renderbuffer
*src_rb
,
607 struct gl_texture_image
*dst_image
,
609 int srcX0
, int srcY0
,
610 int dstX0
, int dstY0
,
611 int width
, int height
)
613 struct gl_context
*ctx
= &brw
->ctx
;
614 struct intel_renderbuffer
*src_irb
= intel_renderbuffer(src_rb
);
615 struct intel_texture_image
*intel_image
= intel_texture_image(dst_image
);
617 /* No pixel transfer operations (zoom, bias, mapping), just a blit */
618 if (brw
->ctx
._ImageTransferState
)
621 /* Sync up the state of window system buffers. We need to do this before
622 * we go looking at the src renderbuffer's miptree.
624 intel_prepare_render(brw
);
626 struct intel_mipmap_tree
*src_mt
= src_irb
->mt
;
627 struct intel_mipmap_tree
*dst_mt
= intel_image
->mt
;
629 /* There is support for only up to eight samples. */
630 if (src_mt
->surf
.samples
> 8 || dst_mt
->surf
.samples
> 8)
633 if (_mesa_get_format_base_format(src_rb
->Format
) !=
634 _mesa_get_format_base_format(dst_image
->TexFormat
)) {
638 /* We can't handle format conversions between Z24 and other formats since
639 * we have to lie about the surface format. See the comments in
640 * brw_blorp_surface_info::set().
642 if ((src_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
) !=
643 (dst_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
)) {
647 /* We also can't handle any combined depth-stencil formats because we
648 * have to reinterpret as a color format.
650 if (_mesa_get_format_base_format(src_mt
->format
) == GL_DEPTH_STENCIL
||
651 _mesa_get_format_base_format(dst_mt
->format
) == GL_DEPTH_STENCIL
)
654 if (!brw
->mesa_format_supports_render
[dst_image
->TexFormat
])
657 /* Source clipping shouldn't be necessary, since copytexsubimage (in
658 * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
661 * Destination clipping shouldn't be necessary since the restrictions on
662 * glCopyTexSubImage prevent the user from specifying a destination rectangle
663 * that falls outside the bounds of the destination texture.
664 * See error_check_subtexture_dimensions().
667 int srcY1
= srcY0
+ height
;
668 int srcX1
= srcX0
+ width
;
669 int dstX1
= dstX0
+ width
;
670 int dstY1
= dstY0
+ height
;
672 /* Account for the fact that in the system framebuffer, the origin is at
675 bool mirror_y
= false;
676 if (_mesa_is_winsys_fbo(ctx
->ReadBuffer
)) {
677 GLint tmp
= src_rb
->Height
- srcY0
;
678 srcY0
= src_rb
->Height
- srcY1
;
683 /* Account for face selection and texture view MinLayer */
684 int dst_slice
= slice
+ dst_image
->TexObject
->MinLayer
+ dst_image
->Face
;
685 int dst_level
= dst_image
->Level
+ dst_image
->TexObject
->MinLevel
;
687 brw_blorp_blit_miptrees(brw
,
688 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
689 src_rb
->Format
, blorp_get_texture_swizzle(src_irb
),
690 dst_mt
, dst_level
, dst_slice
,
691 dst_image
->TexFormat
,
692 srcX0
, srcY0
, srcX1
, srcY1
,
693 dstX0
, dstY0
, dstX1
, dstY1
,
694 GL_NEAREST
, false, mirror_y
,
697 /* If we're copying to a packed depth stencil texture and the source
698 * framebuffer has separate stencil, we need to also copy the stencil data
701 src_rb
= ctx
->ReadBuffer
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
702 if (_mesa_get_format_bits(dst_image
->TexFormat
, GL_STENCIL_BITS
) > 0 &&
704 src_irb
= intel_renderbuffer(src_rb
);
705 src_mt
= src_irb
->mt
;
707 if (src_mt
->stencil_mt
)
708 src_mt
= src_mt
->stencil_mt
;
709 if (dst_mt
->stencil_mt
)
710 dst_mt
= dst_mt
->stencil_mt
;
712 if (src_mt
!= dst_mt
) {
713 brw_blorp_blit_miptrees(brw
,
714 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
716 blorp_get_texture_swizzle(src_irb
),
717 dst_mt
, dst_level
, dst_slice
,
719 srcX0
, srcY0
, srcX1
, srcY1
,
720 dstX0
, dstY0
, dstX1
, dstY1
,
721 GL_NEAREST
, false, mirror_y
,
731 brw_blorp_framebuffer(struct brw_context
*brw
,
732 struct gl_framebuffer
*readFb
,
733 struct gl_framebuffer
*drawFb
,
734 GLint srcX0
, GLint srcY0
, GLint srcX1
, GLint srcY1
,
735 GLint dstX0
, GLint dstY0
, GLint dstX1
, GLint dstY1
,
736 GLbitfield mask
, GLenum filter
)
738 static GLbitfield buffer_bits
[] = {
741 GL_STENCIL_BUFFER_BIT
,
744 for (unsigned int i
= 0; i
< ARRAY_SIZE(buffer_bits
); ++i
) {
745 if ((mask
& buffer_bits
[i
]) &&
746 try_blorp_blit(brw
, readFb
, drawFb
,
747 srcX0
, srcY0
, srcX1
, srcY1
,
748 dstX0
, dstY0
, dstX1
, dstY1
,
749 filter
, buffer_bits
[i
])) {
750 mask
&= ~buffer_bits
[i
];
758 set_write_disables(const struct intel_renderbuffer
*irb
,
759 const GLubyte
*color_mask
, bool *color_write_disable
)
761 /* Format information in the renderbuffer represents the requirements
762 * given by the client. There are cases where the backing miptree uses,
763 * for example, RGBA to represent RGBX. Since the client is only expecting
764 * RGB we can treat alpha as not used and write whatever we like into it.
766 const GLenum base_format
= irb
->Base
.Base
._BaseFormat
;
767 const int components
= _mesa_base_format_component_count(base_format
);
768 bool disables
= false;
770 assert(components
> 0);
772 for (int i
= 0; i
< components
; i
++) {
773 color_write_disable
[i
] = !color_mask
[i
];
774 disables
= disables
|| !color_mask
[i
];
781 do_single_blorp_clear(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
782 struct gl_renderbuffer
*rb
, unsigned buf
,
783 bool partial_clear
, bool encode_srgb
)
785 struct gl_context
*ctx
= &brw
->ctx
;
786 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
787 uint32_t x0
, x1
, y0
, y1
;
789 mesa_format format
= irb
->Base
.Base
.Format
;
790 if (!encode_srgb
&& _mesa_get_format_color_encoding(format
) == GL_SRGB
)
791 format
= _mesa_get_srgb_format_linear(format
);
799 y0
= rb
->Height
- fb
->_Ymax
;
800 y1
= rb
->Height
- fb
->_Ymin
;
803 /* If the clear region is empty, just return. */
804 if (x0
== x1
|| y0
== y1
)
807 bool can_fast_clear
= !partial_clear
;
809 bool color_write_disable
[4] = { false, false, false, false };
810 if (set_write_disables(irb
, ctx
->Color
.ColorMask
[buf
], color_write_disable
))
811 can_fast_clear
= false;
813 /* We store clear colors as floats or uints as needed. If there are
814 * texture views in play, the formats will not properly be respected
815 * during resolves because the resolve operations only know about the
816 * miptree and not the renderbuffer.
818 if (irb
->Base
.Base
.Format
!= irb
->mt
->format
)
819 can_fast_clear
= false;
821 if (!irb
->mt
->supports_fast_clear
||
822 !brw_is_color_fast_clear_compatible(brw
, irb
->mt
, &ctx
->Color
.ClearColor
))
823 can_fast_clear
= false;
825 /* Surface state can only record one fast clear color value. Therefore
826 * unless different levels/layers agree on the color it can be used to
827 * represent only single level/layer. Here it will be reserved for the
828 * first slice (level 0, layer 0).
830 if (irb
->layer_count
> 1 || irb
->mt_level
|| irb
->mt_layer
)
831 can_fast_clear
= false;
833 unsigned level
= irb
->mt_level
;
834 const unsigned num_layers
= fb
->MaxNumLayers
? irb
->layer_count
: 1;
836 /* If the MCS buffer hasn't been allocated yet, we need to allocate it now.
838 if (can_fast_clear
&& !irb
->mt
->mcs_buf
) {
839 assert(irb
->mt
->aux_usage
== ISL_AUX_USAGE_CCS_D
);
840 if (!intel_miptree_alloc_ccs(brw
, irb
->mt
)) {
         /* There are a few reasons, in addition to out-of-memory, that can
          * cause intel_miptree_alloc_ccs to fail.  Try to recover by
          * falling back to non-fast clear.
845 can_fast_clear
= false;
849 if (can_fast_clear
) {
850 const enum isl_aux_state aux_state
=
851 intel_miptree_get_aux_state(irb
->mt
, irb
->mt_level
, irb
->mt_layer
);
852 union isl_color_value clear_color
=
853 brw_meta_convert_fast_clear_color(brw
, irb
->mt
,
854 &ctx
->Color
.ClearColor
);
856 bool same_clear_color
= memcmp(&irb
->mt
->fast_clear_color
,
857 &clear_color
, sizeof(clear_color
)) == 0;
      /* If the buffer is already in ISL_AUX_STATE_CLEAR with the same clear
       * color, the clear is redundant and can be skipped.
862 if (aux_state
== ISL_AUX_STATE_CLEAR
&& same_clear_color
)
865 irb
->mt
->fast_clear_color
= clear_color
;
867 /* If the clear color has changed, we need to emit a new SURFACE_STATE
868 * on the next draw call.
870 if (!same_clear_color
)
871 ctx
->NewDriverState
|= BRW_NEW_FAST_CLEAR_COLOR
;
873 DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__
,
874 irb
->mt
, irb
->mt_level
, irb
->mt_layer
, num_layers
);
876 /* We can't setup the blorp_surf until we've allocated the MCS above */
877 struct isl_surf isl_tmp
[2];
878 struct blorp_surf surf
;
879 blorp_surf_for_miptree(brw
, &surf
, irb
->mt
, irb
->mt
->aux_usage
, true,
880 &level
, irb
->mt_layer
, num_layers
, isl_tmp
);
      /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
884 * "Any transition from any value in {Clear, Render, Resolve} to a
885 * different value in {Clear, Render, Resolve} requires end of pipe
888 * In other words, fast clear ops are not properly synchronized with
889 * other drawing. We need to use a PIPE_CONTROL to ensure that the
890 * contents of the previous draw hit the render target before we resolve
891 * and again afterwards to ensure that the resolve is complete before we
892 * do any more regular drawing.
894 brw_emit_end_of_pipe_sync(brw
, PIPE_CONTROL_RENDER_TARGET_FLUSH
);
896 struct blorp_batch batch
;
897 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
898 blorp_fast_clear(&batch
, &surf
,
899 brw
->mesa_to_isl_render_format
[format
],
900 level
, irb
->mt_layer
, num_layers
,
902 blorp_batch_finish(&batch
);
904 brw_emit_end_of_pipe_sync(brw
, PIPE_CONTROL_RENDER_TARGET_FLUSH
);
      /* Now that the fast clear has occurred, put the buffer in
       * ISL_AUX_STATE_CLEAR so that we won't waste time doing
910 intel_miptree_set_aux_state(brw
, irb
->mt
, irb
->mt_level
,
911 irb
->mt_layer
, num_layers
,
912 ISL_AUX_STATE_CLEAR
);
914 DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__
,
915 irb
->mt
, irb
->mt_level
, irb
->mt_layer
, num_layers
);
917 enum isl_aux_usage aux_usage
=
918 intel_miptree_render_aux_usage(brw
, irb
->mt
, encode_srgb
, false);
919 intel_miptree_prepare_render(brw
, irb
->mt
, level
, irb
->mt_layer
,
920 num_layers
, encode_srgb
, false);
922 struct isl_surf isl_tmp
[2];
923 struct blorp_surf surf
;
924 blorp_surf_for_miptree(brw
, &surf
, irb
->mt
, aux_usage
, true,
925 &level
, irb
->mt_layer
, num_layers
, isl_tmp
);
927 union isl_color_value clear_color
;
928 memcpy(clear_color
.f32
, ctx
->Color
.ClearColor
.f
, sizeof(float) * 4);
930 struct blorp_batch batch
;
931 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
932 blorp_clear(&batch
, &surf
,
933 brw
->mesa_to_isl_render_format
[format
],
934 ISL_SWIZZLE_IDENTITY
,
935 level
, irb
->mt_layer
, num_layers
,
937 clear_color
, color_write_disable
);
938 blorp_batch_finish(&batch
);
940 intel_miptree_finish_render(brw
, irb
->mt
, level
, irb
->mt_layer
,
941 num_layers
, encode_srgb
, false);
948 brw_blorp_clear_color(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
949 GLbitfield mask
, bool partial_clear
, bool encode_srgb
)
951 for (unsigned buf
= 0; buf
< fb
->_NumColorDrawBuffers
; buf
++) {
952 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[buf
];
953 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
955 /* Only clear the buffers present in the provided mask */
956 if (((1 << fb
->_ColorDrawBufferIndexes
[buf
]) & mask
) == 0)
959 /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
960 * the framebuffer can be complete with some attachments missing. In
961 * this case the _ColorDrawBuffers pointer will be NULL.
966 do_single_blorp_clear(brw
, fb
, rb
, buf
, partial_clear
, encode_srgb
);
967 irb
->need_downsample
= true;
974 brw_blorp_clear_depth_stencil(struct brw_context
*brw
,
975 struct gl_framebuffer
*fb
,
976 GLbitfield mask
, bool partial_clear
)
978 const struct gl_context
*ctx
= &brw
->ctx
;
979 struct gl_renderbuffer
*depth_rb
=
980 fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
;
981 struct gl_renderbuffer
*stencil_rb
=
982 fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
984 if (!depth_rb
|| ctx
->Depth
.Mask
== GL_FALSE
)
985 mask
&= ~BUFFER_BIT_DEPTH
;
987 if (!stencil_rb
|| (ctx
->Stencil
.WriteMask
[0] & 0xff) == 0)
988 mask
&= ~BUFFER_BIT_STENCIL
;
990 if (!(mask
& (BUFFER_BITS_DEPTH_STENCIL
)))
993 uint32_t x0
, x1
, y0
, y1
, rb_name
, rb_height
;
995 rb_name
= depth_rb
->Name
;
996 rb_height
= depth_rb
->Height
;
998 assert(depth_rb
->Width
== stencil_rb
->Width
);
999 assert(depth_rb
->Height
== stencil_rb
->Height
);
1003 rb_name
= stencil_rb
->Name
;
1004 rb_height
= stencil_rb
->Height
;
1013 y0
= rb_height
- fb
->_Ymax
;
1014 y1
= rb_height
- fb
->_Ymin
;
1017 /* If the clear region is empty, just return. */
1018 if (x0
== x1
|| y0
== y1
)
1021 uint32_t level
, start_layer
, num_layers
;
1022 struct isl_surf isl_tmp
[4];
1023 struct blorp_surf depth_surf
, stencil_surf
;
1025 struct intel_mipmap_tree
*depth_mt
= NULL
;
1026 if (mask
& BUFFER_BIT_DEPTH
) {
1027 struct intel_renderbuffer
*irb
= intel_renderbuffer(depth_rb
);
1028 depth_mt
= find_miptree(GL_DEPTH_BUFFER_BIT
, irb
);
1030 level
= irb
->mt_level
;
1031 start_layer
= irb
->mt_layer
;
1032 num_layers
= fb
->MaxNumLayers
? irb
->layer_count
: 1;
1034 intel_miptree_prepare_depth(brw
, depth_mt
, level
,
1035 start_layer
, num_layers
);
1037 unsigned depth_level
= level
;
1038 blorp_surf_for_miptree(brw
, &depth_surf
, depth_mt
, depth_mt
->aux_usage
,
1039 true, &depth_level
, start_layer
, num_layers
,
1041 assert(depth_level
== level
);
1044 uint8_t stencil_mask
= 0;
1045 struct intel_mipmap_tree
*stencil_mt
= NULL
;
1046 if (mask
& BUFFER_BIT_STENCIL
) {
1047 struct intel_renderbuffer
*irb
= intel_renderbuffer(stencil_rb
);
1048 stencil_mt
= find_miptree(GL_STENCIL_BUFFER_BIT
, irb
);
1050 if (mask
& BUFFER_BIT_DEPTH
) {
1051 assert(level
== irb
->mt_level
);
1052 assert(start_layer
== irb
->mt_layer
);
1053 assert(num_layers
== fb
->MaxNumLayers
? irb
->layer_count
: 1);
1055 level
= irb
->mt_level
;
1056 start_layer
= irb
->mt_layer
;
1057 num_layers
= fb
->MaxNumLayers
? irb
->layer_count
: 1;
1060 stencil_mask
= ctx
->Stencil
.WriteMask
[0] & 0xff;
1062 intel_miptree_prepare_access(brw
, stencil_mt
, level
, 1,
1063 start_layer
, num_layers
,
1064 ISL_AUX_USAGE_NONE
, false);
1066 unsigned stencil_level
= level
;
1067 blorp_surf_for_miptree(brw
, &stencil_surf
, stencil_mt
,
1068 ISL_AUX_USAGE_NONE
, true,
1069 &stencil_level
, start_layer
, num_layers
,
1073 assert((mask
& BUFFER_BIT_DEPTH
) || stencil_mask
);
1075 struct blorp_batch batch
;
1076 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1077 blorp_clear_depth_stencil(&batch
, &depth_surf
, &stencil_surf
,
1078 level
, start_layer
, num_layers
,
1080 (mask
& BUFFER_BIT_DEPTH
), ctx
->Depth
.Clear
,
1081 stencil_mask
, ctx
->Stencil
.Clear
);
1082 blorp_batch_finish(&batch
);
1084 if (mask
& BUFFER_BIT_DEPTH
) {
1085 intel_miptree_finish_depth(brw
, depth_mt
, level
,
1086 start_layer
, num_layers
, true);
1090 intel_miptree_finish_write(brw
, stencil_mt
, level
,
1091 start_layer
, num_layers
,
1092 ISL_AUX_USAGE_NONE
);
1097 brw_blorp_resolve_color(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
1098 unsigned level
, unsigned layer
,
1099 enum blorp_fast_clear_op resolve_op
)
1101 DBG("%s to mt %p level %u layer %u\n", __FUNCTION__
, mt
, level
, layer
);
1103 const mesa_format format
= _mesa_get_srgb_format_linear(mt
->format
);
1105 struct isl_surf isl_tmp
[1];
1106 struct blorp_surf surf
;
1107 blorp_surf_for_miptree(brw
, &surf
, mt
, mt
->aux_usage
, true,
1108 &level
, layer
, 1 /* num_layers */,
1111 /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
1113 * "Any transition from any value in {Clear, Render, Resolve} to a
1114 * different value in {Clear, Render, Resolve} requires end of pipe
1117 * In other words, fast clear ops are not properly synchronized with
1118 * other drawing. We need to use a PIPE_CONTROL to ensure that the
1119 * contents of the previous draw hit the render target before we resolve
1120 * and again afterwards to ensure that the resolve is complete before we
1121 * do any more regular drawing.
1123 brw_emit_end_of_pipe_sync(brw
, PIPE_CONTROL_RENDER_TARGET_FLUSH
);
1126 struct blorp_batch batch
;
1127 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1128 blorp_ccs_resolve(&batch
, &surf
, level
, layer
,
1129 brw_blorp_to_isl_format(brw
, format
, true),
1131 blorp_batch_finish(&batch
);
1133 /* See comment above */
1134 brw_emit_end_of_pipe_sync(brw
, PIPE_CONTROL_RENDER_TARGET_FLUSH
);
1138 brw_blorp_mcs_partial_resolve(struct brw_context
*brw
,
1139 struct intel_mipmap_tree
*mt
,
1140 uint32_t start_layer
, uint32_t num_layers
)
1142 DBG("%s to mt %p layers %u-%u\n", __FUNCTION__
, mt
,
1143 start_layer
, start_layer
+ num_layers
- 1);
1145 assert(mt
->aux_usage
== ISL_AUX_USAGE_MCS
);
1147 const mesa_format format
= _mesa_get_srgb_format_linear(mt
->format
);
1148 enum isl_format isl_format
= brw_blorp_to_isl_format(brw
, format
, true);
1150 struct isl_surf isl_tmp
[1];
1151 struct blorp_surf surf
;
1153 blorp_surf_for_miptree(brw
, &surf
, mt
, ISL_AUX_USAGE_MCS
, true,
1154 &level
, start_layer
, num_layers
, isl_tmp
);
1156 struct blorp_batch batch
;
1157 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1158 blorp_mcs_partial_resolve(&batch
, &surf
, isl_format
,
1159 start_layer
, num_layers
);
1160 blorp_batch_finish(&batch
);
1164 * Perform a HiZ or depth resolve operation.
1166 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
1167 * PRM, Volume 1, Part 2:
1168 * - 7.5.3.1 Depth Buffer Clear
1169 * - 7.5.3.2 Depth Buffer Resolve
1170 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
1173 intel_hiz_exec(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
1174 unsigned int level
, unsigned int start_layer
,
1175 unsigned int num_layers
, enum blorp_hiz_op op
)
1177 assert(intel_miptree_level_has_hiz(mt
, level
));
1178 assert(op
!= BLORP_HIZ_OP_NONE
);
1179 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1180 const char *opname
= NULL
;
1183 case BLORP_HIZ_OP_DEPTH_RESOLVE
:
1184 opname
= "depth resolve";
1186 case BLORP_HIZ_OP_HIZ_RESOLVE
:
1187 opname
= "hiz ambiguate";
1189 case BLORP_HIZ_OP_DEPTH_CLEAR
:
1190 opname
= "depth clear";
1192 case BLORP_HIZ_OP_NONE
:
1197 DBG("%s %s to mt %p level %d layers %d-%d\n",
1198 __func__
, opname
, mt
, level
, start_layer
, start_layer
+ num_layers
- 1);
1200 /* The following stalls and flushes are only documented to be required for
1201 * HiZ clear operations. However, they also seem to be required for
1202 * resolve operations.
1204 if (devinfo
->gen
== 6) {
1205 /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
1207 * "If other rendering operations have preceded this clear, a
1208 * PIPE_CONTROL with write cache flush enabled and Z-inhibit
1209 * disabled must be issued before the rectangle primitive used for
1210 * the depth buffer clear operation.
1212 brw_emit_pipe_control_flush(brw
,
1213 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
1214 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1215 PIPE_CONTROL_CS_STALL
);
1216 } else if (devinfo
->gen
>= 7) {
1218 * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
1220 * If other rendering operations have preceded this clear, a
1221 * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
1222 * enabled must be issued before the rectangle primitive used for
1223 * the depth buffer clear operation.
1225 * Same applies for Gen8 and Gen9.
1227 * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
1228 * PIPE_CONTROL, Depth Cache Flush Enable:
1230 * This bit must not be set when Depth Stall Enable bit is set in
1233 * This is confirmed to hold for real, HSW gets immediate gpu hangs.
1235 * Therefore issue two pipe control flushes, one for cache flush and
1236 * another for depth stall.
1238 brw_emit_pipe_control_flush(brw
,
1239 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1240 PIPE_CONTROL_CS_STALL
);
1242 brw_emit_pipe_control_flush(brw
, PIPE_CONTROL_DEPTH_STALL
);
1245 assert(mt
->aux_usage
== ISL_AUX_USAGE_HIZ
&& mt
->hiz_buf
);
1247 struct isl_surf isl_tmp
[2];
1248 struct blorp_surf surf
;
1249 blorp_surf_for_miptree(brw
, &surf
, mt
, ISL_AUX_USAGE_HIZ
, true,
1250 &level
, start_layer
, num_layers
, isl_tmp
);
1252 struct blorp_batch batch
;
1253 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1254 blorp_hiz_op(&batch
, &surf
, level
, start_layer
, num_layers
, op
);
1255 blorp_batch_finish(&batch
);
1257 /* The following stalls and flushes are only documented to be required for
1258 * HiZ clear operations. However, they also seem to be required for
1259 * resolve operations.
1261 if (devinfo
->gen
== 6) {
1262 /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
1264 * "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
1265 * followed by a PIPE_CONTROL command with DEPTH_STALL bit set
1266 * and Then followed by Depth FLUSH'
1268 brw_emit_pipe_control_flush(brw
,
1269 PIPE_CONTROL_DEPTH_STALL
);
1271 brw_emit_pipe_control_flush(brw
,
1272 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1273 PIPE_CONTROL_CS_STALL
);
1274 } else if (devinfo
->gen
>= 8) {
1276 * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
1278 * "Depth buffer clear pass using any of the methods (WM_STATE,
1279 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
1280 * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
1281 * "set" before starting to render. DepthStall and DepthFlush are
1282 * not needed between consecutive depth clear passes nor is it
1283 * required if the depth clear pass was done with
1284 * 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
1286 * TODO: Such as the spec says, this could be conditional.
1288 brw_emit_pipe_control_flush(brw
,
1289 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1290 PIPE_CONTROL_DEPTH_STALL
);